├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── README.md └── src ├── decoder.rs ├── encoder.rs └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | sudo: required 3 | rust: 4 | - nightly 5 | before_script: 6 | - | 7 | pip install 'travis-cargo<0.2' --user && 8 | export PATH=$HOME/.local/bin:$PATH 9 | script: 10 | - | 11 | travis-cargo build && 12 | travis-cargo test && 13 | travis-cargo --only nightly doc 14 | after_success: 15 | - travis-cargo --only nightly doc-upload 16 | env: 17 | global: 18 | - TRAVIS_CARGO_NIGHTLY_FEATURE='' 19 | - secure: WL6XCAj9AIOGQQ6fe03S9uuwvJxAX/WMve7A+mmFqWVkVOOPhOVc4cVDUbA/RzgX1qOz8syq0YA2oOkCXMg5l6KOYOaBw5e8Dba2R6ZO4PP060mnRMbW1LjCtbHM3RKxHABcridiUV5BMsfM5+Fi58f/Oab34t9nfMzVGq7iRUw= 20 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | 3 | name = "bytekey" 4 | version = "0.4.2" 5 | authors = ["Dan Burkert "] 6 | license = "Apache-2.0" 7 | 8 | description = "lexicographic sort-order preserving binary encoding" 9 | repository = "https://github.com/danburkert/bytekey.git" 10 | readme = "README.md" 11 | keywords = ["encoding", "serialization", "library"] 12 | 13 | [dependencies] 14 | byteorder = "*" 15 | rustc-serialize = "*" 16 | 17 | [dev-dependencies] 18 | quickcheck = "*" 19 | quickcheck_macros = "*" 20 | rand = "*" 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND 
CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2014 Dan Burkert 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/danburkert/bytekey.svg?branch=master)](https://travis-ci.org/danburkert/bytekey) 2 | 3 | [rustdoc](https://danburkert.github.io/bytekey/bytekey/index.html) 4 | 5 | # bytekey 6 | 7 | Binary encoding for Rust values which preserves lexicographic sort order. Order-preserving encoding 8 | is useful for creating keys for sorted key-value stores with byte string typed keys, such as 9 | [leveldb](https://github.com/google/leveldb). `bytekey` attempts to encode values into the fewest 10 | number of bytes possible while preserving order guarantees. Type information is *not* serialized 11 | alongside values, and thus the type of serialized data must be known in order to perform decoding 12 | (`bytekey` does not implement a self-describing format). 13 | 14 | ## Supported Data Types 15 | 16 | `bytekey` encoding currently supports all Rust primitives, strings, options, structs, enums, and 17 | tuples. 
`isize` and `usize` types are variable-length encoded. Sequence (`Vec`) and map types are 18 | not currently supported (but could be in the future). See `Encoder` for details on the 19 | serialization format. 20 | 21 | ## Usage 22 | 23 | ``` 24 | extern crate serialize; 25 | extern crate bytekey; 26 | use bytekey::{encode, decode}; 27 | 28 | #[deriving(Encodable, Decodable, Show, PartialEq)] 29 | struct MyKey { a: usize, b: String } 30 | 31 | let a = MyKey { a: 1, b: "foo".to_string() }; 32 | let b = MyKey { a: 2, b: "foo".to_string() }; 33 | let c = MyKey { a: 2, b: "fooz".to_string() }; 34 | 35 | assert!(encode(&a) < encode(&b)); 36 | assert!(encode(&b) < encode(&c)); 37 | assert_eq!(a, decode(encode(&a)).unwrap()); 38 | ``` 39 | 40 | ## Type Evolution 41 | 42 | In general, the exact type of a serialized value must be known in order to correctly deserialize 43 | it. For structs and enums, the type is effectively frozen once any values of the type have been 44 | serialized: changes to the struct or enum will cause deserialization of already encoded values 45 | to fail or return incorrect values. The only exception is adding new variants to the end 46 | of an existing enum. Enum variants may *not* change type, be removed, or be reordered. All 47 | changes to structs, including adding, removing, reordering, or changing the type of a field are 48 | forbidden. 49 | 50 | These restrictions lead to a few best-practices when using `bytekey` encoding: 51 | 52 | * Don't use `bytekey` unless you need lexicographic ordering of encoded values! A more 53 | general encoding library such as [Cap'n Proto](https://github.com/dwrensha/capnproto-rust) or 54 | [binary-encode](https://github.com/TyOverby/binary-encode) will serve you better if this 55 | feature is not necessary. 56 | * If you persist encoded values for longer than the life of a process (i.e. you write the 57 | encoded values to a file or a database), consider using an enum as a top-level wrapper type. 
58 | This will allow you to seamlessly add a new variant when you need to change the key format in a 59 | backwards-compatible manner (the different key types will sort separately). If your enum has 60 | 16 or fewer variants, then the overhead is just a single byte in encoded output. 61 | 62 | ## License 63 | 64 | `bytekey` is licensed under the Apache License, Version 2.0. See LICENSE for full license text. 65 | -------------------------------------------------------------------------------- /src/decoder.rs: -------------------------------------------------------------------------------- 1 | use std::{i8, i16, i32, i64}; 2 | use std::io::{self, Read}; 3 | use std::iter::range_inclusive; 4 | use std::mem::transmute; 5 | use std::num::wrapping::OverflowingOps; 6 | 7 | use byteorder::BigEndian; 8 | use byteorder::ReadBytesExt; 9 | use rustc_serialize; 10 | 11 | use Error; 12 | use Result; 13 | 14 | /// A decoder for deserializing bytes in an order preserving format to a value. 15 | pub struct Decoder { 16 | reader: io::BufReader 17 | } 18 | 19 | impl Decoder { 20 | 21 | /// Creates a new ordered bytes decoder whose input will be read from the provided reader. 
22 | pub fn new(reader: R) -> Decoder { 23 | Decoder { reader: io::BufReader::new(reader) } 24 | } 25 | 26 | pub fn read_var_u64(&mut self) -> Result { 27 | let header = try!(self.reader.read_u8()); 28 | let n = header >> 4; 29 | let (mut val, _) = ((header & 0x0F) as u64).overflowing_shl(n as u32 * 8); 30 | for i in range_inclusive(1, n) { 31 | let byte = try!(self.reader.read_u8()); 32 | val += (byte as u64) << ((n - i) * 8); 33 | } 34 | Ok(val) 35 | } 36 | 37 | pub fn read_var_i64(&mut self) -> Result { 38 | let header = try!(self.reader.read_u8()); 39 | let mask = ((header ^ 0x80) as i8 >> 7) as u8; 40 | let n = ((header >> 3) ^ mask) & 0x0F; 41 | let (mut val, _) = (((header ^ mask) & 0x07) as u64).overflowing_shl(n as u32 * 8); 42 | for i in range_inclusive(1, n) { 43 | let byte = try!(self.reader.read_u8()); 44 | val += ((byte ^ mask) as u64) << ((n - i) * 8); 45 | } 46 | let final_mask = (((mask as i64) << 63) >> 63) as u64; 47 | val ^= final_mask; 48 | Ok(val as i64) 49 | } 50 | } 51 | 52 | impl rustc_serialize::Decoder for Decoder 53 | where R: io::Read { 54 | 55 | type Error = Error; 56 | 57 | fn read_nil(&mut self) -> Result<()> { Ok(()) } 58 | 59 | fn read_u8(&mut self) -> Result { 60 | self.reader.read_u8().map_err(From::from) 61 | } 62 | fn read_u16(&mut self) -> Result { 63 | self.reader.read_u16::().map_err(From::from) 64 | } 65 | fn read_u32(&mut self) -> Result { 66 | self.reader.read_u32::().map_err(From::from) 67 | } 68 | fn read_u64(&mut self) -> Result { 69 | self.reader.read_u64::().map_err(From::from) 70 | } 71 | fn read_usize(&mut self) -> Result { 72 | let val = try!(self.read_var_u64()); 73 | Ok(val as usize) 74 | } 75 | 76 | fn read_i8(&mut self) -> Result { 77 | let val = try!(self.reader.read_i8()); 78 | Ok(val ^ i8::MIN) 79 | } 80 | fn read_i16(&mut self) -> Result { 81 | let val = try!(self.reader.read_i16::()); 82 | Ok(val ^ i16::MIN) 83 | } 84 | fn read_i32(&mut self) -> Result { 85 | let val = try!(self.reader.read_i32::()); 86 | 
Ok(val ^ i32::MIN) 87 | } 88 | fn read_i64(&mut self) -> Result { 89 | let val = try!(self.reader.read_i64::()); 90 | Ok(val ^ i64::MIN) 91 | } 92 | fn read_isize(&mut self) -> Result { 93 | let val = try!(self.read_var_i64()); 94 | Ok(val as isize) 95 | } 96 | 97 | fn read_bool(&mut self) -> Result { 98 | match try!(self.reader.read_u8()) { 99 | 0 => Ok(false), 100 | _ => Ok(true) 101 | } 102 | } 103 | 104 | fn read_f32(&mut self) -> Result { 105 | let val = try!(self.reader.read_i32::()); 106 | let t = ((val ^ i32::MIN) >> 31) | i32::MIN; 107 | let f = unsafe { transmute(val ^ t) }; 108 | Ok(f) 109 | } 110 | fn read_f64(&mut self) -> Result { 111 | let val = try!(self.reader.read_i64::()); 112 | let t = ((val ^ i64::MIN) >> 63) | i64::MIN; 113 | let f = unsafe { transmute(val ^ t) }; 114 | Ok(f) 115 | } 116 | 117 | fn read_char(&mut self) -> Result { 118 | match (&mut self.reader).chars().next() { 119 | Some(Ok(c)) => Ok(c), 120 | Some(Err(io::CharsError::NotUtf8)) => Err(Error::NotUtf8), 121 | Some(Err(io::CharsError::Other(error))) => Err(Error::Io(error)), 122 | None => Err(Error::UnexpectedEof), 123 | } 124 | } 125 | 126 | fn read_str(&mut self) -> Result { 127 | let mut string = String::new(); 128 | 129 | loop { 130 | let c = try!(self.read_char()); 131 | if c == '\0' { break; } 132 | string.push(c); 133 | } 134 | 135 | Ok(string) 136 | } 137 | 138 | fn read_enum(&mut self, _name: &str, f: F) -> Result 139 | where F: FnOnce(&mut Self) -> Result { 140 | f(self) 141 | } 142 | fn read_enum_variant(&mut self, _names: &[&str], mut f: F) -> Result 143 | where F: FnMut(&mut Self, usize) -> Result { 144 | let id = try!(self.read_usize()); 145 | f(self, id) 146 | } 147 | fn read_enum_variant_arg(&mut self, _idx: usize, f: F) -> Result 148 | where F: FnOnce(&mut Self) -> Result { 149 | f(self) 150 | } 151 | fn read_enum_struct_variant(&mut self, names: &[&str], f: F) -> Result 152 | where F: FnMut(&mut Self, usize) -> Result { 153 | self.read_enum_variant(names, f) 
154 | } 155 | fn read_enum_struct_variant_field(&mut self, 156 | _name: &str, 157 | idx: usize, 158 | f: F) 159 | -> Result 160 | where F: FnOnce(&mut Self) -> Result { 161 | self.read_enum_variant_arg(idx, f) 162 | } 163 | 164 | fn read_struct(&mut self, _name: &str, _len: usize, f: F) -> Result 165 | where F: FnOnce(&mut Self) -> Result { 166 | f(self) 167 | } 168 | fn read_struct_field(&mut self, _name: &str, _idx: usize, f: F) -> Result 169 | where F: FnOnce(&mut Self) -> Result { 170 | f(self) 171 | } 172 | 173 | fn read_tuple(&mut self, _len: usize, f: F) -> Result 174 | where F: FnOnce(&mut Self) -> Result { 175 | f(self) 176 | } 177 | fn read_tuple_arg(&mut self, _idx: usize, f: F) -> Result 178 | where F: FnOnce(&mut Self) -> Result { 179 | f(self) 180 | } 181 | 182 | fn read_tuple_struct(&mut self, _name: &str, len: usize, f: F) -> Result 183 | where F: FnOnce(&mut Self) -> Result { 184 | self.read_tuple(len, f) 185 | } 186 | fn read_tuple_struct_arg(&mut self, idx: usize, f: F) -> Result 187 | where F: FnOnce(&mut Self) -> Result { 188 | self.read_tuple_arg(idx, f) 189 | } 190 | 191 | fn read_option(&mut self, mut f: F) -> Result 192 | where F: FnMut(&mut Self, bool) -> Result { 193 | let is_some = try!(self.read_bool()); 194 | f(self, is_some) 195 | } 196 | 197 | fn read_seq(&mut self, _f: F) -> Result 198 | where F: FnOnce(&mut Self, usize) -> Result { 199 | unimplemented!() 200 | } 201 | fn read_seq_elt(&mut self, _idx: usize, _f: F) -> Result 202 | where F: FnOnce(&mut Self) -> Result { 203 | unimplemented!() 204 | } 205 | 206 | fn read_map(&mut self, _f: F) -> Result 207 | where F: FnOnce(&mut Self, usize) -> Result { 208 | unimplemented!() 209 | } 210 | fn read_map_elt_key(&mut self, _idx: usize, _f: F) -> Result 211 | where F: FnOnce(&mut Self) -> Result { 212 | unimplemented!() 213 | } 214 | fn read_map_elt_val(&mut self, _idx: usize, _f: F) -> Result 215 | where F: FnOnce(&mut Self) -> Result { 216 | unimplemented!() 217 | } 218 | 219 | fn 
error(&mut self, err: &str) -> Error { 220 | Error::Io(io::Error::new(io::ErrorKind::Other, err)) 221 | } 222 | } 223 | 224 | #[cfg(test)] 225 | mod test { 226 | 227 | use std::{f32, f64, isize, usize}; 228 | 229 | use {encode, decode}; 230 | use encoder::test::{TestStruct, TestEnum}; 231 | 232 | #[quickcheck] 233 | fn check_u8(val: u8) -> bool { 234 | val == decode(encode(&val).unwrap()).unwrap() 235 | } 236 | #[quickcheck] 237 | fn check_u16(val: u16) -> bool { 238 | val == decode(encode(&val).unwrap()).unwrap() 239 | } 240 | #[quickcheck] 241 | fn check_u32(val: u32) -> bool { 242 | val == decode(encode(&val).unwrap()).unwrap() 243 | } 244 | #[quickcheck] 245 | fn check_u64(val: u64) -> bool { 246 | val == decode(encode(&val).unwrap()).unwrap() 247 | } 248 | #[quickcheck] 249 | fn check_usize(val: usize) -> bool { 250 | val == decode(encode(&val).unwrap()).unwrap() 251 | } 252 | #[test] 253 | fn test_usize() { 254 | let values = vec![ 255 | 0usize, 2usize.pow(0), 256 | 2usize.pow(4) - 1, 2usize.pow(4), 257 | 2usize.pow(12) - 1, 2usize.pow(12), 258 | 2usize.pow(20) - 1, 2usize.pow(20), 259 | 2usize.pow(28) - 1, 2usize.pow(28), 260 | 2usize.pow(36) - 1, 2usize.pow(36), 261 | 2usize.pow(44) - 1, 2usize.pow(44), 262 | 2usize.pow(52) - 1, 2usize.pow(52), 263 | 2usize.pow(60) - 1, 2usize.pow(60), 264 | usize::MAX, 265 | ]; 266 | for val in values.iter() { 267 | println!("testing value: {}", val); 268 | assert_eq!(*val, decode(encode(val).unwrap()).unwrap()); 269 | } 270 | } 271 | 272 | #[quickcheck] 273 | fn check_i8(val: i8) -> bool { 274 | val == decode(encode(&val).unwrap()).unwrap() 275 | } 276 | #[quickcheck] 277 | fn check_i16(val: i16) -> bool { 278 | val == decode(encode(&val).unwrap()).unwrap() 279 | } 280 | #[quickcheck] 281 | fn check_i32(val: i32) -> bool { 282 | val == decode(encode(&val).unwrap()).unwrap() 283 | } 284 | #[quickcheck] 285 | fn check_i64(val: i64) -> bool { 286 | val == decode(encode(&val).unwrap()).unwrap() 287 | } 288 | #[quickcheck] 289 
| fn check_isize(val: isize) -> bool { 290 | val == decode(encode(&val).unwrap()).unwrap() 291 | } 292 | #[test] 293 | fn test_isize() { 294 | let values = vec![ 295 | -2isize.pow(0), 0isize, 2isize.pow(0), 296 | -2isize.pow(3) - 1, -2isize.pow(3), 2isize.pow(3) - 1, 2isize.pow(3), 297 | -2isize.pow(11) - 1, -2isize.pow(11), 2isize.pow(11) - 1, 2isize.pow(11), 298 | -2isize.pow(19) - 1, -2isize.pow(19), 2isize.pow(19) - 1, 2isize.pow(19), 299 | -2isize.pow(27) - 1, -2isize.pow(27), 2isize.pow(27) - 1, 2isize.pow(27), 300 | -2isize.pow(35) - 1, -2isize.pow(35), 2isize.pow(35) - 1, 2isize.pow(35), 301 | -2isize.pow(43) - 1, -2isize.pow(43), 2isize.pow(43) - 1, 2isize.pow(43), 302 | -2isize.pow(51) - 1, -2isize.pow(51), 2isize.pow(51) - 1, 2isize.pow(51), 303 | -2isize.pow(59) - 1, -2isize.pow(59), 2isize.pow(59) - 1, 2isize.pow(59), 304 | isize::MIN, isize::MAX, 305 | ]; 306 | for val in values.iter() { 307 | assert_eq!(*val, decode(encode(val).unwrap()).unwrap()); 308 | } 309 | } 310 | 311 | #[quickcheck] 312 | fn check_f32(val: f32) -> bool { 313 | val == decode(encode(&val).unwrap()).unwrap() 314 | } 315 | #[test] 316 | fn test_f32() { 317 | let values = vec![ 318 | f32::NEG_INFINITY, 319 | f32::MIN, 320 | -0.0, 321 | 0.0, 322 | f32::MIN_POSITIVE, 323 | f32::MAX, 324 | f32::INFINITY 325 | ]; 326 | for val in values.iter() { 327 | assert_eq!(*val, decode(encode(val).unwrap()).unwrap()); 328 | } 329 | } 330 | 331 | #[quickcheck] 332 | fn check_f64(val: f64) -> bool { 333 | val == decode(encode(&val).unwrap()).unwrap() 334 | } 335 | #[test] 336 | fn test_f64() { 337 | let values = vec![ 338 | f64::NEG_INFINITY, 339 | f64::MIN, 340 | -0.0, 341 | 0.0, 342 | f64::MIN_POSITIVE, 343 | f64::MAX, 344 | f64::INFINITY 345 | ]; 346 | for val in values.iter() { 347 | assert_eq!(*val, decode(encode(val).unwrap()).unwrap()); 348 | } 349 | } 350 | 351 | #[quickcheck] 352 | fn check_char(val: char) -> bool { 353 | val == decode(encode(&val).unwrap()).unwrap() 354 | } 355 | 356 | 
#[quickcheck] 357 | fn check_string(val: String) -> bool { 358 | val == decode::(encode(&val).unwrap()).unwrap() 359 | } 360 | 361 | #[quickcheck] 362 | fn check_option(val: Option) -> bool { 363 | val == decode(encode(&val).unwrap()).unwrap() 364 | } 365 | 366 | #[quickcheck] 367 | fn check_struct(val: TestStruct) -> bool { 368 | val == decode(encode(&val).unwrap()).unwrap() 369 | } 370 | 371 | #[quickcheck] 372 | fn check_tuple(val: (usize, char, String)) -> bool { 373 | val == decode(encode(&val).unwrap()).unwrap() 374 | } 375 | 376 | #[quickcheck] 377 | fn check_enum(val: TestEnum) -> bool { 378 | val == decode(encode(&val).unwrap()).unwrap() 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /src/encoder.rs: -------------------------------------------------------------------------------- 1 | use std::{i8, i16, i32, i64}; 2 | use std::io::{self, Write}; 3 | use std::mem::transmute; 4 | 5 | use byteorder::BigEndian; 6 | use byteorder::WriteBytesExt; 7 | use rustc_serialize; 8 | 9 | use Error; 10 | use Result; 11 | 12 | /// An encoder for serializing data to a byte format that preserves lexicographic sort order. 13 | /// 14 | /// The byte format is designed with a few goals: 15 | /// 16 | /// * Order must be preserved 17 | /// * Serialized representations should be as compact as possible 18 | /// * Type information is *not* serialized with values 19 | /// 20 | /// #### Supported Data Types 21 | /// 22 | /// ##### Unsigned Integers 23 | /// 24 | /// `u8`, `u16`, `u32`, and `u64` are encoded into 1, 2, 4, and 8 bytes of output, respectively. 25 | /// Order is preserved by encoding the bytes in big-endian (most-significant bytes first) format. 26 | /// 27 | /// `usize` is variable-length encoded into between 1 and 9 bytes. Smaller magnitude values (closer 28 | /// to 0) will encode into fewer bytes. See `emit_var_u64` for details on serialization 29 | /// size and format. 
30 | /// 31 | /// ##### Signed Integers 32 | /// 33 | /// `i8`, `i16`, `i32`, and `i64` are encoded into 1, 2, 4, and 8 bytes of output, respectively. 34 | /// Order is preserved by flipping the sign bit of the value, and encoding the resulting 35 | /// bytes in big-endian format. 36 | /// 37 | /// `isize` is variable-length encoded into between 1 and 9 bytes. Smaller magnitude values (closer 38 | /// to 0) will encode into fewer bytes. See `emit_var_i64` for details on serialization 39 | /// size and format. 40 | /// 41 | /// ##### Floating Point Numbers 42 | /// 43 | /// `f32` and `f64` are encoded into 4 and 8 bytes of output, respectively. Order is preserved 44 | /// by flipping the sign bit of the value, or taking the bitwise complement of the value if negative, and encoding the resulting bytes in 45 | /// big-endian format. `NAN` values will sort after all other values. In general, it is 46 | /// unwise to use IEEE 754 floating point values in keys, because rounding errors are pervasive. 47 | /// It is typically hard or impossible to use an approximate 'epsilon' approach when using keys for 48 | /// lookup. 49 | /// 50 | /// ##### Characters 51 | /// 52 | /// Characters are serialized into between 1 and 4 bytes of output. 53 | /// 54 | /// ##### Booleans 55 | /// 56 | /// Booleans are serialized into a single byte of output. `false` values will sort before `true` 57 | /// values. 58 | /// 59 | /// ##### Strings 60 | /// 61 | /// Strings are encoded into their natural UTF8 representation plus a single null byte suffix. 62 | /// In general, strings should not contain null bytes. The encoder will not check for null bytes, 63 | /// however their presence will break lexicographic sorting. The only exception to this rule is 64 | /// the case where the string is the final (or only) component of the key. If the string field 65 | /// is the final component of a tuple, enum-struct, or struct, then it may contain null bytes 66 | /// without breaking sort order. 
67 | /// 68 | /// ##### Options 69 | /// 70 | /// An optional wrapper type adds a 1 byte overhead to the wrapped data type. `None` values will 71 | /// sort before `Some` values. 72 | /// 73 | /// ##### Structs & Tuples 74 | /// 75 | /// Structs and tuples are encoded by serializing their constituent fields in order with no prefix, 76 | /// suffix, or padding bytes. 77 | /// 78 | /// ##### Enums 79 | /// 80 | /// Enums are encoded with a variable-length unsigned-integer variant tag, plus the constituent 81 | /// fields in the case of an enum-struct. The tag adds an overhead of between 1 and 9 bytes (it 82 | /// will be a single byte for up to 16 variants). This encoding allows more enum variants to be 83 | /// added in a backwards-compatible manner, as long as variants are not removed and the variant 84 | /// order does not change. 85 | /// 86 | /// #### Unsupported Data Types 87 | /// 88 | /// Sequences and maps are unsupported at this time. Sequences and maps could probably be 89 | /// implemented with a single byte overhead per item, key, and value, but these types are not 90 | /// typically useful in keys. 91 | /// 92 | /// Raw byte arrays are unsupported. The Rust `Encoder`/`Decoder` mechanism makes no distinction 93 | /// between byte arrays and sequences, and thus the overhead for encoding a raw byte array would be 94 | /// 1 byte per input byte. The theoretical best-case overhead for serializing a raw (null 95 | /// containing) byte array in order-preserving format is 1 bit per byte, or 9 bytes of output for 96 | /// every 8 bytes of input. 97 | pub struct Encoder where W: Write { 98 | writer: io::BufWriter, 99 | } 100 | 101 | impl Encoder where W: Write { 102 | 103 | /// Creates a new ordered bytes encoder whose output will be written to the provided writer. 104 | pub fn new(writer: W) -> Encoder { 105 | Encoder { writer: io::BufWriter::new(writer) } 106 | } 107 | 108 | /// Encode a `u64` into a variable number of bytes. 
109 | /// 110 | /// The variable-length encoding scheme uses between 1 and 9 bytes depending on the value. 111 | /// Smaller magnitude (closer to 0) `u64`s will encode to fewer bytes. 112 | /// 113 | /// ##### Encoding 114 | /// 115 | /// The encoding uses the first 4 bits to store the number of trailing bytes, between 0 and 8. 116 | /// Subsequent bits are the input value in big-endian format with leading 0 bytes removed. 117 | /// 118 | /// ##### Encoded Size 119 | /// 120 | /// 121 | /// 122 | /// 123 | /// 124 | /// 125 | /// 126 | /// 127 | /// 128 | /// 129 | /// 130 | /// 131 | /// 132 | /// 133 | /// 134 | /// 135 | /// 136 | /// 137 | /// 138 | /// 139 | /// 140 | /// 141 | /// 142 | /// 143 | /// 144 | /// 145 | /// 146 | /// 147 | /// 148 | /// 149 | /// 150 | /// 151 | /// 152 | /// 153 | /// 154 | /// 155 | /// 156 | /// 157 | /// 158 | /// 159 | /// 160 | /// 161 | ///
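/// | range          | size (bytes) |
/// |----------------|--------------|
/// | [0, 2^4)       | 1            |
/// | [2^4, 2^12)    | 2            |
/// | [2^12, 2^20)   | 3            |
/// | [2^20, 2^28)   | 4            |
/// | [2^28, 2^36)   | 5            |
/// | [2^36, 2^44)   | 6            |
/// | [2^44, 2^52)   | 7            |
/// | [2^52, 2^60)   | 8            |
/// | [2^60, 2^64)   | 9            |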
162 | pub fn emit_var_u64(&mut self, val: u64) -> Result<()> { 163 | if val < 1 << 4 { 164 | self.writer.write_u8(val as u8) 165 | } else if val < 1 << 12 { 166 | self.writer.write_u16::((val as u16) | 1 << 12) 167 | } else if val < 1 << 20 { 168 | try!(self.writer.write_u8(((val >> 16) as u8) | 2 << 4)); 169 | self.writer.write_u16::((val as u16)) 170 | } else if val < 1 << 28 { 171 | self.writer.write_u32::((val as u32) | 3 << 28) 172 | } else if val < 1 << 36 { 173 | try!(self.writer.write_u8(((val >> 32) as u8) | 4 << 4)); 174 | self.writer.write_u32::((val as u32)) 175 | } else if val < 1 << 44 { 176 | try!(self.writer.write_u16::(((val >> 32) as u16) | 5 << 12)); 177 | self.writer.write_u32::((val as u32)) 178 | } else if val < 1 << 52 { 179 | try!(self.writer.write_u8(((val >> 48) as u8) | 6 << 4)); 180 | try!(self.writer.write_u16::((val >> 32) as u16)); 181 | self.writer.write_u32::((val as u32)) 182 | } else if val < 1 << 60 { 183 | self.writer.write_u64::((val as u64) | 7 << 60) 184 | } else { 185 | try!(self.writer.write_u8(8 << 4)); 186 | self.writer.write_u64::(val) 187 | }.map_err(From::from) 188 | } 189 | 190 | /// Encode an `i64` into a variable number of bytes. 191 | /// 192 | /// The variable-length encoding scheme uses between 1 and 9 bytes depending on the value. 193 | /// Smaller magnitude (closer to 0) `i64`s will encode to fewer bytes. 194 | /// 195 | /// ##### Encoding 196 | /// 197 | /// The encoding uses the first bit to encode the sign: `0` for negative values and `1` for 198 | /// positive values. The following 4 bits store the number of trailing bytes, between 0 and 8. 199 | /// Subsequent bits are the absolute value of the input value in big-endian format with leading 200 | /// 0 bytes removed. If the original value was negative, than 1 is subtracted from the absolute 201 | /// value before encoding. Finally, if the value is negative, all bits except the sign bit are 202 | /// flipped (1s become 0s and 0s become 1s). 
203 | /// 204 | /// ##### Encoded Size 205 | /// 206 | /// 207 | /// 208 | /// 209 | /// 210 | /// 211 | /// 212 | /// 213 | /// 214 | /// 215 | /// 216 | /// 217 | /// 218 | /// 219 | /// 220 | /// 221 | /// 222 | /// 223 | /// 224 | /// 225 | /// 226 | /// 227 | /// 228 | /// 229 | /// 230 | /// 231 | /// 232 | /// 233 | /// 234 | /// 235 | /// 236 | /// 237 | /// 238 | /// 239 | /// 240 | /// 241 | /// 242 | /// 243 | /// 244 | /// 245 | /// 246 | /// 247 | /// 248 | /// 249 | /// 250 | /// 251 | /// 252 | /// 253 | /// 254 | /// 255 | /// 256 | /// 257 | ///
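/// | negative range   | positive range | size (bytes) |
/// |------------------|----------------|--------------|
/// | [-2^3, 0)        | [0, 2^3)       | 1            |
/// | [-2^11, -2^3)    | [2^3, 2^11)    | 2            |
/// | [-2^19, -2^11)   | [2^11, 2^19)   | 3            |
/// | [-2^27, -2^19)   | [2^19, 2^27)   | 4            |
/// | [-2^35, -2^27)   | [2^27, 2^35)   | 5            |
/// | [-2^43, -2^35)   | [2^35, 2^43)   | 6            |
/// | [-2^51, -2^43)   | [2^43, 2^51)   | 7            |
/// | [-2^59, -2^51)   | [2^51, 2^59)   | 8            |
/// | [-2^63, -2^59)   | [2^59, 2^63)   | 9            |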
258 | pub fn emit_var_i64(&mut self, v: i64) -> Result<()> { 259 | // The mask is 0 for positive input and u64::MAX for negative input 260 | let mask = (v >> 63) as u64; 261 | let val = v.abs() as u64 - (1 & mask); 262 | if val < 1 << 3 { 263 | let masked = (val | (0x10 << 3)) ^ mask; 264 | self.writer.write_u8(masked as u8) 265 | } else if val < 1 << 11 { 266 | let masked = (val | (0x11 << 11)) ^ mask; 267 | self.writer.write_u16::(masked as u16) 268 | } else if val < 1 << 19 { 269 | let masked = (val | (0x12 << 19)) ^ mask; 270 | try!(self.writer.write_u8((masked >> 16) as u8)); 271 | self.writer.write_u16::(masked as u16) 272 | } else if val < 1 << 27 { 273 | let masked = (val | (0x13 << 27)) ^ mask; 274 | self.writer.write_u32::(masked as u32) 275 | } else if val < 1 << 35 { 276 | let masked = (val | (0x14 << 35)) ^ mask; 277 | try!(self.writer.write_u8((masked >> 32) as u8)); 278 | self.writer.write_u32::(masked as u32) 279 | } else if val < 1 << 43 { 280 | let masked = (val | (0x15 << 43)) ^ mask; 281 | try!(self.writer.write_u16::((masked >> 32) as u16)); 282 | self.writer.write_u32::(masked as u32) 283 | } else if val < 1 << 51 { 284 | let masked = (val | (0x16 << 51)) ^ mask; 285 | try!(self.writer.write_u8((masked >> 48) as u8)); 286 | try!(self.writer.write_u16::((masked >> 32) as u16)); 287 | self.writer.write_u32::(masked as u32) 288 | } else if val < 1 << 59 { 289 | let masked = (val | (0x17 << 59)) ^ mask; 290 | self.writer.write_u64::(masked as u64) 291 | } else { 292 | try!(self.writer.write_u8((0x18 << 3) ^ mask as u8)); 293 | self.writer.write_u64::(val ^ mask) 294 | }.map_err(From::from) 295 | } 296 | } 297 | 298 | impl rustc_serialize::Encoder for Encoder where W: Write { 299 | 300 | type Error = Error; 301 | 302 | fn emit_nil(&mut self) -> Result<()> { 303 | self.writer.write_all(&[]).map_err(From::from) 304 | } 305 | 306 | fn emit_u8(&mut self, v: u8) -> Result<()> { 307 | self.writer.write_u8(v).map_err(From::from) 308 | } 309 | fn 
emit_u16(&mut self, v: u16) -> Result<()> { 310 | self.writer.write_u16::(v).map_err(From::from) 311 | } 312 | fn emit_u32(&mut self, v: u32) -> Result<()> { 313 | self.writer.write_u32::(v).map_err(From::from) 314 | } 315 | fn emit_u64(&mut self, v: u64) -> Result<()> { 316 | self.writer.write_u64::(v).map_err(From::from) 317 | } 318 | fn emit_usize(&mut self, v: usize) -> Result<()> { 319 | self.emit_var_u64(v as u64).map_err(From::from) 320 | } 321 | 322 | fn emit_i8(&mut self, v: i8) -> Result<()> { 323 | self.writer.write_i8(v ^ i8::MIN).map_err(From::from) 324 | } 325 | fn emit_i16(&mut self, v: i16) -> Result<()> { 326 | self.writer.write_i16::(v ^ i16::MIN).map_err(From::from) 327 | } 328 | fn emit_i32(&mut self, v: i32) -> Result<()> { 329 | self.writer.write_i32::(v ^ i32::MIN).map_err(From::from) 330 | } 331 | fn emit_i64(&mut self, v: i64) -> Result<()> { 332 | self.writer.write_i64::(v ^ i64::MIN).map_err(From::from) 333 | } 334 | fn emit_isize(&mut self, v: isize) -> Result<()> { self.emit_var_i64(v as i64) } 335 | 336 | fn emit_bool(&mut self, v: bool) -> Result<()> { 337 | self.writer.write_u8(if v { 1 } else { 0 }).map_err(From::from) 338 | } 339 | 340 | /// Encode an `f32` into sortable bytes. 341 | /// 342 | /// `NaN`s will sort greater than positive infinity. -0.0 will sort directly before +0.0. 343 | /// 344 | /// See [Hacker's Delight 2nd Edition](http://www.hackersdelight.org/) Section 17-3. 345 | fn emit_f32(&mut self, v: f32) -> Result<()> { 346 | let val = unsafe { transmute::(v) }; 347 | let t = (val >> 31) | i32::MIN; 348 | self.writer.write_i32::(val ^ t).map_err(From::from) 349 | } 350 | 351 | /// Encode an `f64` into sortable bytes. 352 | /// 353 | /// `NaN`s will sort greater than positive infinity. -0.0 will sort directly before +0.0. 354 | /// 355 | /// See [Hacker's Delight 2nd Edition](http://www.hackersdelight.org/) Section 17-3. 
fn emit_f64(&mut self, v: f64) -> Result<()> {
    // SAFETY: `f64` and `i64` have the same size and every bit pattern is a valid `i64`.
    let val = unsafe { transmute::<f64, i64>(v) };
    // All ones for negative input (complement every bit), only the sign bit otherwise.
    let t = (val >> 63) | i64::MIN;
    self.writer.write_i64::<BigEndian>(val ^ t).map_err(From::from)
}

// A char is written as its UTF-8 bytes. A 4-byte buffer is always large enough for one
// UTF-8 encoded scalar value, so the `unwrap_or(0)` fallback is not expected to trigger.
fn emit_char(&mut self, v: char) -> Result<()> {
    let mut buf = [0u8; 4];
    let n = v.encode_utf8(&mut buf).unwrap_or(0);
    self.writer.write_all(&buf[..n]).map_err(From::from)
}

// A string is written as its UTF-8 bytes followed by a single null terminator.
fn emit_str(&mut self, v: &str) -> Result<()> {
    try!(self.writer.write_all(v.as_bytes()));
    self.writer.write_u8(0u8).map_err(From::from)
}

// Enums: only the variant id is written (variable-length), followed by the variant's fields.
fn emit_enum<F>(&mut self, _name: &str, f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}
fn emit_enum_variant<F>(&mut self,
                        _name: &str,
                        id: usize,
                        _len: usize,
                        f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    try!(self.emit_usize(id));
    f(self)
}
fn emit_enum_variant_arg<F>(&mut self,
                            _idx: usize,
                            f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}
fn emit_enum_struct_variant<F>(&mut self,
                               _name: &str,
                               id: usize,
                               _len: usize,
                               f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    try!(self.emit_usize(id));
    f(self)
}
fn emit_enum_struct_variant_field<F>(&mut self,
                                     _name: &str,
                                     _idx: usize,
                                     f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}

// Structs and tuples add no framing: the fields are simply emitted in order.
fn emit_struct<F>(&mut self, _name: &str, _len: usize, f: F)
                  -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}
fn emit_struct_field<F>(&mut self, _name: &str, _idx: usize, f: F)
                        -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}

fn emit_tuple<F>(&mut self, _len: usize, f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}
fn emit_tuple_arg<F>(&mut self, _idx: usize, f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}
fn emit_tuple_struct<F>(&mut self,
                        name: &str,
                        len: usize,
                        f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    self.emit_struct(name, len, f)
}
fn emit_tuple_struct_arg<F>(&mut self,
                            idx: usize,
                            f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    self.emit_struct_field("", idx, f)
}

// Options: a one-byte boolean tag (`false` for `None`, `true` for `Some`), then the payload.
fn emit_option<F>(&mut self, f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    f(self)
}
fn emit_option_none(&mut self) -> Result<()> {
    self.emit_bool(false)
}
fn emit_option_some<F>(&mut self, f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    try!(self.emit_bool(true));
    f(self)
}

// Sequences and maps are not supported; encoding one panics via `unimplemented!()`.
fn emit_seq<F>(&mut self, _len: usize, _f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    unimplemented!()
}
fn emit_seq_elt<F>(&mut self, _idx: usize, _f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    unimplemented!()
}

fn emit_map<F>(&mut self, _len: usize, _f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    unimplemented!()
}
fn emit_map_elt_key<F>(&mut self, _idx: usize, _f: F) -> Result<()> {
    unimplemented!()
}
fn emit_map_elt_val<F>(&mut self, _idx: usize, _f: F) -> Result<()>
    where F: FnOnce(&mut Self) -> Result<()> {
    unimplemented!()
}
} // closes `impl rustc_serialize::Encoder for Encoder`

#[cfg(test)]
pub mod test {

    use std::{f32, f64, i16, i8, isize, u16, u8, usize};
    use std::iter::range_inclusive;

    use quickcheck::{Arbitrary, Gen};
    use rand::Rng;

    use encode;

    // Exhaustive check: the encodings of consecutive `u8` values are strictly increasing.
    #[test]
    fn test_u8() {
        let mut previous = encode(&u8::MIN).unwrap();
        for i in range_inclusive(u8::MIN + 1, u8::MAX) {
            let current = encode(&i).unwrap();
            assert!(current > previous);
            previous = current;
        }
    }

    // Exhaustive check: the encodings of consecutive `u16` values are strictly increasing.
    #[test]
    fn test_u16() {
        let mut previous = encode(&u16::MIN).unwrap();
        for i in range_inclusive(u16::MIN + 1, u16::MAX) {
            let current = encode(&i).unwrap();
            assert!(current > previous);
            previous = current;
        }
    }

    #[quickcheck]
    fn check_u32(a: u32, b: u32) -> bool {
        a.cmp(&b) == encode(&a).unwrap().cmp(&encode(&b).unwrap())
    }

    #[quickcheck]
    fn check_u64(a: u64, b: u64) -> bool {
        a.cmp(&b) == encode(&a).unwrap().cmp(&encode(&b).unwrap())
    }

    // Pins the exact bytes on both sides of every size boundary of the variable-length
    // unsigned encoding. The larger cases need `usize` to be 64 bits wide.
    #[test]
    fn test_var_u64() {
        assert_eq!(vec!(0x00), encode(&0usize).unwrap());
        assert_eq!(vec!(0x01), encode(&2usize.pow(0)).unwrap());

        assert_eq!(vec!(0x0F), encode(&(2usize.pow(4) - 1)).unwrap());
        assert_eq!(vec!(0x10, 0x10), encode(&2usize.pow(4)).unwrap());

        assert_eq!(vec!(0x1F, 0xFF), encode(&(2usize.pow(12) - 1)).unwrap());
        assert_eq!(vec!(0x20, 0x10, 0x00), encode(&2usize.pow(12)).unwrap());

        assert_eq!(vec!(0x2F, 0xFF, 0xFF), encode(&(2usize.pow(20) - 1)).unwrap());
        assert_eq!(vec!(0x30, 0x10, 0x00, 0x00), encode(&2usize.pow(20)).unwrap());

        assert_eq!(vec!(0x3F, 0xFF, 0xFF, 0xFF), encode(&(2usize.pow(28) - 1)).unwrap());
        assert_eq!(vec!(0x40, 0x10, 0x00, 0x00, 0x00), encode(&2usize.pow(28)).unwrap());

        assert_eq!(vec!(0x4F, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2usize.pow(36) - 1)).unwrap());
        assert_eq!(vec!(0x50, 0x10, 0x00, 0x00, 0x00, 0x00), encode(&2usize.pow(36)).unwrap());

        assert_eq!(vec!(0x5F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2usize.pow(44) - 1)).unwrap());
        assert_eq!(vec!(0x60, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&2usize.pow(44)).unwrap());

        assert_eq!(vec!(0x6F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2usize.pow(52) - 1)).unwrap());
        assert_eq!(vec!(0x70, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&2usize.pow(52)).unwrap());

        assert_eq!(vec!(0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2usize.pow(60) - 1)).unwrap());
        assert_eq!(vec!(0x80, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&2usize.pow(60)).unwrap());

        assert_eq!(vec!(0x80, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&usize::MAX).unwrap());
    }

    #[quickcheck]
    fn check_usize(a: usize, b: usize) -> bool {
        a.cmp(&b) == encode(&a).unwrap().cmp(&encode(&b).unwrap())
    }

    // Exhaustive check: the encodings of consecutive `i8` values are strictly increasing.
    #[test]
    fn test_i8() {
        let mut previous = encode(&i8::MIN).unwrap();
        for i in range_inclusive(i8::MIN + 1, i8::MAX) {
            let current = encode(&i).unwrap();
            assert!(current > previous);
            previous = current;
        }
    }

    // Exhaustive check: the encodings of consecutive `i16` values are strictly increasing.
    #[test]
    fn test_i16() {
        let mut previous = encode(&i16::MIN).unwrap();
        for i in range_inclusive(i16::MIN + 1, i16::MAX) {
            let current = encode(&i).unwrap();
            assert!(current > previous);
            previous = current;
        }
    }

    #[quickcheck]
    fn check_i32(a: i32, b: i32) -> bool {
        a.cmp(&b) == encode(&a).unwrap().cmp(&encode(&b).unwrap())
    }

    #[quickcheck]
    fn check_i64(a: i64, b: i64) -> bool {
        a.cmp(&b) == encode(&a).unwrap().cmp(&encode(&b).unwrap())
    }

    // Pins the exact bytes on both sides of every size boundary for non-negative `isize`.
    #[test]
    fn test_pos_var_i64() {
        assert_eq!(vec!(0x80), encode(&0isize).unwrap());
        assert_eq!(vec!(0x81), encode(&2isize.pow(0)).unwrap());

        assert_eq!(vec!(0x87), encode(&(2isize.pow(3) - 1)).unwrap());
        assert_eq!(vec!(0x88, 0x08), encode(&2isize.pow(3)).unwrap());

        assert_eq!(vec!(0x8F, 0xFF), encode(&(2isize.pow(11) - 1)).unwrap());
        assert_eq!(vec!(0x90, 0x08, 0x00), encode(&2isize.pow(11)).unwrap());

        assert_eq!(vec!(0x97, 0xFF, 0xFF), encode(&(2isize.pow(19) - 1)).unwrap());
        assert_eq!(vec!(0x98, 0x08, 0x00, 0x00), encode(&2isize.pow(19)).unwrap());

        assert_eq!(vec!(0x9F, 0xFF, 0xFF, 0xFF), encode(&(2isize.pow(27) - 1)).unwrap());
        assert_eq!(vec!(0xA0, 0x08, 0x00, 0x00, 0x00), encode(&2isize.pow(27)).unwrap());

        assert_eq!(vec!(0xA7, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2isize.pow(35) - 1)).unwrap());
        assert_eq!(vec!(0xA8, 0x08, 0x00, 0x00, 0x00, 0x00), encode(&2isize.pow(35)).unwrap());

        assert_eq!(vec!(0xAF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2isize.pow(43) - 1)).unwrap());
        assert_eq!(vec!(0xB0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&2isize.pow(43)).unwrap());

        assert_eq!(vec!(0xB7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2isize.pow(51) - 1)).unwrap());
        assert_eq!(vec!(0xB8, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&2isize.pow(51)).unwrap());

        assert_eq!(vec!(0xBF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(2isize.pow(59) - 1)).unwrap());
        assert_eq!(vec!(0xC0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&2isize.pow(59)).unwrap());

        assert_eq!(vec!(0xC0, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&isize::MAX).unwrap());
    }

    // Pins the exact bytes on both sides of every size boundary for negative `isize`,
    // including `isize::MIN`.
    #[test]
    fn test_neg_var_i64() {
        assert_eq!(vec!(0x7F), encode(&(0isize - 1)).unwrap());

        assert_eq!(vec!(0x78), encode(&-2isize.pow(3)).unwrap());
        assert_eq!(vec!(0x77, 0xF7), encode(&(-2isize.pow(3) - 1)).unwrap());

        assert_eq!(vec!(0x70, 0x00), encode(&-2isize.pow(11)).unwrap());
        assert_eq!(vec!(0x6F, 0xF7, 0xFF), encode(&(-2isize.pow(11) - 1)).unwrap());

        assert_eq!(vec!(0x68, 0x00, 0x00), encode(&-2isize.pow(19)).unwrap());
        assert_eq!(vec!(0x67, 0xF7, 0xFF, 0xFF), encode(&(-2isize.pow(19) - 1)).unwrap());

        assert_eq!(vec!(0x60, 0x00, 0x00, 0x00), encode(&-2isize.pow(27)).unwrap());
        assert_eq!(vec!(0x5F, 0xF7, 0xFF, 0xFF, 0xFF), encode(&(-2isize.pow(27) - 1)).unwrap());

        assert_eq!(vec!(0x58, 0x00, 0x00, 0x00, 0x00), encode(&-2isize.pow(35)).unwrap());
        assert_eq!(vec!(0x57, 0xF7, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(-2isize.pow(35) - 1)).unwrap());

        assert_eq!(vec!(0x50, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&-2isize.pow(43)).unwrap());
        assert_eq!(vec!(0x4F, 0xF7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(-2isize.pow(43) - 1)).unwrap());

        assert_eq!(vec!(0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&-2isize.pow(51)).unwrap());
        assert_eq!(vec!(0x47, 0xF7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(-2isize.pow(51) - 1)).unwrap());

        assert_eq!(vec!(0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&-2isize.pow(59)).unwrap());
        assert_eq!(vec!(0x3F, 0xF7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), encode(&(-2isize.pow(59) - 1)).unwrap());

        assert_eq!(vec!(0x3F, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), encode(&isize::MIN).unwrap());
    }

    #[quickcheck]
    fn check_isize(a: isize, b: isize) -> bool {
        a.cmp(&b) == encode(&a).unwrap().cmp(&encode(&b).unwrap())
    }

    #[quickcheck]
    fn check_f32(a: f32, b: f32) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
            && a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&(b.next_after(a))).unwrap())
            && b.partial_cmp(&a) == encode(&b).unwrap().partial_cmp(&encode(&(a.next_after(b))).unwrap())
    }

    #[test]
    fn test_f32() {
        assert!(encode(&f32::NEG_INFINITY).unwrap() < encode(&f32::MIN).unwrap());
        assert!(encode(&f32::MIN).unwrap() < encode(&(f32::MIN.next_after(f32::INFINITY))).unwrap());

        assert!(encode(&(-0.0f32).next_after(f32::NEG_INFINITY)).unwrap() < encode(&-0.0f32).unwrap());
        assert!(encode(&-0f32).unwrap() < encode(&0f32).unwrap());
        assert!(encode(&0f32).unwrap() < encode(&f32::MIN_POSITIVE).unwrap());

        assert!(encode(&(f32::MAX.next_after(f32::NEG_INFINITY))).unwrap() < encode(&f32::MAX).unwrap());
        assert!(encode(&f32::MAX).unwrap() < encode(&f32::INFINITY).unwrap());
        assert!(encode(&f32::INFINITY).unwrap() < encode(&f32::NAN).unwrap());
    }

    #[quickcheck]
    fn check_f64(a: f64, b: f64) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
            && a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&(b.next_after(a))).unwrap())
            && b.partial_cmp(&a) == encode(&b).unwrap().partial_cmp(&encode(&(a.next_after(b))).unwrap())
    }

    #[test]
    fn test_f64() {
        assert!(encode(&f64::NEG_INFINITY).unwrap() < encode(&f64::MIN).unwrap());
        assert!(encode(&f64::MIN).unwrap() < encode(&(f64::MIN.next_after(f64::INFINITY))).unwrap());

        assert!(encode(&(-0.0f64).next_after(f64::NEG_INFINITY)).unwrap() < encode(&-0.0f64).unwrap());
        assert!(encode(&-0f64).unwrap() < encode(&0f64).unwrap());
        assert!(encode(&0f64).unwrap() < encode(&f64::MIN_POSITIVE).unwrap());

        assert!(encode(&(f64::MAX.next_after(f64::NEG_INFINITY))).unwrap() < encode(&f64::MAX).unwrap());
        assert!(encode(&f64::MAX).unwrap() < encode(&f64::INFINITY).unwrap());
        assert!(encode(&f64::INFINITY).unwrap() < encode(&f64::NAN).unwrap());
    }

    #[test]
    fn test_bool() {
        for &(a, b) in vec!((true, true), (true, false), (false, true), (false, false)).iter() {
            assert_eq!(a.partial_cmp(&b), encode(&a).unwrap().partial_cmp(&encode(&b).unwrap()))
        }
    }

    #[quickcheck]
    fn check_char(a: char, b: char) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
    }

    #[quickcheck]
    fn check_string(a: String, b: String) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
    }

    // NOTE(review): the type parameter of `Option` was lost in this copy of the file;
    // `String` is assumed here - confirm against the upstream source.
    #[quickcheck]
    fn check_option(a: Option<String>, b: Option<String>) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
    }

    #[quickcheck]
    fn check_struct(a: TestStruct, b: TestStruct) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
    }

    #[quickcheck]
    fn check_tuple(a: (u32, char, String), b: (u32, char, String)) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
    }

    #[quickcheck]
    fn check_enum(a: TestEnum, b: TestEnum) -> bool {
        a.partial_cmp(&b) == encode(&a).unwrap().partial_cmp(&encode(&b).unwrap())
    }

    // A struct with one field of every supported primitive type plus a trailing string.
    #[derive(RustcEncodable, RustcDecodable, Clone, Debug, PartialEq, PartialOrd)]
    pub struct TestStruct {
        u8_: u8,
        u16_: u16,
        u32_: u32,
        u64_: u64,
        usize_: usize,

        i8_: i8,
        i16_: i16,
        i32_: i32,
        i64_: i64,
        isize_: isize,

        f32_: f32,
        f64_: f64,

        bool_: bool,
        char_: char,

        string: String,
    }

    impl Arbitrary for TestStruct {
        fn arbitrary<G: Gen>(g: &mut G) -> TestStruct {
            TestStruct {
                u8_: Arbitrary::arbitrary(g),
                u16_: Arbitrary::arbitrary(g),
                u32_: Arbitrary::arbitrary(g),
                u64_: Arbitrary::arbitrary(g),
                usize_: Arbitrary::arbitrary(g),

                i8_: Arbitrary::arbitrary(g),
                i16_: Arbitrary::arbitrary(g),
                i32_: Arbitrary::arbitrary(g),
                i64_: Arbitrary::arbitrary(g),
                isize_: Arbitrary::arbitrary(g),

                f32_: Arbitrary::arbitrary(g),
                f64_: Arbitrary::arbitrary(g),

                bool_: Arbitrary::arbitrary(g),
                char_: Arbitrary::arbitrary(g),

                string: Arbitrary::arbitrary(g)
            }
        }
    }

    #[derive(RustcEncodable, RustcDecodable, Clone, Debug, PartialEq, PartialOrd)]
    pub enum TestEnum {
        A(u32, String),
        B,
        C(isize)
    }

    impl Arbitrary for TestEnum {
        // Builds one value of every variant, shuffles them, and returns whichever ends up last.
        fn arbitrary<G: Gen>(g: &mut G) -> TestEnum {
            let mut variants = vec![
                TestEnum::A(Arbitrary::arbitrary(g), Arbitrary::arbitrary(g)),
                TestEnum::B,
                TestEnum::C(Arbitrary::arbitrary(g))
            ];

            g.shuffle(&mut variants[..]);
            variants.pop().unwrap()
        }
    }
}

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
//! Binary encoding for Rust values which preserves lexicographic sort order. Order-preserving
//! encoding is useful for creating keys for sorted key-value stores with byte string typed keys,
//! such as [leveldb](https://github.com/google/leveldb). `bytekey` attempts to encode values into
//! the fewest number of bytes possible while preserving ordering. Type information is *not*
//! serialized alongside values, and thus the type of serialized data must be known in order to
//! perform decoding (`bytekey` does not implement a self-describing format).
//!
//! #### Supported Data Types
//!
//! `bytekey` encoding currently supports all Rust primitives, strings, options, structs, enums, and
//! tuples. `isize` and `usize` types are variable-length encoded. Sequence (`Vec`) and map types are
//! not currently supported (but could be in the future). See `Encoder` for details on the
//! serialization format.
//!
//! #### Usage
//!
//! ```
//! extern crate rustc_serialize;
//! extern crate bytekey;
//! use bytekey::{encode, decode};
//!
//! #[derive(RustcEncodable, RustcDecodable, Debug, PartialEq)]
//! struct MyKey { a: u32, b: String }
//!
//! # fn main() {
//! let a = MyKey { a: 1, b: "foo".to_string() };
//! let b = MyKey { a: 2, b: "foo".to_string() };
//! let c = MyKey { a: 2, b: "fooz".to_string() };
//!
//!
assert!(encode(&a).unwrap() < encode(&b).unwrap()); 31 | //! assert!(encode(&b).unwrap() < encode(&c).unwrap()); 32 | //! assert_eq!(a, decode(encode(&a).unwrap()).unwrap()); 33 | //! # } 34 | //! ``` 35 | //! 36 | //! #### Type Evolution 37 | //! 38 | //! In general, the exact type of a serialized value must be known in order to correctly deserialize 39 | //! it. For structs and enums, the type is effectively frozen once any values of the type have been 40 | //! serialized: changes to the struct or enum will cause deserialization of already encoded values 41 | //! to fail or return incorrect values. The only exception is adding new variants to the end 42 | //! of an existing enum. Enum variants may *not* change type, be removed, or be reordered. All 43 | //! changes to structs, including adding, removing, reordering, or changing the type of a field are 44 | //! forbidden. 45 | //! 46 | //! These restrictions lead to a few best-practices when using `bytekey` encoding: 47 | //! 48 | //! * Don't use `bytekey` unless you need lexicographic ordering of encoded values! A more 49 | //! general encoding library such as [Cap'n Proto](https://github.com/dwrensha/capnproto-rust) or 50 | //! [binary-encode](https://github.com/TyOverby/binary-encode) will serve you better if this 51 | //! feature is not necessary. 52 | //! * If you persist encoded values for longer than the life of a process (i.e. you write the 53 | //! encoded values to a file or a database), consider using an enum as a top-level wrapper type. 54 | //! This will allow you to seamlessly add a new variant when you need to change the key format in a 55 | //! backwards-compatible manner (the different key types will sort separately). If your enum has 56 | //! at most 16 variants, then the overhead is just a single byte in encoded output.
57 | 58 | #![feature(core, custom_attribute, io, plugin, unicode)] 59 | #![cfg_attr(test, feature(std_misc))] 60 | #![cfg_attr(test, plugin(quickcheck_macros))] 61 | 62 | extern crate byteorder; 63 | extern crate rustc_serialize; 64 | 65 | #[cfg(test)] extern crate quickcheck; 66 | #[cfg(test)] extern crate rand; 67 | 68 | pub use encoder::Encoder; 69 | pub use decoder::Decoder; 70 | 71 | mod encoder; 72 | mod decoder; 73 | 74 | use rustc_serialize::{Encodable, Decodable}; 75 | use std::{error, fmt, io, result}; 76 | use std::error::Error as StdError; 77 | 78 | /// Encode data into a byte vector. 79 | /// 80 | /// #### Usage 81 | /// 82 | /// ``` 83 | /// # use bytekey::encode; 84 | /// assert_eq!(vec!(0x00, 0x00, 0x00, 0x2A), encode(&42u32).unwrap()); 85 | /// assert_eq!(vec!(0x66, 0x69, 0x7A, 0x7A, 0x62, 0x75, 0x7A, 0x7A, 0x00), encode(&"fizzbuzz").unwrap()); 86 | /// assert_eq!(vec!(0x2A, 0x66, 0x69, 0x7A, 0x7A, 0x00), encode(&(42u8, "fizz")).unwrap()); 87 | /// ``` 88 | pub fn encode(value: &T) -> Result> 89 | where T: Encodable { 90 | let mut writer = Vec::new(); 91 | { 92 | let mut encoder = Encoder::new(&mut writer); 93 | try!(value.encode(&mut encoder)); 94 | } 95 | Ok(writer) 96 | } 97 | 98 | /// Decode data from a byte vector. 99 | /// 100 | /// #### Usage 101 | /// 102 | /// ``` 103 | /// # use bytekey::{encode, decode}; 104 | /// assert_eq!(42usize, decode::(encode(&42usize).unwrap()).unwrap()); 105 | /// ``` 106 | pub fn decode(bytes: Vec) -> Result 107 | where T: Decodable { 108 | Decodable::decode(&mut Decoder::new(io::Cursor::new(bytes))) 109 | } 110 | 111 | /// A short-hand for `result::Result`. 112 | pub type Result = result::Result; 113 | 114 | /// An error type for bytekey decoding and encoding. 115 | /// 116 | /// This is a thin wrapper over the standard `io::Error` type. Namely, it 117 | /// adds two additional error cases: an unexpected EOF, and invalid utf8. 
118 | #[derive(Debug)] 119 | pub enum Error { 120 | 121 | /// Variant representing that the underlying stream was read successfully but it did not contain 122 | /// valid utf8 data. 123 | NotUtf8, 124 | 125 | /// Variant representing that the underlying stream returns less bytes, than are required to 126 | /// decode a meaningful value. 127 | UnexpectedEof, 128 | 129 | /// Variant representing that an I/O error occurred. 130 | Io(io::Error), 131 | } 132 | 133 | impl From for Error { 134 | fn from(error: io::Error) -> Error { Error::Io(error) } 135 | } 136 | 137 | impl From for Error { 138 | fn from(error: io::CharsError) -> Error { 139 | match error { 140 | io::CharsError::NotUtf8 => Error::NotUtf8, 141 | io::CharsError::Other(error) => Error::Io(error), 142 | } 143 | } 144 | } 145 | 146 | impl From for Error { 147 | fn from(error: byteorder::Error) -> Error { 148 | match error { 149 | byteorder::Error::UnexpectedEOF => Error::UnexpectedEof, 150 | byteorder::Error::Io(error) => Error::Io(error), 151 | } 152 | } 153 | } 154 | 155 | impl fmt::Display for Error { 156 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 157 | match *self { 158 | Error::NotUtf8 => write!(f, "byte stream did not contain valid utf8"), 159 | Error::UnexpectedEof => write!(f, "unexpected end of file"), 160 | Error::Io(ref err) => err.fmt(f), 161 | } 162 | } 163 | } 164 | 165 | impl error::Error for Error { 166 | fn description(&self) -> &str { 167 | match *self { 168 | Error::NotUtf8 => "invalid utf8 encoding", 169 | Error::UnexpectedEof => "unexpected end of file", 170 | Error::Io(ref err) => err.description(), 171 | } 172 | } 173 | 174 | fn cause(&self) -> Option<&error::Error> { 175 | match *self { 176 | Error::NotUtf8 => None, 177 | Error::UnexpectedEof => None, 178 | Error::Io(ref err) => err.cause(), 179 | } 180 | } 181 | } 182 | --------------------------------------------------------------------------------