├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches ├── bloomset.rs └── list.rs └── src ├── arena.rs ├── bloom.rs ├── cell.rs ├── impl_debug.rs ├── impl_partial_eq.rs ├── impl_serialize.rs ├── lib.rs ├── list.rs ├── map.rs └── set.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | rust: 4 | - stable 5 | - beta 6 | - nightly 7 | 8 | branches: 9 | except: 10 | - dev 11 | - benches 12 | 13 | matrix: 14 | allow_failures: 15 | - rust: nightly 16 | 17 | os: 18 | - linux 19 | 20 | script: 21 | - | 22 | cargo test --features impl_serialize && 23 | cargo test --features impl_serialize --release 24 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "toolshed" 3 | version = "0.8.1" 4 | authors = ["maciejhirsz "] 5 | license = "MIT/Apache-2.0" 6 | description = "Arena allocator and a handful of useful data structures" 7 | repository = "https://github.com/ratel-rust/toolshed" 8 | documentation = "https://docs.rs/toolshed/" 9 | readme = "README.md" 10 | edition = "2018" 11 | 12 | [dependencies] 13 | rustc-hash = "1.0" 14 | serde = { version = "1.0", optional = true } 15 | 16 | [dev-dependencies] 17 | serde_json = "1.0" 18 | 19 | [features] 20 | default = [] 21 | 22 | impl_serialize = ["serde"] 23 | 24 | [profile.bench] 25 | lto = true 26 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | 
http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2016 Maciej Hirsz 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Maciej Hirsz 2 | 3 | The MIT License (MIT) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Toolshed 2 | 3 | This crate contains an `Arena` allocator, along with a few common data 4 | structures that can be used in tandem with it. 5 | 6 | For all those times when you need to create a recursively nested tree 7 | of `enum`s and find yourself in pain having to put everything in 8 | `Box`es all the time. 9 | 10 | ## Features 11 | 12 | + Paginated `Arena`: internally preallocates 64KiB _pages_ on the heap and 13 | allows `Copy` types to be put on that heap. 14 | 15 | + `CopyCell`: virtually identical to `std::cell::Cell` but requires that 16 | internal types implement `Copy`, and implements `Copy` itself. 17 | 18 | + `List`, `Map` and `Set`: your basic data structures that allocate on the 19 | `Arena` and use internal mutability via `CopyCell`. Never worry about 20 | sharing pointers again! 21 | 22 | + `BloomMap` and `BloomSet`: special variants of `Map` and `Set` with a 23 | very simple but very fast bloom filter. If a map / set is often queried 24 | for keys / elements it doesn't contain, the bloom filter check will 25 | reduce the need to do a full tree lookup, greatly increasing performance. 26 | The overhead compared to a regular `Map` or `Set` is also minimal. 27 | 28 | + All data structures implement expected traits, such as `Debug` or `PartialEq`. 29 | 30 | + Optional **serde** `Serialize` support behind a feature flag. 
31 | 32 | ## Example 33 | 34 | ```rust 35 | extern crate toolshed; 36 | 37 | use toolshed::Arena; 38 | use toolshed::map::Map; 39 | 40 | // Only `Copy` types can be allocated on the `Arena`! 41 | #[derive(Debug, PartialEq, Clone, Copy)] 42 | enum Foo<'arena> { 43 | Integer(u64), 44 | 45 | // Recursive enum without `Box`es! 46 | Nested(&'arena Foo<'arena>), 47 | } 48 | 49 | fn main() { 50 | // Create a new arena 51 | let arena = Arena::new(); 52 | 53 | // We allocate first instance of `Foo` in the arena. 54 | // 55 | // Please note that the `alloc` method returns a `&mut` reference. 56 | // Since we want to share our references around, we are going to 57 | // dereference and re-reference them to immutable ones with `&*`. 58 | let child: &Foo = &*arena.alloc(Foo::Integer(42)); 59 | 60 | // Next instance of `Foo` will contain the child reference. 61 | let parent: &Foo = &*arena.alloc(Foo::Nested(child)); 62 | 63 | // Empty map does not allocate 64 | let map = Map::new(); 65 | 66 | // Inserting stuff in the map requires a reference to the `Arena`. 67 | // The reference can be shared, since `Arena` uses interior mutability. 68 | map.insert(&arena, "child", child); 69 | 70 | // We can put our `map` on the arena as well. Once again we use the `&*` 71 | // operation to change the reference to be immutable, just to demonstrate 72 | // that our `Map` implementation is perfectly happy with internal mutability. 73 | let map: &Map<&str, &Foo> = &*arena.alloc(map); 74 | 75 | // Each insert allocates a small chunk of data on the arena. Since arena is 76 | // preallocated on the heap, these inserts are very, very fast. 77 | // 78 | // We only have a non-mutable reference to `map` now, however `Map` is also 79 | // using interior mutability on references to allow exactly this kind of 80 | // behavior in a safe manner. 
81 | map.insert(&arena, "parent", parent); 82 | 83 | assert_eq!(map.get("child"), Some(&Foo::Integer(42))); 84 | assert_eq!(map.get("parent"), Some(&Foo::Nested(&Foo::Integer(42)))); 85 | assert_eq!(map.get("heh"), None); 86 | } 87 | ``` 88 | 89 | ## Benches 90 | 91 | Here is a very biased benchmark of the different sets: 92 | 93 | ``` 94 | running 8 tests 95 | test bloom_set_create ... bench: 49 ns/iter (+/- 0) 96 | test bloom_set_read ... bench: 181 ns/iter (+/- 10) 97 | test fxhash_set_create ... bench: 86 ns/iter (+/- 1) 98 | test fxhash_set_read ... bench: 312 ns/iter (+/- 4) 99 | test hash_set_create ... bench: 152 ns/iter (+/- 94) 100 | test hash_set_read ... bench: 1,105 ns/iter (+/- 1) 101 | test set_create ... bench: 37 ns/iter (+/- 0) 102 | test set_read ... bench: 440 ns/iter (+/- 1) 103 | ``` 104 | 105 | * `set` and `bloom_set` are benchmarks of `Set` and `BloomSet` from this crate. 106 | * `hash_set` is the default stdlib `HashSet`. 107 | * `fxhash_set` is a `HashSet` using the `fxhash` crate hash. 108 | 109 | ## License 110 | 111 | This crate is distributed under the terms of both the MIT license 112 | and the Apache License (Version 2.0). Choose whichever one works best for you. 113 | 114 | See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) for details. 
115 | -------------------------------------------------------------------------------- /benches/bloomset.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use toolshed::set::{BloomSet, Set}; 5 | use toolshed::Arena; 6 | use test::{Bencher, black_box}; 7 | use std::collections::HashSet; 8 | use rustc_hash::FxHashSet; 9 | 10 | static WORDS: &[&str] = &[ 11 | "ARENA_BLOCK", "Arena", "Cell", "Self", "String", "T", "Vec", "_unchecked", "a", 12 | "alignment", "alloc", "alloc_bytes", "alloc_str", "alloc_str_zero_end", "alloc_string", 13 | "as", "as_bytes", "as_mut_ptr", "as_ptr", "block", "cap", "cell", "const", 14 | "copy_nonoverlapping", "else", "extend_from_slice", "fn", "from_raw_parts", "from_utf", 15 | "get", "grow", "if", "impl", "inline", "into", "into_bytes", "isize", "len", 16 | "len_with_zero", "let", "mem", "mut", "new", "offset", "ptr", "pub", "push", 17 | "replace", "return", "self", "set", "size_of", "slice", "std", "store", "str", 18 | "struct", "temp", "u", "unsafe", "use", "usize", "val", "vec", "with_capacity" 19 | ]; 20 | static SET_WORDS: &[&str] = &["alloc_bytes", "alloc", "Cell", "String", "yetAnother"]; 21 | 22 | #[bench] 23 | fn set_read(b: &mut Bencher) { 24 | let arena = Arena::new(); 25 | let a = &arena; 26 | let set = Set::new(); 27 | 28 | for word in SET_WORDS.iter() { 29 | set.insert(a, *word); 30 | } 31 | 32 | b.iter(|| { 33 | for word in WORDS.iter() { 34 | black_box(set.contains(word)); 35 | } 36 | }) 37 | } 38 | 39 | #[bench] 40 | fn set_create(b: &mut Bencher) { 41 | let arena = Arena::new(); 42 | let a = &arena; 43 | 44 | b.iter(|| { 45 | unsafe { a.clear() }; 46 | let set = Set::new(); 47 | 48 | for word in SET_WORDS.iter() { 49 | set.insert(a, *word); 50 | } 51 | 52 | black_box(set) 53 | }) 54 | } 55 | 56 | #[bench] 57 | fn bloom_set_read(b: &mut Bencher) { 58 | let arena = Arena::new(); 59 | let a = &arena; 60 | let set = BloomSet::new(); 61 | 62 
| for word in SET_WORDS.iter() { 63 | set.insert(a, *word); 64 | } 65 | 66 | b.iter(|| { 67 | for word in WORDS.iter() { 68 | black_box(set.contains(word)); 69 | } 70 | }) 71 | } 72 | 73 | #[bench] 74 | fn bloom_set_create(b: &mut Bencher) { 75 | let arena = Arena::new(); 76 | let a = &arena; 77 | 78 | b.iter(|| { 79 | unsafe { a.clear() }; 80 | let set = BloomSet::new(); 81 | 82 | for word in SET_WORDS.iter() { 83 | set.insert(a, *word); 84 | } 85 | 86 | black_box(set) 87 | }) 88 | } 89 | 90 | #[bench] 91 | fn fxhash_set_read(b: &mut Bencher) { 92 | let mut set = FxHashSet::default(); 93 | 94 | for word in SET_WORDS.iter() { 95 | set.insert(*word); 96 | } 97 | 98 | b.iter(|| { 99 | for word in WORDS.iter() { 100 | black_box(set.contains(word)); 101 | } 102 | }) 103 | } 104 | 105 | #[bench] 106 | fn fxhash_set_create(b: &mut Bencher) { 107 | b.iter(|| { 108 | let mut set = FxHashSet::default(); 109 | 110 | for word in SET_WORDS.iter() { 111 | set.insert(*word); 112 | } 113 | 114 | black_box(set) 115 | }) 116 | } 117 | 118 | #[bench] 119 | fn hash_set_read(b: &mut Bencher) { 120 | let mut set = HashSet::new(); 121 | 122 | for word in SET_WORDS.iter() { 123 | set.insert(*word); 124 | } 125 | 126 | b.iter(|| { 127 | for word in WORDS.iter() { 128 | black_box(set.contains(word)); 129 | } 130 | }) 131 | } 132 | 133 | #[bench] 134 | fn hash_set_create(b: &mut Bencher) { 135 | b.iter(|| { 136 | let mut set = HashSet::new(); 137 | 138 | for word in SET_WORDS.iter() { 139 | set.insert(*word); 140 | } 141 | 142 | black_box(set) 143 | }) 144 | } 145 | -------------------------------------------------------------------------------- /benches/list.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use toolshed::list::ListBuilder; 5 | use toolshed::Arena; 6 | use test::{Bencher, black_box}; 7 | 8 | static WORDS: &[&str] = &[ 9 | "ARENA_BLOCK", "Arena", "Cell", "Self", "String", "T", "Vec", 
"_unchecked", "a", 10 | "alignment", "alloc", "alloc_bytes", "alloc_str", "alloc_str_zero_end", "alloc_string", 11 | "as", "as_bytes", "as_mut_ptr", "as_ptr", "block", "cap", "cell", "const", 12 | "copy_nonoverlapping", "else", "extend_from_slice", "fn", "from_raw_parts", "from_utf", 13 | "get", "grow", "if", "impl", "inline", "into", "into_bytes", "isize", "len", 14 | "len_with_zero", "let", "mem", "mut", "new", "offset", "ptr", "pub", "push", 15 | "replace", "return", "self", "set", "size_of", "slice", "std", "store", "str", 16 | "struct", "temp", "u", "unsafe", "use", "usize", "val", "vec", "with_capacity" 17 | ]; 18 | 19 | #[bench] 20 | fn vec_create_016(b: &mut Bencher) { 21 | let words = &WORDS[..16]; 22 | 23 | b.iter(|| { 24 | let mut vec = Vec::new(); 25 | 26 | for word in words.iter() { 27 | vec.push(word); 28 | } 29 | 30 | black_box(vec); 31 | }) 32 | } 33 | 34 | #[bench] 35 | fn vec_create_032(b: &mut Bencher) { 36 | let words = &WORDS[..32]; 37 | 38 | b.iter(|| { 39 | let mut vec = Vec::new(); 40 | 41 | for word in words.iter() { 42 | vec.push(word); 43 | } 44 | 45 | black_box(vec); 46 | }) 47 | } 48 | 49 | #[bench] 50 | fn vec_create_064(b: &mut Bencher) { 51 | let words = &WORDS[..64]; 52 | 53 | b.iter(|| { 54 | let mut vec = Vec::new(); 55 | 56 | for word in words.iter() { 57 | vec.push(word); 58 | } 59 | 60 | black_box(vec); 61 | }) 62 | } 63 | 64 | #[bench] 65 | fn vec_create_256(b: &mut Bencher) { 66 | b.iter(|| { 67 | let mut vec = Vec::new(); 68 | 69 | for i in 0..256usize { 70 | vec.push((i, i)); 71 | } 72 | 73 | black_box(vec); 74 | }) 75 | } 76 | 77 | #[bench] 78 | fn list_create_016(b: &mut Bencher) { 79 | let arena = Arena::new(); 80 | let words = &WORDS[1..16]; 81 | 82 | b.iter(|| { 83 | unsafe { arena.clear() }; 84 | let builder = ListBuilder::new(&arena, WORDS[0]); 85 | 86 | for word in words.iter() { 87 | builder.push(&arena, *word); 88 | } 89 | 90 | black_box(builder.as_list()); 91 | }) 92 | } 93 | 94 | #[bench] 95 | fn 
list_create_032(b: &mut Bencher) { 96 | let arena = Arena::new(); 97 | let words = &WORDS[1..32]; 98 | 99 | b.iter(|| { 100 | unsafe { arena.clear() }; 101 | let builder = ListBuilder::new(&arena, WORDS[0]); 102 | 103 | for word in words.iter() { 104 | builder.push(&arena, *word); 105 | } 106 | 107 | black_box(builder.as_list()); 108 | }) 109 | } 110 | 111 | #[bench] 112 | fn list_create_064(b: &mut Bencher) { 113 | let arena = Arena::new(); 114 | let words = &WORDS[1..64]; 115 | 116 | b.iter(|| { 117 | unsafe { arena.clear() }; 118 | let builder = ListBuilder::new(&arena, WORDS[0]); 119 | 120 | for word in words.iter() { 121 | builder.push(&arena, *word); 122 | } 123 | 124 | black_box(builder.as_list()); 125 | }) 126 | } 127 | 128 | #[bench] 129 | fn list_create_256(b: &mut Bencher) { 130 | let arena = Arena::new(); 131 | 132 | b.iter(|| { 133 | unsafe { arena.clear() }; 134 | let builder = ListBuilder::new(&arena, (0usize, 0)); 135 | 136 | for i in 1..256usize { 137 | builder.push(&arena, (i, i)); 138 | } 139 | 140 | black_box(builder.as_list()); 141 | }) 142 | } 143 | -------------------------------------------------------------------------------- /src/arena.rs: -------------------------------------------------------------------------------- 1 | //! Module containing the `Arena` and `Uninitialized` structs. For convenience the 2 | //! `Arena` is exported at the root of the crate. 3 | 4 | use std::mem::size_of; 5 | use std::ops::Deref; 6 | use std::cell::Cell; 7 | use std::borrow::Cow; 8 | use std::fmt; 9 | 10 | const ARENA_BLOCK: usize = 64 * 1024; 11 | 12 | /// An arena implementation that uses preallocated 64KiB pages for all allocations. 13 | /// If a new allocation were to be pushed over the boundaries of the page, a 14 | /// new page is internally allocated first, thus this version of the arena can never 15 | /// run out of memory unless the process runs out of heap altogether. 
16 | /// 17 | /// Allocating a type larger than the page size will result in a new heap allocation 18 | /// just for that type separate from the page mechanism. 19 | pub struct Arena { 20 | store: Cell>>, 21 | ptr: Cell<*mut u8>, 22 | offset: Cell, 23 | } 24 | 25 | /// A pointer to an uninitialized region of memory. 26 | pub struct Uninitialized<'arena, T: Copy> { 27 | pointer: &'arena mut MaybeUninit, 28 | } 29 | 30 | /// Almost a copy of https://github.com/rust-lang/rust/issues/53491 31 | union MaybeUninit { 32 | value: T, 33 | _uninit: (), 34 | } 35 | 36 | impl<'arena, T: Copy> Uninitialized<'arena, T> { 37 | /// Initialize the memory at the pointer with a given value. 38 | #[inline] 39 | pub fn init(self, value: T) -> &'arena mut T { 40 | unsafe { 41 | self.pointer.value = value; 42 | &mut self.pointer.value 43 | } 44 | } 45 | 46 | /// Get a reference to the pointer without writing to it. 47 | /// 48 | /// **Calling this method without calling `init` is undefined behavior.** 49 | #[inline] 50 | pub unsafe fn as_ref(&self) -> &'arena T { 51 | &*(&self.pointer.value as *const T) 52 | } 53 | 54 | /// Convert the `Uninitialized` to a regular mutable reference. 55 | /// 56 | /// **Calling this method without calling `init` is undefined behavior.** 57 | #[inline] 58 | pub unsafe fn as_mut_ref(self) -> &'arena mut T { 59 | &mut self.pointer.value 60 | } 61 | 62 | /// Convert a raw pointer to an `Uninitialized`. This method is unsafe since it can 63 | /// bind to arbitrary lifetimes. 64 | #[inline] 65 | pub unsafe fn from_raw(pointer: *mut T) -> Self { 66 | Uninitialized { 67 | pointer: &mut *(pointer as *mut MaybeUninit), 68 | } 69 | } 70 | } 71 | 72 | impl<'arena, T: Copy> From<&'arena mut T> for Uninitialized<'arena, T> { 73 | #[inline] 74 | fn from(pointer: &'arena mut T) -> Self { 75 | unsafe { Self::from_raw(pointer) } 76 | } 77 | } 78 | 79 | /// A wrapper around a `str` slice that has an extra `0` byte allocated following 80 | /// its contents. 
81 | #[derive(Clone, Copy, PartialEq)] 82 | pub struct NulTermStr<'arena>(&'arena str); 83 | 84 | impl<'arena> fmt::Debug for NulTermStr<'arena> { 85 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 86 | fmt::Debug::fmt(self.0, f) 87 | } 88 | } 89 | 90 | impl<'arena> fmt::Display for NulTermStr<'arena> { 91 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 92 | fmt::Display::fmt(self.0, f) 93 | } 94 | } 95 | 96 | impl<'arena> NulTermStr<'arena> { 97 | /// Read byte at a given `index`. This does not check for length boundaries, 98 | /// but is guaranteed to return `0` for `index` equal to the length. 99 | /// 100 | /// This can be a very useful optimization when reading a long string one 101 | /// byte at a time until termination, if checking for `0` can replace what 102 | /// would otherwise have to be length checks. 103 | /// 104 | /// ```rust 105 | /// # use toolshed::Arena; 106 | /// # fn main() { 107 | /// let arena = Arena::new(); 108 | /// let str = arena.alloc_nul_term_str("foo"); 109 | /// 110 | /// // We can safely get the underlying `&str` at any time. 111 | /// assert_eq!(&str[..], "foo"); 112 | /// 113 | /// unsafe { 114 | /// // First 3 bytes are known to us 115 | /// assert_eq!(str.byte_unchecked(0), b'f'); 116 | /// assert_eq!(str.byte_unchecked(1), b'o'); 117 | /// assert_eq!(str.byte_unchecked(2), b'o'); 118 | /// 119 | /// // Following is safe and guaranteed to be '0' 120 | /// assert_eq!(str.byte_unchecked(3), 0); 121 | /// 122 | /// // Reading index 4 would be undefined behavior! 
123 | /// } 124 | /// # } 125 | /// ``` 126 | pub unsafe fn byte_unchecked(&self, index: usize) -> u8 { 127 | *self.0.as_ptr().add(index) 128 | } 129 | } 130 | 131 | impl<'arena> AsRef for NulTermStr<'arena> { 132 | fn as_ref(&self) -> &str { 133 | self.0 134 | } 135 | } 136 | 137 | impl<'arena> Deref for NulTermStr<'arena> { 138 | type Target = &'arena str; 139 | 140 | fn deref(&self) -> &&'arena str { 141 | &self.0 142 | } 143 | } 144 | 145 | impl<'arena> From> for &'arena str { 146 | fn from(nts: NulTermStr<'arena>) -> &'arena str { 147 | nts.0 148 | } 149 | } 150 | 151 | impl Arena { 152 | /// Create a new arena with a single preallocated 64KiB page. 153 | pub fn new() -> Self { 154 | let mut store = vec![Vec::with_capacity(ARENA_BLOCK)]; 155 | let ptr = store[0].as_mut_ptr(); 156 | 157 | Arena { 158 | store: Cell::new(store), 159 | ptr: Cell::new(ptr), 160 | offset: Cell::new(0), 161 | } 162 | } 163 | 164 | /// Put the value onto the page of the arena and return a reference to it. 165 | #[inline] 166 | pub fn alloc<'arena, T: Sized + Copy>(&'arena self, value: T) -> &'arena mut T { 167 | self.alloc_uninitialized().init(value) 168 | } 169 | 170 | /// Allocate enough bytes for the type `T`, then return an `Uninitialized` pointer to the memory. 171 | #[inline] 172 | pub fn alloc_uninitialized<'arena, T: Sized + Copy>(&'arena self) -> Uninitialized<'arena, T> { 173 | Uninitialized { 174 | pointer: unsafe { &mut *(self.require(size_of::()) as *mut MaybeUninit) }, 175 | } 176 | } 177 | 178 | /// Allocate a slice of `T` onto the arena and return a reference to it. 179 | /// This is useful when the original slice has an undefined lifetime. 180 | /// 181 | /// Note: static slices (`&'static [T]`) can be safely used in place of arena-bound 182 | /// slices without having to go through this method. 
183 | pub fn alloc_slice<'arena, T: Copy>(&'arena self, val: &[T]) -> &'arena [T] { 184 | let ptr = self.require(val.len() * size_of::()) as *mut T; 185 | 186 | unsafe { 187 | use std::ptr::copy_nonoverlapping; 188 | use std::slice::from_raw_parts; 189 | 190 | copy_nonoverlapping(val.as_ptr(), ptr, val.len()); 191 | from_raw_parts(ptr, val.len()) 192 | } 193 | } 194 | 195 | /// Allocate a statically-sized but lazily-generated slice `[T]` out of an iterator. 196 | /// This is useful if you're going to make a slice of something and put it on the arena, 197 | /// but you don't want to make an allocation first just to have something to copy in. 198 | /// 199 | /// The slice will have at most `n` elements; any further elements of the iterator are ignored and never evaluated. 200 | /// If the iterator yields fewer than `n` elements, a shorter slice is simply returned. 201 | pub fn alloc_lazy_slice<'arena, T, I: Iterator>(&'arena self, vals: I, n: usize) -> &'arena [T] { 202 | // Grab space for `n` elements even if it may turn out we have to walk it back 203 | let ptr = self.require(n * size_of::()) as *mut T; 204 | let mut i: usize = 0; 205 | 206 | unsafe { 207 | use std::slice::from_raw_parts; 208 | 209 | for val in vals.take(n) { 210 | *ptr.offset(i as isize) = val; // NOTE(review): plain assignment drops the previous (uninitialised) contents when `T` has drop glue, and `T` is not bounded by `Copy` here; `ptr.add(i).write(val)` would avoid that - confirm 211 | i += 1; 212 | } 213 | // Now fix the slice length and arena offset. NOTE(review): `require` returns a dedicated page without advancing `offset` when the request exceeds `ARENA_BLOCK`, and it rounds smaller requests up, so this subtraction looks like it can rewind past unrelated allocations or leave `offset` off the rounding boundary - confirm 214 | let diff = n - i; 215 | self.reset_to( self.offset() - diff * size_of::() ); 216 | from_raw_parts(ptr, i) 217 | } 218 | } 219 | 220 | /// Put a `Vec` on the arena without reallocating. 
221 | pub fn alloc_vec<'arena, T: Copy>(&'arena self, mut val: Vec) -> &'arena [T] { 222 | use std::{mem, slice}; 223 | 224 | let ptr = val.as_mut_ptr(); 225 | let cap = val.capacity(); 226 | let len = val.len(); 227 | 228 | mem::forget(val); 229 | 230 | let out = self.alloc_byte_vec(unsafe { 231 | Vec::from_raw_parts(ptr as _, 0, cap * size_of::()) 232 | }); 233 | 234 | unsafe { slice::from_raw_parts(out as _, len) } 235 | } 236 | 237 | /// Allocate a slice from a `Cow`: an owned `Vec` is adopted via `alloc_vec` (no new allocation), a borrowed slice is copied via `alloc_slice`. 238 | #[inline] 239 | pub fn alloc_cow<'input, 'arena, T>(&'arena self, vals: Cow<'input, [T]>) -> &'arena [T] 240 | where 241 | T: Sized + Copy + 'input, 242 | { 243 | match vals { 244 | Cow::Owned(vec) => self.alloc_vec(vec), 245 | Cow::Borrowed(slice) => self.alloc_slice(slice), 246 | } 247 | } 248 | 249 | /// Copy an `&str` slice onto the arena and return a reference to the copy. This is 250 | /// useful when the original slice has an undefined lifetime. 251 | /// 252 | /// Note: static slices (`&'static str`) can be safely used in place of arena-bound 253 | /// slices without having to go through this method. 254 | pub fn alloc_str<'arena>(&'arena self, val: &str) -> &'arena str { 255 | unsafe { 256 | use std::str::from_utf8_unchecked; 257 | 258 | from_utf8_unchecked(self.alloc_slice(val.as_bytes())) // the bytes are an unmodified copy of a valid `&str`, so they are valid UTF-8 259 | } 260 | } 261 | 262 | /// Copy an `&str` slice onto the arena as a nul-terminated C-style string. 263 | /// No checks are performed on the source and whether or not it already contains 264 | /// any nul bytes. While this does not create any memory issues, it assumes that 265 | /// the reader of the source can deal with malformed source. 
266 | pub fn alloc_nul_term_str<'arena>(&'arena self, val: &str) -> NulTermStr { 267 | let len_with_zero = val.len() + 1; 268 | let ptr = self.require(len_with_zero); 269 | 270 | unsafe { 271 | use std::ptr::copy_nonoverlapping; 272 | use std::slice::from_raw_parts; 273 | use std::str::from_utf8_unchecked; 274 | 275 | copy_nonoverlapping(val.as_ptr(), ptr, val.len()); 276 | *ptr.add(val.len()) = 0; 277 | 278 | NulTermStr(from_utf8_unchecked(from_raw_parts(ptr, val.len()))) 279 | } 280 | } 281 | 282 | /// Pushes the `String` as its own page onto the arena and returns a reference to it. 283 | /// This does not copy or reallocate the original `String`. 284 | pub fn alloc_string<'arena>(&'arena self, val: String) -> &'arena str { 285 | let len = val.len(); 286 | let ptr = self.alloc_byte_vec(val.into_bytes()); 287 | 288 | unsafe { 289 | use std::str::from_utf8_unchecked; 290 | use std::slice::from_raw_parts; 291 | 292 | from_utf8_unchecked(from_raw_parts(ptr, len)) 293 | } 294 | } 295 | 296 | #[inline] 297 | fn alloc_byte_vec(&self, mut val: Vec) -> *mut u8 { 298 | let ptr = val.as_mut_ptr(); 299 | 300 | let mut temp = self.store.replace(Vec::new()); // take the page list out of the `Cell` so it can be mutated 301 | temp.push(val); 302 | self.store.replace(temp); // put the page list (now owning `val`) back 303 | 304 | ptr 305 | } 306 | 307 | fn alloc_bytes(&self, size: usize) -> *mut u8 { 308 | self.alloc_byte_vec(Vec::with_capacity(size)) 309 | } 310 | 311 | #[inline] 312 | fn require(&self, size: usize) -> *mut u8 { 313 | // Requests larger than a page get a dedicated allocation. This check should be optimized away for size known at compile time. 
314 | if size > ARENA_BLOCK { 315 | return self.alloc_bytes(size); 316 | } 317 | 318 | let size = match size % size_of::() { 319 | 0 => size, 320 | n => size + (size_of::() - n), 321 | }; 322 | 323 | let offset = self.offset.get(); 324 | let cap = offset + size; 325 | 326 | if cap > ARENA_BLOCK { 327 | self.grow(); 328 | 329 | self.offset.set(size); 330 | self.ptr.get() 331 | } else { 332 | self.offset.set(cap); 333 | unsafe { self.ptr.get().add(offset) } 334 | } 335 | } 336 | 337 | fn grow(&self) { 338 | let ptr = self.alloc_byte_vec(Vec::with_capacity(ARENA_BLOCK)); 339 | self.ptr.set(ptr); 340 | } 341 | 342 | /// Resets the allocation offset of the current page of the arena to zero, so later allocations reuse (and overwrite) that page. 343 | /// 344 | /// **Using this method is an extremely bad idea!** 345 | /// 346 | /// The only case where the use of this method would be justified is 347 | /// in benchmarks where creation of a structure on the arena is to be 348 | /// tested without the cost of re-creating the arena itself on every iteration. 349 | #[doc(hidden)] 350 | #[inline] 351 | pub unsafe fn clear(&self) { 352 | self.reset_to(0) 353 | } 354 | 355 | #[doc(hidden)] 356 | #[inline] 357 | pub unsafe fn offset(&self) -> usize { // current allocation offset, in bytes, within the active page 358 | self.offset.get() 359 | } 360 | 361 | #[doc(hidden)] 362 | #[inline] 363 | pub unsafe fn reset_to(&self, offset: usize) { // overwrites the allocation offset without any validation 364 | self.offset.set(offset) 365 | } 366 | } 367 | 368 | /// Akin to `CopyCell`: `Sync` is unsafe but `Send` is totally fine! 
369 | unsafe impl Send for Arena {} 370 | 371 | #[cfg(test)] 372 | mod test { 373 | use super::*; 374 | 375 | #[test] 376 | fn allocate_some_stuff() { 377 | let arena = Arena::new(); 378 | 379 | assert_eq!(arena.alloc(0u64), &0); 380 | assert_eq!(arena.alloc(42u64), &42); 381 | assert_eq!(arena.alloc(0x8000000u64), &0x8000000u64); 382 | 383 | assert_eq!(arena.offset.get(), 8 * 3); 384 | 385 | // For inspecting internals 386 | let mut arena = arena; 387 | 388 | assert_eq!(arena.store.get_mut().len(), 1); 389 | } 390 | 391 | #[test] 392 | fn allocate_some_vecs() { 393 | let arena = Arena::new(); 394 | 395 | let vecs = vec![vec![1u64, 2, 3, 4], vec![7; ARENA_BLOCK * 2], vec![]]; 396 | 397 | for vec in vecs { 398 | assert_eq!(arena.alloc_vec(vec.clone()), &vec[..]); 399 | } 400 | } 401 | 402 | #[test] 403 | fn allocate_some_cows() { 404 | let arena = Arena::new(); 405 | 406 | let vecs = vec![vec![1u64, 2, 3, 4], vec![7; ARENA_BLOCK * 2], vec![]]; 407 | 408 | for vec in vecs { 409 | assert_eq!(arena.alloc_cow(vec.clone().into()), &vec[..]); 410 | } 411 | } 412 | 413 | #[test] 414 | fn allocate_huge_heap() { 415 | let arena = Arena::new(); 416 | 417 | assert_eq!(arena.alloc(0u64), &0); 418 | assert_eq!(arena.alloc(42u64), &42); 419 | 420 | arena.alloc_uninitialized::<[usize; 1024 * 1024]>(); 421 | 422 | // Still writes to the first page 423 | assert_eq!(arena.offset.get(), 8 * 2); 424 | assert_eq!(arena.alloc(0x8000000u64), &0x8000000u64); 425 | assert_eq!(arena.offset.get(), 8 * 3); 426 | 427 | // For inspecting internals 428 | let mut arena = arena; 429 | 430 | // However second page has been added 431 | assert_eq!(arena.store.get_mut().len(), 2); 432 | 433 | // Second page is appropriately large 434 | assert_eq!( 435 | arena.store.get_mut()[1].capacity(), 436 | size_of::() * 1024 * 1024 437 | ); 438 | } 439 | 440 | #[test] 441 | fn alloc_slice() { 442 | let arena = Arena::new(); 443 | 444 | assert_eq!(arena.alloc_slice(&[10u16, 20u16]), &[10u16, 20u16][..]); 445 | 
assert_eq!(arena.offset.get(), 8); 446 | } 447 | 448 | #[test] 449 | fn alloc_lazy_slices() { 450 | let arena = Arena::new(); 451 | let nums: [u32; 6] = [1, 2, 3, 4, 5, 1000]; 452 | let big_nums: [u32; 6] = [100, 200, 300, 400, 500, 1050]; 453 | 454 | // Put the whole array in the arena 455 | let all_nums = arena.alloc_lazy_slice(nums.iter().map(|x| *x), 6); 456 | // Truncate it using the `n` argument 457 | let trunc_nums = arena.alloc_lazy_slice(big_nums.iter().map(|x| *x), 3); 458 | // Put a whole array of half the nums in the arena 459 | let half_nums = arena.alloc_lazy_slice(nums[0..3].iter().map(|x| *x), 6); 460 | 461 | assert!(nums.iter().eq(all_nums.iter())); 462 | assert!(nums[0..3].iter().eq(half_nums.iter())); 463 | assert!(big_nums[0..3].iter().eq(trunc_nums.iter())); 464 | } 465 | 466 | #[test] 467 | fn aligns_slice_allocs() { 468 | let arena = Arena::new(); 469 | 470 | assert_eq!(arena.alloc_slice(b"foo"), b"foo"); 471 | assert_eq!(arena.offset.get(), 8); 472 | 473 | assert_eq!(arena.alloc_slice(b"doge to the moon!"), b"doge to the moon!"); 474 | assert_eq!(arena.offset.get(), 32); 475 | } 476 | 477 | #[test] 478 | fn aligns_str_allocs() { 479 | let arena = Arena::new(); 480 | 481 | assert_eq!(arena.alloc_str("foo"), "foo"); 482 | assert_eq!(arena.offset.get(), 8); 483 | 484 | assert_eq!(arena.alloc_str("doge to the moon!"), "doge to the moon!"); 485 | assert_eq!(arena.offset.get(), 32); 486 | } 487 | 488 | #[test] 489 | fn alloc_nul_term_str() { 490 | let arena = Arena::new(); 491 | let nts = arena.alloc_nul_term_str("abcdefghijk"); 492 | let allocated = unsafe { ::std::slice::from_raw_parts(nts.as_ptr(), 12) }; 493 | 494 | assert_eq!(arena.offset.get(), 16); 495 | assert_eq!( 496 | allocated, 497 | "abcdefghijk\u{0}".as_bytes(), 498 | ); 499 | 500 | assert_eq!(&**nts, "abcdefghijk"); 501 | } 502 | } 503 | -------------------------------------------------------------------------------- /src/bloom.rs: 
-------------------------------------------------------------------------------- 1 | const A__: u16 = 0; 2 | const A00: u16 = 1; 3 | const A01: u16 = 1 << 1; 4 | const A02: u16 = 1 << 2; 5 | const A03: u16 = 1 << 3; 6 | const A04: u16 = 1 << 4; 7 | const A05: u16 = 1 << 5; 8 | const A06: u16 = 1 << 6; 9 | const A07: u16 = 1 << 7; 10 | const A08: u16 = 1 << 8; 11 | const A09: u16 = 1 << 9; 12 | const A10: u16 = 1 << 10; 13 | const A11: u16 = 1 << 11; 14 | const A12: u16 = 1 << 12; 15 | const A13: u16 = 1 << 13; 16 | const A14: u16 = 1 << 14; 17 | const A15: u16 = 1 << 15; 18 | 19 | const B__: u32 = 0; 20 | const B00: u32 = 1 << 16; 21 | const B01: u32 = 1 << 17; 22 | const B02: u32 = 1 << 18; 23 | const B03: u32 = 1 << 19; 24 | const B04: u32 = 1 << 20; 25 | const B05: u32 = 1 << 21; 26 | const B06: u32 = 1 << 22; 27 | const B07: u32 = 1 << 23; 28 | const B08: u32 = 1 << 24; 29 | const B09: u32 = 1 << 25; 30 | const B10: u32 = 1 << 26; 31 | const B11: u32 = 1 << 27; 32 | const B12: u32 = 1 << 28; 33 | const B13: u32 = 1 << 29; 34 | const B14: u32 = 1 << 30; 35 | const B15: u32 = 1 << 31; 36 | 37 | const C__: u64 = 0; 38 | const C00: u64 = 1 << 32; 39 | const C01: u64 = 1 << 33; 40 | const C02: u64 = 1 << 34; 41 | const C03: u64 = 1 << 35; 42 | const C04: u64 = 1 << 36; 43 | const C05: u64 = 1 << 37; 44 | const C06: u64 = 1 << 38; 45 | const C07: u64 = 1 << 39; 46 | const C08: u64 = 1 << 40; 47 | const C09: u64 = 1 << 41; 48 | const C10: u64 = 1 << 42; 49 | const C11: u64 = 1 << 43; 50 | const C12: u64 = 1 << 44; 51 | const C13: u64 = 1 << 45; 52 | const C14: u64 = 1 << 46; 53 | const C15: u64 = 1 << 47; 54 | 55 | static BYTE_MASKS_A: [u16; 256] = [ 56 | // 0 1 2 3 4 5 6 7 8 9 A B C D E F // 57 | A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, // 0 58 | A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, // 1 59 | A__, A__, A__, A__, A01, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, A__, // 2 60 | A02, 
A03, A04, A05, A06, A07, A08, A09, A10, A11, A__, A__, A__, A__, A__, A__, // 3 61 | A__, A12, A13, A14, A15, A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, // 4 62 | A11, A12, A13, A14, A15, A00, A01, A02, A03, A04, A05, A__, A__, A__, A__, A00, // 5 63 | A__, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, A00, A01, A02, A03, A04, // 6 64 | A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, A__, A__, A__, A__, A__, // 7 65 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // 8 66 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // 9 67 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // A 68 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // B 69 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // C 70 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // D 71 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // E 72 | A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15, // F 73 | ]; 74 | 75 | static BYTE_MASKS_B: [u32; 256] = [ 76 | // 0 1 2 3 4 5 6 7 8 9 A B C D E F // 77 | B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, // 0 78 | B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, // 1 79 | B__, B__, B__, B__, B01, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, B__, // 2 80 | B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B__, B__, B__, B__, B__, B__, // 3 81 | B__, B12, B13, B14, B15, B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, // 4 82 | B11, B12, B13, B14, B15, B00, B01, B02, B03, B04, B05, B__, B__, B__, B__, B00, // 5 83 | B__, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, B00, B01, B02, B03, B04, // 6 84 | B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, B__, B__, B__, B__, B__, // 7 85 | B00, B01, B02, 
B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // 8 86 | B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // 9 87 | B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // A 88 | B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // B 89 | B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // C 90 | B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // D 91 | B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // E 92 | B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, // F 93 | ]; 94 | 95 | static BYTE_MASKS_C: [u64; 256] = [ 96 | // 0 1 2 3 4 5 6 7 8 9 A B C D E F // 97 | C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, // 0 98 | C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, // 1 99 | C__, C__, C__, C__, C01, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, C__, // 2 100 | C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C__, C__, C__, C__, C__, C__, // 3 101 | C__, C12, C13, C14, C15, C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, // 4 102 | C11, C12, C13, C14, C15, C00, C01, C02, C03, C04, C05, C__, C__, C__, C__, C00, // 5 103 | C__, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, C00, C01, C02, C03, C04, // 6 104 | C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, C__, C__, C__, C__, C__, // 7 105 | C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // 8 106 | C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // 9 107 | C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // A 108 | C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // B 109 | C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // C 110 | C00, C01, C02, 
C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // D 111 | C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // E 112 | C00, C01, C02, C03, C04, C05, C06, C07, C08, C09, C10, C11, C12, C13, C14, C15, // F 113 | ]; 114 | 115 | /// Calculate a bloom filter mask for `val`. Only the length and at most the first three bytes are 116 | /// inspected, so this runs at a constant speed regardless of the input length, ~1ns on a modern laptop. 117 | #[inline] 118 | pub fn bloom>(val: T) -> u64 { 119 | let s = val.as_ref(); 120 | 121 | match s.len() { 122 | 0 => 0x0001000000000000, 123 | 124 | 1 => 0x0002000000000000 125 | | BYTE_MASKS_A[s[0] as usize] as u64, 126 | 127 | 2 => 0x0004000000000000 128 | | BYTE_MASKS_A[s[0] as usize] as u64 129 | | BYTE_MASKS_B[s[1] as usize] as u64, 130 | 131 | n => 0x0001000000000000 << n % 16 // length bit: bit 48 shifted left by `n % 16` (`%` binds tighter than `<<`) 132 | | BYTE_MASKS_C[s[2] as usize] 133 | | BYTE_MASKS_B[s[1] as usize] as u64 134 | | BYTE_MASKS_A[s[0] as usize] as u64 135 | } 136 | } 137 | 138 | 139 | #[cfg(test)] 140 | mod test { 141 | use super::*; 142 | 143 | fn is_match(filter: u64, bloom: u64) -> bool { 144 | filter & bloom == bloom 145 | } 146 | 147 | #[test] 148 | fn produces_correct_number_of_bits() { 149 | assert_eq!(bloom("").count_ones(), 1); // just length 150 | assert_eq!(bloom("a").count_ones(), 2); // length + 1 byte 151 | assert_eq!(bloom("ab").count_ones(), 3); // length + 2 bytes 152 | assert_eq!(bloom("abc").count_ones(), 4); // length + 3 bytes 153 | assert_eq!(bloom("abcd").count_ones(), 4); // length + 3 bytes (ignore rest) 154 | assert_eq!(bloom("abcde").count_ones(), 4); 155 | assert_eq!(bloom("abcdef").count_ones(), 4); 156 | assert_eq!(bloom("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ").count_ones(), 4); 157 | 158 | assert_eq!(bloom("").count_ones(), 1); 159 | assert_eq!(bloom("_").count_ones(), 2); 160 | assert_eq!(bloom("_$").count_ones(), 3); 161 | assert_eq!(bloom("_$0").count_ones(), 4); 162 | assert_eq!(bloom("123").count_ones(), 4); 163 | 
assert_eq!(bloom("456").count_ones(), 4); 164 | assert_eq!(bloom("789").count_ones(), 4); 165 | 166 | // special characters (other than `$` and `_`) are void, not to add garbage to the filter 167 | assert_eq!(bloom("").count_ones(), 1); 168 | assert_eq!(bloom("{").count_ones(), 1); 169 | assert_eq!(bloom("{}").count_ones(), 1); 170 | assert_eq!(bloom("{}[").count_ones(), 1); 171 | assert_eq!(bloom("{}[]").count_ones(), 1); 172 | } 173 | 174 | #[test] 175 | fn does_not_conflict_on_different_lengths() { 176 | let filter = bloom("abcd") | bloom("ab"); 177 | 178 | // For visibility :) 179 | const __: bool = false; 180 | 181 | assert_eq!(is_match(filter, bloom("")), __); 182 | assert_eq!(is_match(filter, bloom("a")), __); 183 | assert_eq!(is_match(filter, bloom("ab")), true); 184 | assert_eq!(is_match(filter, bloom("abc")), __); 185 | assert_eq!(is_match(filter, bloom("abcd")), true); 186 | assert_eq!(is_match(filter, bloom("abcde")), __); 187 | assert_eq!(is_match(filter, bloom("abcdef")), __); 188 | } 189 | 190 | #[test] 191 | fn does_not_conflict_on_letter_casing() { 192 | let filter = bloom("abc") | bloom("def"); 193 | 194 | assert_eq!(is_match(filter, bloom("abc")), true); 195 | assert_eq!(is_match(filter, bloom("def")), true); 196 | assert_eq!(is_match(filter, bloom("ABC")), false); 197 | assert_eq!(is_match(filter, bloom("DEF")), false); 198 | } 199 | 200 | #[test] 201 | fn has_low_enough_conflict_rate() { 202 | let filter = bloom("alloc_bytes") | bloom("alloc") | bloom("Cell") | bloom("String") | bloom("yetAnother"); 203 | let mut matches = 0; 204 | 205 | assert!(is_match(filter, bloom("alloc_bytes"))); 206 | assert!(is_match(filter, bloom("alloc"))); 207 | assert!(is_match(filter, bloom("Cell"))); 208 | assert!(is_match(filter, bloom("String"))); 209 | assert!(is_match(filter, bloom("yetAnother"))); 210 | 211 | static WORDS: &[&str] = &[ 212 | "ARENA_BLOCK", "Arena", "Cell", "Self", "String", "T", "Vec", "_unchecked", "a", 213 | "alignment", "alloc", 
"alloc_bytes", "alloc_str", "alloc_str_zero_end", "alloc_string", 214 | "as", "as_bytes", "as_mut_ptr", "as_ptr", "block", "cap", "cell", "const", 215 | "copy_nonoverlapping", "else", "extend_from_slice", "fn", "from_raw_parts", "from_utf", 216 | "get", "grow", "if", "impl", "inline", "into", "into_bytes", "isize", "len", 217 | "len_with_zero", "let", "mem", "mut", "new", "offset", "ptr", "pub", "push", 218 | "replace", "return", "self", "set", "size_of", "slice", "std", "store", "str", 219 | "struct", "temp", "u", "unsafe", "use", "usize", "val", "vec", "with_capacity" 220 | ]; 221 | 222 | for word in WORDS.iter() { 223 | 224 | if is_match(filter, bloom(word)) { 225 | matches += 1; 226 | } 227 | } 228 | 229 | // `yetAnother` is not in the WORDS, however there is a conflict with `Self`, which is ok! 230 | assert_eq!(matches, 5); 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/cell.rs: -------------------------------------------------------------------------------- 1 | //! A mutable memory location for `Copy` types. 2 | 3 | use std::fmt::{self, Debug}; 4 | use std::marker::PhantomData; 5 | 6 | /// This should be identical to the `Cell` implementation in the standard 7 | /// library, but always require that the internal type implements `Copy` 8 | /// and implements `Copy` itself. 9 | #[derive(PartialEq, Eq, Copy, Clone)] 10 | #[repr(transparent)] 11 | pub struct CopyCell { 12 | /// Internal value 13 | value: T, 14 | 15 | /// We trick the compiler to think that `CopyCell` contains a raw pointer, 16 | /// this way we make sure the `Sync` marker is not implemented and `CopyCell` 17 | /// cannot be shared across threads! 18 | _no_sync: PhantomData<*mut T> 19 | } 20 | 21 | /// `Sync` is unsafe due to mutability, however `Send` is totally fine! 22 | unsafe impl Send for CopyCell {} 23 | 24 | impl CopyCell { 25 | /// Creates a new `CopyCell` containing the given value. 
26 | pub const fn new(value: T) -> Self { 27 | CopyCell { 28 | value, 29 | _no_sync: PhantomData 30 | } 31 | } 32 | } 33 | 34 | impl CopyCell { 35 | /// Returns a copy of the contained value. 36 | #[inline] 37 | pub fn get(&self) -> T { 38 | self.value 39 | } 40 | 41 | /// Sets the contained value through a shared reference. 42 | #[inline] 43 | pub fn set(&self, value: T) { 44 | use std::ptr::write_volatile; 45 | 46 | // Regular write produces abnormal behavior when running tests in 47 | // `--release` mode. Reordering writes when the compiler assumes 48 | // things are immutable is dangerous. 49 | // 50 | // We can just cast the pointer from `CopyCell` to `T` because of 51 | // #[repr(transparent)] 52 | // 53 | // This behavior is copied over from the std implementation of 54 | // the `UnsafeCell`, and it's the best we can do right now in terms 55 | // of soundness till we get a stable `UnsafeCell` that implements `Copy`. NOTE(review): `value` is not wrapped in `UnsafeCell`, so writing through a pointer derived from `&self` is presumably still undefined behavior, volatile or not - confirm against the `UnsafeCell` documentation. 56 | unsafe { write_volatile(self as *const CopyCell as *const T as *mut T, value) }; 57 | } 58 | } 59 | 60 | impl Debug for CopyCell { 61 | #[inline] 62 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 63 | Debug::fmt(&self.value, f) 64 | } 65 | } 66 | 67 | #[cfg(test)] 68 | mod test { 69 | use super::*; 70 | 71 | #[test] 72 | fn cell() { 73 | let cell_a = CopyCell::new(42u64); 74 | let cell_b = cell_a; // copy 75 | let cell_c = &cell_a; // reference 76 | 77 | assert_eq!(cell_a.get(), 42); 78 | assert_eq!(cell_b.get(), 42); 79 | assert_eq!(cell_c.get(), 42); 80 | 81 | // Only affects the copy 82 | cell_b.set(100); 83 | 84 | assert_eq!(cell_a.get(), 42); 85 | assert_eq!(cell_b.get(), 100); 86 | assert_eq!(cell_c.get(), 42); 87 | 88 | // Affects a since c is a ref 89 | cell_c.set(200); 90 | 91 | assert_eq!(cell_a.get(), 200); 92 | assert_eq!(cell_b.get(), 100); 93 | assert_eq!(cell_c.get(), 200); 94 | 95 | // Again, only affects the copy 96 | cell_b.set(300); 97 | 98 | assert_eq!(cell_a.get(), 200); 99 | assert_eq!(cell_b.get(), 300); 100 | assert_eq!(cell_c.get(), 
200); 101 | } 102 | 103 | #[test] 104 | fn contain_static_ref() { 105 | static REF: &(&u64, u64) = &(&0, 0); 106 | 107 | let cell = CopyCell::new(REF); 108 | 109 | assert_eq!(cell.get(), REF); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/impl_debug.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Debug}; 2 | use crate::list::{List, GrowableList, ListBuilder}; 3 | use crate::map::{Map, BloomMap}; 4 | use crate::set::{Set, BloomSet}; 5 | 6 | impl<'arena, T> Debug for List<'arena, T> 7 | where 8 | T: Debug, 9 | { 10 | #[inline] 11 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 12 | f.debug_list().entries(self.iter()).finish() 13 | } 14 | } 15 | 16 | impl<'arena, T> Debug for GrowableList<'arena, T> 17 | where 18 | T: Debug, 19 | { 20 | #[inline] 21 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 22 | self.as_list().fmt(f) 23 | } 24 | } 25 | 26 | impl<'arena, T> Debug for ListBuilder<'arena, T> 27 | where 28 | T: Debug, 29 | { 30 | #[inline] 31 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 32 | self.as_list().fmt(f) 33 | } 34 | } 35 | 36 | impl<'arena, K, V> Debug for Map<'arena, K, V> 37 | where 38 | K: Debug, 39 | V: Debug + Copy, 40 | { 41 | #[inline] 42 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 43 | f.debug_map().entries(self.iter()).finish() 44 | } 45 | } 46 | 47 | impl<'arena, K, V> Debug for BloomMap<'arena, K, V> 48 | where 49 | K: Debug, 50 | V: Debug + Copy, 51 | { 52 | #[inline] 53 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 54 | f.debug_map().entries(self.iter()).finish() 55 | } 56 | } 57 | 58 | impl<'arena, I> Debug for Set<'arena, I> 59 | where 60 | I: Debug, 61 | { 62 | #[inline] 63 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 64 | f.debug_set().entries(self.iter()).finish() 65 | } 66 | } 67 | 68 | impl<'arena, I> Debug for BloomSet<'arena, I> 69 | where 70 | I: 
Debug, 71 | { 72 | #[inline] 73 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 74 | f.debug_set().entries(self.iter()).finish() 75 | } 76 | } 77 | 78 | #[cfg(test)] 79 | mod test { 80 | use super::*; 81 | use crate::Arena; 82 | 83 | #[test] 84 | fn list_debug() { 85 | let arena = Arena::new(); 86 | let list = List::from_iter(&arena, ["doge", "to", "the", "moon!"].iter().cloned()); 87 | 88 | let debug = format!("{:?}", list); 89 | 90 | assert_eq!(debug, r#"["doge", "to", "the", "moon!"]"#); 91 | } 92 | 93 | #[test] 94 | fn map_debug() { 95 | let arena = Arena::new(); 96 | let map = Map::new(); 97 | 98 | map.insert(&arena, "foo", 10u64); 99 | map.insert(&arena, "bar", 20); 100 | map.insert(&arena, "doge", 30); 101 | 102 | let debug = format!("{:?}", map); 103 | 104 | assert_eq!(debug, r#"{"foo": 10, "bar": 20, "doge": 30}"#); 105 | } 106 | 107 | #[test] 108 | fn bloom_map_debug() { 109 | let arena = Arena::new(); 110 | let map = BloomMap::new(); 111 | 112 | map.insert(&arena, "foo", 10u64); 113 | map.insert(&arena, "bar", 20); 114 | map.insert(&arena, "doge", 30); 115 | 116 | let debug = format!("{:?}", map); 117 | 118 | assert_eq!(debug, r#"{"foo": 10, "bar": 20, "doge": 30}"#); 119 | } 120 | 121 | #[test] 122 | fn set_debug() { 123 | let arena = Arena::new(); 124 | let set = Set::new(); 125 | 126 | set.insert(&arena, "foo"); 127 | set.insert(&arena, "bar"); 128 | set.insert(&arena, "doge"); 129 | 130 | let debug = format!("{:?}", set); 131 | 132 | assert_eq!(debug, r#"{"foo", "bar", "doge"}"#); 133 | } 134 | 135 | #[test] 136 | fn bloom_set_debug() { 137 | let arena = Arena::new(); 138 | let set = BloomSet::new(); 139 | 140 | set.insert(&arena, "foo"); 141 | set.insert(&arena, "bar"); 142 | set.insert(&arena, "doge"); 143 | 144 | let debug = format!("{:?}", set); 145 | 146 | assert_eq!(debug, r#"{"foo", "bar", "doge"}"#); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/impl_partial_eq.rs: 
-------------------------------------------------------------------------------- 1 | use crate::list::List; 2 | use crate::map::{Map, BloomMap}; 3 | use crate::set::{Set, BloomSet}; 4 | 5 | impl<'a, 'b, A, B> PartialEq> for List<'a, A> 6 | where 7 | A: PartialEq, 8 | { 9 | #[inline] 10 | fn eq(&self, other: &List<'b, B>) -> bool { 11 | self.iter().eq(other.iter()) 12 | } 13 | } 14 | 15 | impl<'a, 'b, KA, VA, KB, VB> PartialEq> for Map<'a, KA, VA> 16 | where 17 | (&'a KA, VA): PartialEq<(&'b KB, VB)>, 18 | VA: Copy, 19 | VB: Copy, 20 | { 21 | #[inline] 22 | fn eq(&self, other: &Map<'b, KB, VB>) -> bool { 23 | self.iter().eq(other.iter()) 24 | } 25 | } 26 | 27 | impl<'a, 'b, KA, VA, KB, VB> PartialEq> for BloomMap<'a, KA, VA> 28 | where 29 | (&'a KA, VA): PartialEq<(&'b KB, VB)>, 30 | VA: Copy, 31 | VB: Copy, 32 | { 33 | #[inline] 34 | fn eq(&self, other: &BloomMap<'b, KB, VB>) -> bool { 35 | self.iter().eq(other.iter()) 36 | } 37 | } 38 | 39 | impl<'a, 'b, A, B> PartialEq> for Set<'a, A> 40 | where 41 | A: PartialEq, 42 | { 43 | #[inline] 44 | fn eq(&self, other: &Set<'b, B>) -> bool { 45 | self.iter().eq(other.iter()) 46 | } 47 | } 48 | 49 | impl<'a, 'b, A, B> PartialEq> for BloomSet<'a, A> 50 | where 51 | A: PartialEq, 52 | { 53 | #[inline] 54 | fn eq(&self, other: &BloomSet<'b, B>) -> bool { 55 | self.iter().eq(other.iter()) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/impl_serialize.rs: -------------------------------------------------------------------------------- 1 | use serde::ser::{Serialize, Serializer}; 2 | use crate::list::List; 3 | use crate::map::{Map, BloomMap}; 4 | use crate::set::{Set, BloomSet}; 5 | 6 | impl<'arena, T> Serialize for List<'arena, T> 7 | where 8 | T: Serialize 9 | { 10 | #[inline] 11 | fn serialize(&self, serializer: S) -> Result 12 | where 13 | S: Serializer 14 | { 15 | serializer.collect_seq(self.iter()) 16 | } 17 | } 18 | 19 | impl<'arena, K, V> Serialize for Map<'arena, 
K, V> 20 | where 21 | K: Serialize, 22 | V: Serialize + Copy, 23 | { 24 | #[inline] 25 | fn serialize(&self, serializer: S) -> Result 26 | where 27 | S: Serializer 28 | { 29 | serializer.collect_map(self.iter()) 30 | } 31 | } 32 | 33 | impl<'arena, K, V> Serialize for BloomMap<'arena, K, V> 34 | where 35 | K: Serialize, 36 | V: Serialize + Copy, 37 | { 38 | #[inline] 39 | fn serialize(&self, serializer: S) -> Result 40 | where 41 | S: Serializer 42 | { 43 | serializer.collect_map(self.iter()) 44 | } 45 | } 46 | 47 | impl<'arena, I> Serialize for Set<'arena, I> 48 | where 49 | I: Serialize, 50 | { 51 | #[inline] 52 | fn serialize(&self, serializer: S) -> Result 53 | where 54 | S: Serializer 55 | { 56 | serializer.collect_seq(self.iter()) 57 | } 58 | } 59 | 60 | impl<'arena, I> Serialize for BloomSet<'arena, I> 61 | where 62 | I: Serialize, 63 | { 64 | #[inline] 65 | fn serialize(&self, serializer: S) -> Result 66 | where 67 | S: Serializer 68 | { 69 | serializer.collect_seq(self.iter()) 70 | } 71 | } 72 | 73 | #[cfg(test)] 74 | mod test { 75 | use super::*; 76 | use serde_json; 77 | use crate::Arena; 78 | 79 | #[test] 80 | fn list_can_be_serialized() { 81 | let arena = Arena::new(); 82 | let list = List::from_iter(&arena, ["doge", "to", "the", "moon!"].iter().cloned()); 83 | let json = serde_json::to_string(&list).unwrap(); 84 | 85 | assert_eq!(json, r#"["doge","to","the","moon!"]"#); 86 | } 87 | 88 | #[test] 89 | fn map_can_be_serialized() { 90 | let arena = Arena::new(); 91 | let map = Map::new(); 92 | 93 | map.insert(&arena, "foo", 10u64); 94 | map.insert(&arena, "bar", 20); 95 | map.insert(&arena, "doge", 30); 96 | 97 | let json = serde_json::to_string(&map).unwrap(); 98 | 99 | assert_eq!(json, r#"{"foo":10,"bar":20,"doge":30}"#); 100 | } 101 | 102 | #[test] 103 | fn bloom_map_can_be_serialized() { 104 | let arena = Arena::new(); 105 | let map = BloomMap::new(); 106 | 107 | map.insert(&arena, "foo", 10u64); 108 | map.insert(&arena, "bar", 20); 109 | 
map.insert(&arena, "doge", 30); 110 | 111 | let json = serde_json::to_string(&map).unwrap(); 112 | 113 | assert_eq!(json, r#"{"foo":10,"bar":20,"doge":30}"#); 114 | } 115 | 116 | #[test] 117 | fn set_can_be_serialized() { 118 | let arena = Arena::new(); 119 | let set = Set::new(); 120 | 121 | set.insert(&arena, "foo"); 122 | set.insert(&arena, "bar"); 123 | set.insert(&arena, "doge"); 124 | 125 | let json = serde_json::to_string(&set).unwrap(); 126 | 127 | assert_eq!(json, r#"["foo","bar","doge"]"#); 128 | } 129 | 130 | #[test] 131 | fn bloom_set_can_be_serialized() { 132 | let arena = Arena::new(); 133 | let set = BloomSet::new(); 134 | 135 | set.insert(&arena, "foo"); 136 | set.insert(&arena, "bar"); 137 | set.insert(&arena, "doge"); 138 | 139 | let json = serde_json::to_string(&set).unwrap(); 140 | 141 | assert_eq!(json, r#"["foo","bar","doge"]"#); 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Toolshed 2 | //! 3 | //! This crate contains an `Arena` allocator, along with a few common data 4 | //! structures that can be used in tandem with it. 5 | //! 6 | //! For all those times when you need to create a recursively nested tree 7 | //! of `enum`s and find yourself in pain having to put everything in 8 | //! `Box`es all the time. 9 | //! 10 | //! ## Features 11 | //! 12 | //! + Paginated `Arena`: internally preallocates 64KiB _pages_ on the heap and 13 | //! allows `Copy` types to be put on that heap. 14 | //! 15 | //! + `CopyCell`: virtually identical to `std::cell::Cell` but requires that 16 | //! internal types implement `Copy`, and implements `Copy` itself. 17 | //! 18 | //! + `List`, `Map` and `Set`: your basic data structures that allocate on the 19 | //! `Arena` and use internal mutability via `CopyCell`. Never worry about 20 | //! sharing pointers again! 21 | //! 22 | //! 
+ `BloomMap` and `BloomSet`: special variants of `Map` and `Set` with a 23 | //! very simple but very fast bloom filter. If a map / set is often queried 24 | //! for keys / elements it doesn't contain, the bloom filter check will 25 | //! reduce the need to do a full tree lookup, greatly increasing performance. 26 | //! The overhead compared to a regular `Map` or `Set` is also minimal. 27 | //! 28 | //! + All data structures implement expected traits, such as `Debug` or `PartialEq`. 29 | //! 30 | //! + Optional **serde** `Serialize` support behind a feature flag. 31 | //! 32 | //! ## Example 33 | //! 34 | //! ```rust 35 | //! use toolshed::Arena; 36 | //! use toolshed::map::Map; 37 | //! 38 | //! // Only `Copy` types can be allocated on the `Arena`! 39 | //! #[derive(Debug, PartialEq, Clone, Copy)] 40 | //! enum Foo<'arena> { 41 | //! Integer(u64), 42 | //! 43 | //! // Recursive enum without `Box`es! 44 | //! Nested(&'arena Foo<'arena>), 45 | //! } 46 | //! 47 | //! fn main() { 48 | //! // Create a new arena 49 | //! let arena = Arena::new(); 50 | //! 51 | //! // We allocate first instance of `Foo` in the arena. 52 | //! // 53 | //! // Please note that the `alloc` method returns a `&mut` reference. 54 | //! // Since we want to share our references around, we are going to 55 | //! // dereference and re-reference them to immutable ones with `&*`. 56 | //! let child: &Foo = &*arena.alloc(Foo::Integer(42)); 57 | //! 58 | //! // Next instance of `Foo` will contain the child reference. 59 | //! let parent: &Foo = &*arena.alloc(Foo::Nested(child)); 60 | //! 61 | //! // Empty map does not allocate 62 | //! let map = Map::new(); 63 | //! 64 | //! // Inserting stuff in the map requires a reference to the `Arena`. 65 | //! // The reference can be shared, since `Arena` uses interior mutability. 66 | //! map.insert(&arena, "child", child); 67 | //! 68 | //! // We can put our `map` on the arena as well. Once again we use the `&*` 69 | //! 
// operation to change the reference to be immutable, just to demonstrate 70 | //! // that our `Map` implementation is perfectly happy with internal mutability. 71 | //! let map: &Map<&str, &Foo> = &*arena.alloc(map); 72 | //! 73 | //! // Each insert allocates a small chunk of data on the arena. Since arena is 74 | //! // preallocated on the heap, these inserts are very, very fast. 75 | //! // 76 | //! // We only have a non-mutable reference to `map` now, however `Map` is also 77 | //! // using interior mutability on references to allow exactly this kind of 78 | //! // behavior in a safe manner. 79 | //! map.insert(&arena, "parent", parent); 80 | //! 81 | //! assert_eq!(map.get("child"), Some(&Foo::Integer(42))); 82 | //! assert_eq!(map.get("parent"), Some(&Foo::Nested(&Foo::Integer(42)))); 83 | //! assert_eq!(map.get("heh"), None); 84 | //! } 85 | //! 86 | //! ``` 87 | 88 | #![warn(missing_docs)] 89 | 90 | // Pull in serde if `impl_serialize` is enabled 91 | #[cfg(feature = "impl_serialize")] 92 | use serde; 93 | 94 | // Pull in serde_json for testing if `impl_serialize` is enabled 95 | #[cfg(all(test, feature = "impl_serialize"))] 96 | use serde_json; 97 | 98 | mod cell; 99 | pub mod map; 100 | pub mod set; 101 | pub mod list; 102 | mod arena; 103 | mod bloom; 104 | mod impl_partial_eq; 105 | mod impl_debug; 106 | 107 | #[cfg(feature = "impl_serialize")] 108 | mod impl_serialize; 109 | 110 | pub use self::arena::{Arena, Uninitialized, NulTermStr}; 111 | pub use self::cell::CopyCell; 112 | -------------------------------------------------------------------------------- /src/list.rs: -------------------------------------------------------------------------------- 1 | //! A linked list and auxiliary types that can be used with the `Arena`. 
2 | 3 | use std::num::NonZeroUsize; 4 | 5 | use crate::arena::Arena; 6 | use crate::cell::CopyCell; 7 | 8 | #[derive(Debug, PartialEq, Clone, Copy)] 9 | struct ListNode<'arena, T> { 10 | value: T, 11 | next: CopyCell>>, 12 | } 13 | 14 | /// A single-ended linked list. 15 | #[derive(Clone, Copy)] 16 | pub struct List<'arena, T> { 17 | root: CopyCell>>, 18 | } 19 | 20 | impl<'arena, T> List<'arena, T> { 21 | /// Create a new empty `List`. 22 | pub const fn empty() -> Self { 23 | List { 24 | root: CopyCell::new(None) 25 | } 26 | } 27 | 28 | /// Turns the list into an empty list. 29 | /// 30 | /// Internally, all this method does is removing the reference to the 31 | /// first item on the list. 32 | #[inline] 33 | pub fn clear(&self) { 34 | self.root.set(None); 35 | } 36 | 37 | /// Returns an iterator over the items in the list. 38 | #[inline] 39 | pub fn iter(&self) -> ListIter<'arena, T> { 40 | ListIter { 41 | next: self.root.get() 42 | } 43 | } 44 | 45 | /// Checks if the list is empty. 46 | #[inline] 47 | pub fn is_empty(&self) -> bool { 48 | self.root.get().is_none() 49 | } 50 | 51 | /// Returns the first element if, and only if, the list contains 52 | /// just that single element. 53 | #[inline] 54 | pub fn only_element(&self) -> Option<&'arena T> { 55 | match self.root.get() { 56 | Some(&ListNode { 57 | ref value, 58 | ref next, 59 | .. 60 | }) if next.get().is_none() => Some(value), 61 | _ => None 62 | } 63 | } 64 | 65 | /// Returns the reference to the first element. 66 | #[inline] 67 | pub fn first_element(&self) -> Option<&'arena T> { 68 | self.root.get().map(|li| &li.value) 69 | } 70 | 71 | /// Returns an `UnsafeList` for the current `List`. While this function is 72 | /// safe itself, using `UnsafeList` might lead to undefined behavior. 
73 | #[inline] 74 | pub fn into_unsafe(self) -> UnsafeList { 75 | UnsafeList { 76 | root: self.root.get().map(|ptr| unsafe { 77 | NonZeroUsize::new_unchecked(ptr as *const ListNode as usize) 78 | }), 79 | } 80 | } 81 | } 82 | 83 | impl<'arena, T: Copy> List<'arena, T> { 84 | /// Create a single-element list from the given value. 85 | #[inline] 86 | pub fn from(arena: &'arena Arena, value: T) -> List<'arena, T> { 87 | List { 88 | root: CopyCell::new(Some(arena.alloc(ListNode { 89 | value, 90 | next: CopyCell::new(None) 91 | }))) 92 | } 93 | } 94 | 95 | /// Create a list from an iterator of items. 96 | pub fn from_iter(arena: &'arena Arena, source: I) -> List<'arena, T> where 97 | I: IntoIterator 98 | { 99 | let mut iter = source.into_iter(); 100 | 101 | let builder = match iter.next() { 102 | Some(item) => ListBuilder::new(arena, item), 103 | None => return List::empty(), 104 | }; 105 | 106 | for item in iter { 107 | builder.push(arena, item); 108 | } 109 | 110 | builder.as_list() 111 | } 112 | 113 | /// Adds a new element to the beginning of the list. 114 | #[inline] 115 | pub fn prepend(&self, arena: &'arena Arena, value: T) -> &'arena T { 116 | let root = arena.alloc( 117 | ListNode { 118 | value, 119 | next: self.root 120 | } 121 | ); 122 | 123 | self.root.set(Some(root)); 124 | 125 | &root.value 126 | } 127 | 128 | /// Removes the first element from the list and returns it. 129 | #[inline] 130 | pub fn shift(&self) -> Option<&'arena T> { 131 | let list_item = self.root.get()?; 132 | 133 | self.root.set(list_item.next.get()); 134 | 135 | Some(&list_item.value) 136 | } 137 | 138 | /// Get the first element of the `List`, if any, then create a 139 | /// new `List` starting from the second element at the reference to 140 | /// the old list. 141 | /// 142 | /// Note: This does not modify the internal state of the `List`. 143 | /// If you wish to modify the list use `shift` instead. 
144 | #[inline] 145 | pub fn shift_ref(&mut self) -> Option<&'arena T> { 146 | let list_item = self.root.get()?; 147 | 148 | *self = List { 149 | root: list_item.next 150 | }; 151 | 152 | Some(&list_item.value) 153 | } 154 | } 155 | 156 | impl<'arena, T> IntoIterator for List<'arena, T> { 157 | type Item = &'arena T; 158 | type IntoIter = ListIter<'arena, T>; 159 | 160 | #[inline] 161 | fn into_iter(self) -> Self::IntoIter { 162 | self.iter() 163 | } 164 | } 165 | 166 | impl<'a, 'arena, T> IntoIterator for &'a List<'arena, T> { 167 | type Item = &'arena T; 168 | type IntoIter = ListIter<'arena, T>; 169 | 170 | #[inline] 171 | fn into_iter(self) -> Self::IntoIter { 172 | self.iter() 173 | } 174 | } 175 | 176 | /// A variant of the `List` that keeps track of the last element and thus 177 | /// allows user to push to the end of the list. 178 | #[derive(Clone, Copy)] 179 | pub struct GrowableList<'arena, T> { 180 | last: CopyCell>>, 181 | first: CopyCell>>, 182 | } 183 | 184 | impl<'arena, T> GrowableList<'arena, T> 185 | where 186 | T: Copy, 187 | { 188 | /// Push a new item at the end of the `List`. 189 | #[inline] 190 | pub fn push(&self, arena: &'arena Arena, item: T) { 191 | let next = Some(&*arena.alloc(ListNode { 192 | value: item, 193 | next: CopyCell::new(None) 194 | })); 195 | 196 | match self.last.get() { 197 | Some(ref last) => last.next.set(next), 198 | None => self.first.set(next), 199 | } 200 | 201 | self.last.set(next); 202 | } 203 | } 204 | 205 | impl<'arena, T> GrowableList<'arena, T> { 206 | /// Create a new builder. 207 | pub const fn new() -> Self { 208 | GrowableList { 209 | first: CopyCell::new(None), 210 | last: CopyCell::new(None), 211 | } 212 | } 213 | 214 | /// Get a `List` from the builder. 215 | #[inline] 216 | pub fn as_list(&self) -> List<'arena, T> { 217 | List { 218 | root: self.first 219 | } 220 | } 221 | } 222 | 223 | /// A builder that allows one to push elements onto the end of the list. 
224 | /// 225 | /// This is in principle identical to `GrowableList`, however it skips 226 | /// some checks on pushing given that it always has to have at least one 227 | /// element, and thus might be ever so slightly faster. 228 | #[derive(Clone, Copy)] 229 | pub struct ListBuilder<'arena, T> { 230 | first: &'arena ListNode<'arena, T>, 231 | last: CopyCell<&'arena ListNode<'arena, T>>, 232 | } 233 | 234 | impl<'arena, T: Copy> ListBuilder<'arena, T> { 235 | /// Create a new builder with the first element. 236 | #[inline] 237 | pub fn new(arena: &'arena Arena, first: T) -> Self { 238 | let first = arena.alloc(ListNode { 239 | value: first, 240 | next: CopyCell::new(None) 241 | }); 242 | 243 | ListBuilder { 244 | first, 245 | last: CopyCell::new(first), 246 | } 247 | } 248 | 249 | /// Push a new item at the end of the `List`. 250 | #[inline] 251 | pub fn push(&self, arena: &'arena Arena, item: T) { 252 | let next = arena.alloc(ListNode { 253 | value: item, 254 | next: CopyCell::new(None) 255 | }); 256 | 257 | self.last.get().next.set(Some(next)); 258 | self.last.set(next); 259 | } 260 | } 261 | 262 | impl<'arena, T> ListBuilder<'arena, T> { 263 | /// Get a `List` from the builder. 264 | #[inline] 265 | pub fn as_list(&self) -> List<'arena, T> { 266 | List { 267 | root: CopyCell::new(Some(self.first)) 268 | } 269 | } 270 | } 271 | 272 | /// Unsafe variant of the `List` that erases any lifetime information. 273 | #[derive(Debug, Clone, Copy)] 274 | pub struct UnsafeList { 275 | root: Option<NonZeroUsize>, 276 | } 277 | 278 | impl UnsafeList { 279 | /// Converts the `UnsafeList` into a regular `List`. Using this with 280 | /// incorrect lifetimes or after the original arena has been dropped 281 | /// will lead to undefined behavior. Use with extreme care.
282 | pub unsafe fn into_list<'arena, T>(self) -> List<'arena, T> { 283 | List { 284 | root: CopyCell::new(self.root.map(|ptr| &*(ptr.get() as *const ListNode<'arena, T>))), 285 | } 286 | } 287 | } 288 | 289 | /// An iterator over the items in the list. 290 | pub struct ListIter<'arena, T> { 291 | next: Option<&'arena ListNode<'arena, T>> 292 | } 293 | 294 | impl<'arena, T> Iterator for ListIter<'arena, T> { 295 | type Item = &'arena T; 296 | 297 | #[inline] 298 | fn next(&mut self) -> Option { 299 | let next = self.next; 300 | 301 | next.map(|list_item| { 302 | let value = &list_item.value; 303 | self.next = list_item.next.get(); 304 | value 305 | }) 306 | } 307 | } 308 | 309 | #[cfg(test)] 310 | mod test { 311 | use super::*; 312 | 313 | #[test] 314 | fn builder() { 315 | let arena = Arena::new(); 316 | let builder = ListBuilder::new(&arena, 10); 317 | 318 | builder.push(&arena, 20); 319 | builder.push(&arena, 30); 320 | 321 | let list = builder.as_list(); 322 | 323 | assert!(list.iter().eq([10, 20, 30].iter())); 324 | } 325 | 326 | #[test] 327 | fn empty_builder() { 328 | let arena = Arena::new(); 329 | let builder = GrowableList::new(); 330 | 331 | builder.push(&arena, 10); 332 | builder.push(&arena, 20); 333 | builder.push(&arena, 30); 334 | 335 | let list = builder.as_list(); 336 | 337 | assert!(list.iter().eq([10, 20, 30].iter())); 338 | } 339 | 340 | #[test] 341 | fn from_iter() { 342 | let arena = Arena::new(); 343 | let list = List::from_iter(&arena, [10, 20, 30].iter().cloned()); 344 | 345 | assert!(list.iter().eq([10, 20, 30].iter())); 346 | } 347 | 348 | #[test] 349 | fn prepend() { 350 | let arena = Arena::new(); 351 | let list = List::from(&arena, 30); 352 | 353 | list.prepend(&arena, 20); 354 | list.prepend(&arena, 10); 355 | 356 | assert!(list.iter().eq([10, 20, 30].iter())); 357 | } 358 | 359 | #[test] 360 | fn only_element() { 361 | let arena = Arena::new(); 362 | let list = List::from(&arena, 42); 363 | 364 | assert_eq!(list.only_element(), 
Some(&42)); 365 | 366 | list.prepend(&arena, 10); 367 | 368 | assert_eq!(list.only_element(), None); 369 | } 370 | 371 | #[test] 372 | fn shift() { 373 | let arena = Arena::new(); 374 | let builder = GrowableList::new(); 375 | 376 | builder.push(&arena, 10); 377 | builder.push(&arena, 20); 378 | builder.push(&arena, 30); 379 | 380 | let list = builder.as_list(); 381 | 382 | assert_eq!(list.shift(), Some(&10)); 383 | 384 | assert!(list.iter().eq([20, 30].iter())); 385 | } 386 | 387 | #[test] 388 | fn shift_ref() { 389 | let arena = Arena::new(); 390 | let builder = GrowableList::new(); 391 | 392 | builder.push(&arena, 10); 393 | builder.push(&arena, 20); 394 | builder.push(&arena, 30); 395 | 396 | let list_a = builder.as_list(); 397 | let mut list_b = list_a; 398 | 399 | assert_eq!(list_b.shift_ref(), Some(&10)); 400 | 401 | assert!(list_a.iter().eq([10, 20, 30].iter())); 402 | assert!(list_b.iter().eq([20, 30].iter())); 403 | } 404 | 405 | #[test] 406 | fn empty_unsafe_list() { 407 | let list: List = List::empty(); 408 | let raw = list.into_unsafe(); 409 | 410 | assert!(raw.root.is_none()); 411 | 412 | let list: List = unsafe { raw.into_list() }; 413 | 414 | assert_eq!(list.is_empty(), true); 415 | } 416 | 417 | #[test] 418 | fn unsafe_list() { 419 | let arena = Arena::new(); 420 | 421 | { 422 | let list = List::from(&arena, 42usize); 423 | 424 | drop(list); 425 | 426 | let raw = list.into_unsafe(); 427 | 428 | assert!(raw.root.is_some()); 429 | 430 | let list: List = unsafe { raw.into_list() }; 431 | 432 | assert_eq!(list.only_element(), Some(&42)); 433 | 434 | // Let's be absolutely sure... 435 | drop(list); 436 | } 437 | 438 | // ...that things are dropped in the right order 439 | drop(arena); 440 | } 441 | } 442 | -------------------------------------------------------------------------------- /src/map.rs: -------------------------------------------------------------------------------- 1 | //! Maps of keys to values that can be used with the `Arena`. 
2 | 3 | use std::hash::{Hash, Hasher}; 4 | use rustc_hash::FxHasher; 5 | 6 | use crate::cell::CopyCell; 7 | use crate::Arena; 8 | use crate::bloom::bloom; 9 | 10 | #[derive(Clone, Copy)] 11 | struct MapNode<'arena, K, V> { 12 | pub key: K, 13 | pub hash: u64, 14 | pub value: CopyCell, 15 | pub left: CopyCell>>, 16 | pub right: CopyCell>>, 17 | pub next: CopyCell>>, 18 | } 19 | 20 | impl<'arena, K, V> MapNode<'arena, K, V> { 21 | pub const fn new(key: K, hash: u64, value: V) -> Self { 22 | MapNode { 23 | key, 24 | hash, 25 | value: CopyCell::new(value), 26 | left: CopyCell::new(None), 27 | right: CopyCell::new(None), 28 | next: CopyCell::new(None), 29 | } 30 | } 31 | } 32 | 33 | /// A map of keys `K` to values `V`. The map is built as a pseudo-random 34 | /// binary tree with hashes of keys used for balancing the tree nodes. 35 | /// 36 | /// All the nodes of the map are also linked to allow iteration in 37 | /// insertion order. 38 | #[derive(Clone, Copy)] 39 | pub struct Map<'arena, K, V> { 40 | root: CopyCell>>, 41 | last: CopyCell>>, 42 | } 43 | 44 | impl<'arena, K, V> Default for Map<'arena, K, V> { 45 | fn default() -> Self { 46 | Self::new() 47 | } 48 | } 49 | 50 | impl<'arena, K, V> Map<'arena, K, V> { 51 | /// Create a new, empty `Map`. 52 | pub const fn new() -> Self { 53 | Map { 54 | root: CopyCell::new(None), 55 | last: CopyCell::new(None), 56 | } 57 | } 58 | } 59 | 60 | impl<'arena, K, V> Map<'arena, K, V> { 61 | /// Get an iterator over key value pairs. 62 | #[inline] 63 | pub fn iter(&self) -> MapIter<'arena, K, V> { 64 | MapIter { 65 | next: self.root.get() 66 | } 67 | } 68 | 69 | /// Returns true if the map contains no elements. 70 | #[inline] 71 | pub fn is_empty(&self) -> bool { 72 | self.root.get().is_none() 73 | } 74 | 75 | /// Clears the map. 
76 | #[inline] 77 | pub fn clear(&self) { 78 | self.root.set(None); 79 | } 80 | } 81 | 82 | impl<'arena, K, V> Map<'arena, K, V> 83 | where 84 | K: Eq + Hash + Copy, 85 | V: Copy, 86 | { 87 | #[inline] 88 | fn hash_key(key: &K) -> u64 { 89 | let mut hasher = FxHasher::default(); 90 | 91 | key.hash(&mut hasher); 92 | 93 | hasher.finish() 94 | } 95 | 96 | #[inline] 97 | fn find_slot(&self, key: K, hash: u64) -> &CopyCell<Option<&'arena MapNode<'arena, K, V>>> { 98 | let mut node = &self.root; 99 | 100 | loop { 101 | match node.get() { 102 | None => return node, 103 | Some(parent) => { 104 | if hash == parent.hash && key == parent.key { 105 | return node; 106 | } else if hash < parent.hash { 107 | node = &parent.left; 108 | } else { 109 | node = &parent.right; 110 | } 111 | } 112 | } 113 | } 114 | } 115 | 116 | /// Inserts a key-value pair into the map. If the key was previously set, 117 | /// old value is returned. 118 | #[inline] 119 | pub fn insert(&self, arena: &'arena Arena, key: K, value: V) -> Option<V> { 120 | let hash = Self::hash_key(&key); 121 | let node = self.find_slot(key, hash); 122 | 123 | match node.get() { 124 | Some(node) => { 125 | let old = node.value.get(); 126 | node.value.set(value); 127 | Some(old) 128 | }, 129 | None => { 130 | let new = Some(&*arena.alloc(MapNode::new(key, hash, value))); 131 | 132 | if let Some(last) = self.last.get() { 133 | last.next.set(new); 134 | } 135 | 136 | self.last.set(new); 137 | node.set(new); 138 | None 139 | } 140 | } 141 | } 142 | 143 | /// Returns a reference to the key stored in the map that is equal to the given key. 144 | #[inline] 145 | pub fn get_key(&self, key: K) -> Option<&K> { 146 | let hash = Self::hash_key(&key); 147 | 148 | self.find_slot(key, hash).get().map(|node| &node.key) 149 | } 150 | 151 | /// Returns the value corresponding to the key.
152 | #[inline] 153 | pub fn get(&self, key: K) -> Option { 154 | let hash = Self::hash_key(&key); 155 | 156 | self.find_slot(key, hash).get().map(|node| node.value.get()) 157 | } 158 | 159 | /// Returns true if the map contains a value for the specified key. 160 | #[inline] 161 | pub fn contains_key(&self, key: K) -> bool { 162 | let hash = Self::hash_key(&key); 163 | 164 | self.find_slot(key, hash).get().is_some() 165 | } 166 | } 167 | 168 | /// A variant of the `Map` that includes a bloom filter using the 169 | /// `bloom` function for keys that can be represented as byte slices. 170 | /// 171 | /// This is ideal for small maps for which querying for absent keys is 172 | /// a common behavior. In this case it will very likely outperform a 173 | /// `HashMap`, even one with a fast hashing algorithm. 174 | #[derive(Clone, Copy)] 175 | pub struct BloomMap<'arena, K, V> { 176 | filter: CopyCell, 177 | inner: Map<'arena, K, V>, 178 | } 179 | 180 | impl<'arena, K, V> BloomMap<'arena, K, V> { 181 | /// Create a new, empty `BloomMap`. 182 | pub const fn new() -> Self { 183 | BloomMap { 184 | filter: CopyCell::new(0), 185 | inner: Map::new(), 186 | } 187 | } 188 | } 189 | 190 | impl<'arena, K, V: Copy> BloomMap<'arena, K, V> { 191 | /// Get an iterator over key value pairs. 192 | #[inline] 193 | pub fn iter(&self) -> MapIter<'arena, K, V> { 194 | self.inner.iter() 195 | } 196 | 197 | /// Returns true if the map contains no elements. 198 | #[inline] 199 | pub fn is_empty(&self) -> bool { 200 | self.inner.is_empty() 201 | } 202 | 203 | /// Clears the map. 204 | #[inline] 205 | pub fn clear(&self) { 206 | self.filter.set(0); 207 | self.inner.clear(); 208 | } 209 | } 210 | 211 | impl<'arena, K, V> BloomMap<'arena, K, V> 212 | where 213 | K: Eq + Hash + Copy + AsRef<[u8]>, 214 | V: Copy, 215 | { 216 | /// Inserts a key-value pair into the map. If the key was previously set, 217 | /// old value is returned. 
218 | #[inline] 219 | pub fn insert(&self, arena: &'arena Arena, key: K, value: V) -> Option { 220 | self.filter.set(self.filter.get() | bloom(key)); 221 | self.inner.insert(arena, key, value) 222 | } 223 | 224 | /// Returns the value corresponding to the key. 225 | #[inline] 226 | pub fn get(&self, key: K) -> Option { 227 | let b = bloom(key.as_ref()); 228 | 229 | if self.filter.get() & b == b { 230 | self.inner.get(key) 231 | } else { 232 | None 233 | } 234 | } 235 | 236 | /// Returns true if the map contains a value for the specified key. 237 | #[inline] 238 | pub fn contains_key(&self, key: K) -> bool { 239 | let b = bloom(key); 240 | 241 | self.filter.get() & b == b && self.inner.contains_key(key) 242 | } 243 | } 244 | 245 | /// An iterator over the entries in the map. 246 | /// All entries are returned in insertion order. 247 | pub struct MapIter<'arena, K, V> { 248 | next: Option<&'arena MapNode<'arena, K, V>> 249 | } 250 | 251 | impl<'arena, K, V: Copy> Iterator for MapIter<'arena, K, V> { 252 | type Item = (&'arena K, V); 253 | 254 | #[inline] 255 | fn next(&mut self) -> Option { 256 | let next = self.next; 257 | 258 | next.map(|map_node| { 259 | let item = (&map_node.key, map_node.value.get()); 260 | self.next = map_node.next.get(); 261 | item 262 | }) 263 | } 264 | } 265 | 266 | impl<'arena, K, V: Copy> IntoIterator for Map<'arena, K, V> { 267 | type Item = (&'arena K, V); 268 | type IntoIter = MapIter<'arena, K, V>; 269 | 270 | #[inline] 271 | fn into_iter(self) -> Self::IntoIter { 272 | self.iter() 273 | } 274 | } 275 | 276 | impl<'arena, K, V: Copy> IntoIterator for BloomMap<'arena, K, V> { 277 | type Item = (&'arena K, V); 278 | type IntoIter = MapIter<'arena, K, V>; 279 | 280 | #[inline] 281 | fn into_iter(self) -> Self::IntoIter { 282 | self.iter() 283 | } 284 | } 285 | 286 | impl<'arena, K, V> From> for BloomMap<'arena, K, V> 287 | where 288 | K: Eq + Hash + Copy + AsRef<[u8]>, 289 | V: Copy, 290 | { 291 | fn from(map: Map<'arena, K, V>) -> 
BloomMap<'arena, K, V> { 292 | let mut filter = 0; 293 | 294 | for (key, _) in map.iter() { 295 | filter |= bloom(key.as_ref()); 296 | } 297 | 298 | BloomMap { 299 | filter: CopyCell::new(filter), 300 | inner: map, 301 | } 302 | } 303 | } 304 | 305 | impl<'arena, K, V> From> for Map<'arena, K, V> { 306 | #[inline] 307 | fn from(bloom_map: BloomMap<'arena, K, V>) -> Map<'arena, K, V> { 308 | bloom_map.inner 309 | } 310 | } 311 | 312 | #[cfg(test)] 313 | mod test { 314 | use super::*; 315 | 316 | #[test] 317 | fn map() { 318 | let arena = Arena::new(); 319 | let map = Map::new(); 320 | 321 | map.insert(&arena, "foo", 10u64); 322 | map.insert(&arena, "bar", 20); 323 | map.insert(&arena, "doge", 30); 324 | 325 | assert_eq!(map.contains_key("foo"), true); 326 | assert_eq!(map.contains_key("bar"), true); 327 | assert_eq!(map.contains_key("doge"), true); 328 | assert_eq!(map.contains_key("moon"), false); 329 | 330 | assert_eq!(map.get("foo"), Some(10)); 331 | assert_eq!(map.get("bar"), Some(20)); 332 | assert_eq!(map.get("doge"), Some(30)); 333 | assert_eq!(map.get("moon"), None); 334 | } 335 | 336 | #[test] 337 | fn bloom_map() { 338 | let arena = Arena::new(); 339 | let map = BloomMap::new(); 340 | 341 | map.insert(&arena, "foo", 10u64); 342 | map.insert(&arena, "bar", 20); 343 | map.insert(&arena, "doge", 30); 344 | 345 | assert_eq!(map.contains_key("foo"), true); 346 | assert_eq!(map.contains_key("bar"), true); 347 | assert_eq!(map.contains_key("doge"), true); 348 | assert_eq!(map.contains_key("moon"), false); 349 | 350 | assert_eq!(map.get("foo"), Some(10)); 351 | assert_eq!(map.get("bar"), Some(20)); 352 | assert_eq!(map.get("doge"), Some(30)); 353 | assert_eq!(map.get("moon"), None); 354 | } 355 | 356 | #[test] 357 | fn iter() { 358 | let arena = Arena::new(); 359 | let map = Map::new(); 360 | 361 | map.insert(&arena, "foo", 10u64); 362 | map.insert(&arena, "bar", 20); 363 | map.insert(&arena, "doge", 30); 364 | 365 | let mut iter = map.iter(); 366 | 367 | 
assert_eq!(iter.next(), Some((&"foo", 10))); 368 | assert_eq!(iter.next(), Some((&"bar", 20))); 369 | assert_eq!(iter.next(), Some((&"doge", 30))); 370 | assert_eq!(iter.next(), None); 371 | } 372 | 373 | #[test] 374 | fn insert_replace() { 375 | let arena = Arena::new(); 376 | let map = Map::new(); 377 | 378 | map.insert(&arena, "foo", 10u64); 379 | map.insert(&arena, "bar", 20); 380 | map.insert(&arena, "doge", 30); 381 | 382 | let mut iter = map.iter(); 383 | 384 | assert_eq!(iter.next(), Some((&"foo", 10))); 385 | assert_eq!(iter.next(), Some((&"bar", 20))); 386 | assert_eq!(iter.next(), Some((&"doge", 30))); 387 | assert_eq!(iter.next(), None); 388 | 389 | map.insert(&arena, "bar", 42); 390 | 391 | let mut iter = map.iter(); 392 | 393 | assert_eq!(iter.next(), Some((&"foo", 10))); 394 | assert_eq!(iter.next(), Some((&"bar", 42))); 395 | assert_eq!(iter.next(), Some((&"doge", 30))); 396 | assert_eq!(iter.next(), None); 397 | } 398 | 399 | #[test] 400 | fn from_eq() { 401 | let arena = Arena::new(); 402 | let map = Map::new(); 403 | 404 | map.insert(&arena, "foo", 10); 405 | map.insert(&arena, "bar", 20); 406 | map.insert(&arena, "doge", 30); 407 | 408 | let bloom_map = BloomMap::new(); 409 | 410 | bloom_map.insert(&arena, "foo", 10); 411 | bloom_map.insert(&arena, "bar", 20); 412 | bloom_map.insert(&arena, "doge", 30); 413 | 414 | assert_eq!(map, Map::from(bloom_map)); 415 | assert_eq!(BloomMap::from(map), bloom_map); 416 | } 417 | } 418 | -------------------------------------------------------------------------------- /src/set.rs: -------------------------------------------------------------------------------- 1 | //! Sets of values that can be used with the `Arena`. 2 | 3 | use std::hash::Hash; 4 | 5 | use crate::map::{Map, BloomMap, MapIter}; 6 | use crate::Arena; 7 | 8 | /// A set of values. This structure is using a `Map` with value 9 | /// type set to `()` internally. 
10 | #[derive(Clone, Copy)] 11 | pub struct Set<'arena, I> { 12 | map: Map<'arena, I, ()>, 13 | } 14 | 15 | impl<I> Default for Set<'_, I> { 16 | fn default() -> Self { 17 | Self::new() 18 | } 19 | } 20 | 21 | impl<'arena, I> Set<'arena, I> { 22 | /// Creates a new, empty `Set`. 23 | pub const fn new() -> Self { 24 | Set { 25 | map: Map::new(), 26 | } 27 | } 28 | 29 | /// Get an iterator over the elements in the set 30 | #[inline] 31 | pub fn iter(&self) -> SetIter<'arena, I> { 32 | SetIter { 33 | inner: self.map.iter() 34 | } 35 | } 36 | 37 | /// Returns `true` if the set contains no elements. 38 | #[inline] 39 | pub fn is_empty(&self) -> bool { 40 | self.map.is_empty() 41 | } 42 | 43 | /// Clears the set. 44 | #[inline] 45 | pub fn clear(&self) { 46 | self.map.clear() 47 | } 48 | } 49 | 50 | impl<'arena, I> Set<'arena, I> 51 | where 52 | I: Eq + Hash + Copy, 53 | { 54 | /// Inserts a value into the set. 55 | #[inline] 56 | pub fn insert(&self, arena: &'arena Arena, item: I) { 57 | self.map.insert(arena, item, ()); 58 | } 59 | 60 | /// Gets a reference to the existing value in the set, if it exists 61 | #[inline] 62 | pub fn get(&self, key: I) -> Option<&I> { 63 | self.map.get_key(key) 64 | } 65 | 66 | /// Returns `true` if the set contains a value. 67 | #[inline] 68 | pub fn contains(&self, item: I) -> bool { 69 | self.map.contains_key(item) 70 | } 71 | } 72 | 73 | /// A set of values with a bloom filter. This structure is 74 | /// using a `BloomMap` with value type set to `()` internally. 75 | #[derive(Clone, Copy)] 76 | pub struct BloomSet<'arena, I> { 77 | map: BloomMap<'arena, I, ()>, 78 | } 79 | 80 | impl<'arena, I> BloomSet<'arena, I> { 81 | /// Creates a new, empty `BloomSet`.
82 | pub const fn new() -> Self { 83 | BloomSet { 84 | map: BloomMap::new(), 85 | } 86 | } 87 | 88 | /// Get an iterator over the elements in the set 89 | #[inline] 90 | pub fn iter(&self) -> SetIter<'arena, I> { 91 | SetIter { 92 | inner: self.map.iter() 93 | } 94 | } 95 | 96 | /// Returns `true` if the set contains no elements. 97 | #[inline] 98 | pub fn is_empty(&self) -> bool { 99 | self.map.is_empty() 100 | } 101 | 102 | /// Clears the set. 103 | #[inline] 104 | pub fn clear(&self) { 105 | self.map.clear() 106 | } 107 | } 108 | 109 | impl<'arena, I> BloomSet<'arena, I> 110 | where 111 | I: Eq + Hash + Copy + AsRef<[u8]>, 112 | { 113 | /// Inserts a value into the set. 114 | #[inline] 115 | pub fn insert(&self, arena: &'arena Arena, item: I) { 116 | self.map.insert(arena, item, ()); 117 | } 118 | 119 | /// Returns `true` if the set contains a value. 120 | #[inline] 121 | pub fn contains(&self, item: I) -> bool { 122 | self.map.contains_key(item) 123 | } 124 | } 125 | 126 | /// An iterator over the elements in the set.
127 | pub struct SetIter<'arena, I> { 128 | inner: MapIter<'arena, I, ()> 129 | } 130 | 131 | impl<'arena, I> Iterator for SetIter<'arena, I> { 132 | type Item = &'arena I; 133 | 134 | #[inline] 135 | fn next(&mut self) -> Option { 136 | self.inner.next().map(|(key, _)| key) 137 | } 138 | } 139 | 140 | impl<'arena, I> IntoIterator for Set<'arena, I> { 141 | type Item = &'arena I; 142 | type IntoIter = SetIter<'arena, I>; 143 | 144 | #[inline] 145 | fn into_iter(self) -> Self::IntoIter { 146 | self.iter() 147 | } 148 | } 149 | 150 | impl<'arena, I> IntoIterator for BloomSet<'arena, I> { 151 | type Item = &'arena I; 152 | type IntoIter = SetIter<'arena, I>; 153 | 154 | #[inline] 155 | fn into_iter(self) -> Self::IntoIter { 156 | self.iter() 157 | } 158 | } 159 | 160 | impl<'arena, I> From> for BloomSet<'arena, I> 161 | where 162 | I: Eq + Hash + Copy + AsRef<[u8]>, 163 | { 164 | #[inline] 165 | fn from(set: Set<'arena, I>) -> BloomSet<'arena, I> { 166 | BloomSet { 167 | map: set.map.into() 168 | } 169 | } 170 | } 171 | 172 | impl<'arena, I> From> for Set<'arena, I> { 173 | #[inline] 174 | fn from(bloom_set: BloomSet<'arena, I>) -> Set<'arena, I> { 175 | Set { 176 | map: bloom_set.map.into() 177 | } 178 | } 179 | } 180 | 181 | #[cfg(test)] 182 | mod test { 183 | use super::*; 184 | 185 | #[test] 186 | fn set() { 187 | let arena = Arena::new(); 188 | let set = Set::new(); 189 | 190 | set.insert(&arena, "foo"); 191 | set.insert(&arena, "bar"); 192 | set.insert(&arena, "doge"); 193 | 194 | assert_eq!(set.contains("foo"), true); 195 | assert_eq!(set.contains("bar"), true); 196 | assert_eq!(set.contains("doge"), true); 197 | assert_eq!(set.contains("moon"), false); 198 | } 199 | 200 | #[test] 201 | fn bloom_set() { 202 | let arena = Arena::new(); 203 | let set = BloomSet::new(); 204 | 205 | set.insert(&arena, "foo"); 206 | set.insert(&arena, "bar"); 207 | set.insert(&arena, "doge"); 208 | 209 | assert_eq!(set.contains("foo"), true); 210 | assert_eq!(set.contains("bar"), 
true); 211 | assert_eq!(set.contains("doge"), true); 212 | assert_eq!(set.contains("moon"), false); 213 | } 214 | 215 | #[test] 216 | fn set_iter() { 217 | let arena = Arena::new(); 218 | let set = Set::new(); 219 | 220 | set.insert(&arena, "foo"); 221 | set.insert(&arena, "bar"); 222 | set.insert(&arena, "doge"); 223 | 224 | let mut iter = set.iter(); 225 | 226 | assert_eq!(iter.next(), Some(&"foo")); 227 | assert_eq!(iter.next(), Some(&"bar")); 228 | assert_eq!(iter.next(), Some(&"doge")); 229 | } 230 | 231 | #[test] 232 | fn bloom_set_iter() { 233 | let arena = Arena::new(); 234 | let set = BloomSet::new(); 235 | 236 | set.insert(&arena, "foo"); 237 | set.insert(&arena, "bar"); 238 | set.insert(&arena, "doge"); 239 | 240 | let mut iter = set.iter(); 241 | 242 | assert_eq!(iter.next(), Some(&"foo")); 243 | assert_eq!(iter.next(), Some(&"bar")); 244 | assert_eq!(iter.next(), Some(&"doge")); 245 | } 246 | 247 | #[test] 248 | fn from_eq() { 249 | let arena = Arena::new(); 250 | let set = Set::new(); 251 | 252 | set.insert(&arena, "foo"); 253 | set.insert(&arena, "bar"); 254 | set.insert(&arena, "doge"); 255 | 256 | let bloom_set = BloomSet::new(); 257 | 258 | bloom_set.insert(&arena, "foo"); 259 | bloom_set.insert(&arena, "bar"); 260 | bloom_set.insert(&arena, "doge"); 261 | 262 | assert_eq!(set, Set::from(bloom_set)); 263 | assert_eq!(BloomSet::from(set), bloom_set); 264 | } 265 | } 266 | --------------------------------------------------------------------------------