├── .gitignore ├── Cargo.toml ├── README.md ├── benches ├── concurrent.rs └── single_threaded.rs ├── examples ├── readme.rs └── wordcount.rs ├── src ├── lib.rs ├── map.rs └── table.rs └── tests └── tests.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | *.swp 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "concurrent-hashmap" 3 | version = "0.2.2" 4 | authors = ["Viktor Dahl "] 5 | license = "MIT/Apache-2.0" 6 | repository = "https://github.com/veddan/rust-concurrent-hashmap.git" 7 | documentation = "https://veddan.github.io/rustdoc/concurrent-hashmap/concurrent_hashmap/index.html" 8 | description = "A concurrent hashmap library." 9 | 10 | [dependencies.spin] 11 | version = '0.4.5' 12 | default-features = false 13 | 14 | [features] 15 | unstable = ["spin/asm"] 16 | default = ["unstable"] 17 | 18 | [dev-dependencies] 19 | rand = '0.3.11' 20 | 21 | [profile.test] 22 | opt-level = 1 23 | 24 | [profile.bench] 25 | debug = true 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rust-concurrent-hashmap 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/concurrent-hashmap.svg)](https://crates.io/crates/concurrent-hashmap) 4 | 5 | [Documentation](https://veddan.github.io/rustdoc/concurrent-hashmap/concurrent_hashmap/index.html) 6 | 7 | This is a Rust library implementing a concurrent hashmap. 8 | 9 | The crate works on stable Rust if default features are disabled: 10 | ```toml 11 | [dependencies.concurrent-hashmap] 12 | version = "0.2.1" 13 | default-features = false 14 | ``` 15 | However, performance is better with nightly rustc due to use of unstable `#![feature]`s. 
16 | 17 | ## Usage 18 | ```rust 19 | extern crate concurrent_hashmap; 20 | 21 | use concurrent_hashmap::*; 22 | 23 | fn main() { 24 | // Create a table mapping u32 to u32, using defaults 25 | let map = ConcHashMap::<u32, u32>::new(); 26 | map.insert(1, 2); 27 | map.insert(30, 12); 28 | if let Some(mut val) = map.find_mut(&30) { 29 | // Update a value in-place if it exists 30 | // This mapping can not be modified while we have a reference to it 31 | *val.get() += 3; 32 | } 33 | // Update the value with key 129, or insert a default (3) 34 | map.upsert(129, 3, &|x| *x *= 3); // 129 => 3 35 | map.upsert(129, 3, &|x| *x *= 3); // 129 => 9 36 | map.remove(&1); 37 | for (&k, &v) in map.iter() { 38 | println!("{} => {}", k, v); 39 | } 40 | } 41 | ``` 42 | 43 | For sharing a map between threads, you typically want to put it in an `Arc`. 44 | A less artificial (and actually multi-threaded) example can be found in `examples/wordcount.rs`. 45 | 46 | ## Implementation 47 | This hashtable works by partitioning the keys between several independent hashtables based on 48 | the initial bits of their hash values. 49 | Each of these partitions is protected by its own lock, so accessing a key in one partition 50 | does not block access to keys in other partitions. 51 | Under the assumption that the hash function uniformly distributes keys across partitions, 52 | contention is reduced by a factor equal to the number of partitions. 53 | A key will never move between partitions, so they can be resized independently and without 54 | locking other partitions. 55 | 56 | Each partition is an open-addressed hashtable, using quadratic probing. 57 | Deletion is handled by tombstones and bucket occupancy is tracked by a bitmap. 58 | 59 | Single-threaded insertion performance is similar to or better than `std::collections::HashMap`, 60 | while read performance is worse. 61 | 62 | ## Concurrency notes 63 | This is not a lock-free hashtable. 
64 | To achieve good performance, minimal work should be done while holding locks. 65 | Cases where locks are held include using the result of `.find()`/`.find_mut()`, 66 | running the updating closure in `.upsert()`, and iterating over the map. 67 | To reduce contention, the `ConcHashMap::with_options()` constructor can be used 68 | to set the `concurrency` parameter to the expected number of threads concurrently 69 | accessing the table. 70 | 71 | Iterating does not provide a consistent snapshot of the table's contents. 72 | Updates performed while iterating over the table may or may not be reflected in the iteration. 73 | Iterating works by locking one partition at a time. 74 | 75 | -------------------------------------------------------------------------------- /benches/concurrent.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | extern crate rand; 5 | extern crate concurrent_hashmap; 6 | use std::thread; 7 | use std::sync::{Barrier, Arc}; 8 | use test::Bencher; 9 | use rand::{Rng, SeedableRng, XorShiftRng}; 10 | use concurrent_hashmap::*; 11 | 12 | const OPS: u32 = 10000; 13 | 14 | #[bench] 15 | fn concurrent_ops_50_reads_2_threads(b: &mut Bencher) { 16 | bench(b, 0.50, 2); 17 | } 18 | 19 | #[bench] 20 | fn concurrent_ops_50_reads_4_threads(b: &mut Bencher) { 21 | bench(b, 0.50, 4); 22 | } 23 | 24 | #[bench] 25 | fn concurrent_ops_50_reads_8_threads(b: &mut Bencher) { 26 | bench(b, 0.50, 8); 27 | } 28 | 29 | #[bench] 30 | fn concurrent_ops_50_reads_16_threads(b: &mut Bencher) { 31 | bench(b, 0.50, 16); 32 | } 33 | 34 | #[bench] 35 | fn concurrent_ops_50_reads_32_threads(b: &mut Bencher) { 36 | bench(b, 0.50, 32); 37 | } 38 | 39 | #[ignore] 40 | #[bench] 41 | fn concurrent_ops_50_reads_64_threads(b: &mut Bencher) { 42 | bench(b, 0.950, 64); 43 | } 44 | 45 | #[bench] 46 | fn concurrent_ops_95_reads_2_threads(b: &mut Bencher) { 47 | bench(b, 0.95, 2); 48 | } 49 | 50 | 
#[bench] 51 | fn concurrent_ops_95_reads_4_threads(b: &mut Bencher) { 52 | bench(b, 0.95, 4); 53 | } 54 | 55 | #[bench] 56 | fn concurrent_ops_95_reads_8_threads(b: &mut Bencher) { 57 | bench(b, 0.95, 8); 58 | } 59 | 60 | #[bench] 61 | fn concurrent_ops_95_reads_16_threads(b: &mut Bencher) { 62 | bench(b, 0.95, 16); 63 | } 64 | 65 | #[bench] 66 | fn concurrent_ops_95_reads_32_threads(b: &mut Bencher) { 67 | bench(b, 0.95, 32); 68 | } 69 | 70 | #[ignore] 71 | #[bench] 72 | fn concurrent_ops_95_reads_64_threads(b: &mut Bencher) { 73 | bench(b, 0.95, 64); 74 | } 75 | 76 | #[bench] 77 | fn concurrent_ops_100_reads_2_threads(b: &mut Bencher) { 78 | bench(b, 1.00, 2); 79 | } 80 | 81 | #[bench] 82 | fn concurrent_ops_100_reads_4_threads(b: &mut Bencher) { 83 | bench(b, 1.00, 4); 84 | } 85 | 86 | #[bench] 87 | fn concurrent_ops_100_reads_8_threads(b: &mut Bencher) { 88 | bench(b, 1.00, 8); 89 | } 90 | 91 | #[bench] 92 | fn concurrent_ops_100_reads_16_threads(b: &mut Bencher) { 93 | bench(b, 1.00, 16); 94 | } 95 | 96 | #[bench] 97 | fn concurrent_ops_100_reads_32_threads(b: &mut Bencher) { 98 | bench(b, 1.00, 32); 99 | } 100 | 101 | #[bench] 102 | fn concurrent_ops_100_reads_64_threads(b: &mut Bencher) { 103 | bench(b, 1.00, 64); 104 | } 105 | 106 | fn bench(b: &mut Bencher, reads: f64, nthreads: u32) { 107 | b.iter(|| do_bench(reads, nthreads)); 108 | b.bytes = nthreads as u64 * OPS as u64; 109 | } 110 | 111 | fn do_bench(reads: f64, nthreads: u32) { 112 | assert!(reads >= 0.0 && reads <= 1.0); 113 | let map: Arc> = Arc::new(Default::default()); 114 | let nthreads = nthreads as usize; 115 | { 116 | let mut threads = Vec::new(); 117 | let start_barrier = Arc::new(Barrier::new(nthreads)); 118 | for _ in 0..nthreads { 119 | let map = map.clone(); 120 | let start_barrier = start_barrier.clone(); 121 | threads.push(thread::spawn(move || { 122 | let mut rng: XorShiftRng = SeedableRng::from_seed([1, 2, 3, 4]); 123 | let mut read = 0; 124 | start_barrier.wait(); 125 | for i in 
0..OPS { 126 | if rng.gen::() < reads { 127 | map.find(&i).map(|x| read += *x.get()); 128 | } else { 129 | map.insert(i, i * i); 130 | } 131 | } 132 | })); 133 | } 134 | for thread in threads { 135 | thread.join().unwrap(); 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /benches/single_threaded.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate rand; 4 | extern crate test; 5 | extern crate concurrent_hashmap; 6 | 7 | use std::default::Default; 8 | use std::cmp::max; 9 | use test::Bencher; 10 | use rand::{Rng, weak_rng, XorShiftRng}; 11 | use concurrent_hashmap::*; 12 | 13 | const INTEGERS: u32 = 100_000; 14 | 15 | macro_rules! new_map ( 16 | ($typ: ty) => ({ 17 | let mut options: Options<::std::collections::hash_map::RandomState> = Default::default(); 18 | options.concurrency = 4; 19 | ConcHashMap::<$typ, usize, _>::with_options(options) 20 | }) 21 | ); 22 | 23 | #[bench] 24 | #[inline(never)] 25 | fn insert_sequential_integers(b: &mut Bencher) { 26 | b.iter(|| { 27 | let map = new_map!(u32); 28 | for i in 0..INTEGERS { 29 | map.insert(i, 0); 30 | } 31 | map 32 | }); 33 | b.bytes = INTEGERS as u64; 34 | } 35 | 36 | #[bench] 37 | #[inline(never)] 38 | fn insert_random_integers(b: &mut Bencher) { 39 | let mut integers: Vec<_> = (0..INTEGERS).collect(); 40 | weak_rng().shuffle(&mut integers); 41 | b.iter(|| { 42 | let map = new_map!(u32); 43 | for &i in integers.iter() { 44 | map.insert(i, 0); 45 | } 46 | map 47 | }); 48 | b.bytes = INTEGERS as u64; 49 | } 50 | 51 | #[bench] 52 | #[inline(never)] 53 | fn insert_sequential_strings(b: &mut Bencher) { 54 | let strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect(); 55 | b.iter(|| { 56 | let map = new_map!(&str); 57 | for i in strings.iter() { 58 | map.insert(i, 0); 59 | } 60 | map 61 | }); 62 | b.bytes = INTEGERS as u64; 63 | } 64 | 65 | #[bench] 66 | 
#[inline(never)] 67 | fn insert_random_strings(b: &mut Bencher) { 68 | let mut strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect(); 69 | weak_rng().shuffle(&mut strings); 70 | b.iter(|| { 71 | let map = new_map!(&str); 72 | for i in strings.iter() { 73 | map.insert(i, 0); 74 | } 75 | map 76 | }); 77 | b.bytes = INTEGERS as u64; 78 | } 79 | 80 | #[bench] 81 | #[inline(never)] 82 | fn insert_sequential_integers_std(b: &mut Bencher) { 83 | b.iter(|| { 84 | let mut map = ::std::collections::HashMap::::new(); 85 | for i in 0..INTEGERS { 86 | map.insert(i, 0); 87 | } 88 | map 89 | }); 90 | b.bytes = INTEGERS as u64; 91 | } 92 | 93 | #[bench] 94 | #[inline(never)] 95 | fn insert_random_integers_std(b: &mut Bencher) { 96 | let mut integers: Vec<_> = (0..INTEGERS).collect(); 97 | weak_rng().shuffle(&mut integers); 98 | b.iter(|| { 99 | let mut map = ::std::collections::HashMap::::new(); 100 | for &i in integers.iter() { 101 | map.insert(i, 0); 102 | } 103 | map 104 | }); 105 | b.bytes = INTEGERS as u64; 106 | } 107 | 108 | #[bench] 109 | #[inline(never)] 110 | fn insert_sequential_strings_std(b: &mut Bencher) { 111 | let strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect(); 112 | b.iter(|| { 113 | let mut map = ::std::collections::HashMap::::new(); 114 | for i in strings.iter() { 115 | map.insert(i.clone(), 0); 116 | } 117 | map 118 | }); 119 | b.bytes = INTEGERS as u64; 120 | } 121 | 122 | #[bench] 123 | #[inline(never)] 124 | fn insert_random_strings_std(b: &mut Bencher) { 125 | let mut strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect(); 126 | weak_rng().shuffle(&mut strings); 127 | b.iter(|| { 128 | let mut map = ::std::collections::HashMap::::new(); 129 | for i in strings.iter() { 130 | map.insert(i.clone(), 0); 131 | } 132 | map 133 | }); 134 | b.bytes = INTEGERS as u64; 135 | } 136 | 137 | #[ignore] 138 | #[bench] 139 | #[inline(never)] 140 | fn random_integer_lookup_50_large(b: &mut 
Bencher) { 141 | let map = new_map!(u64); 142 | let len = 1000_000; 143 | for i in 0..len { 144 | map.insert(i, 0); 145 | } 146 | let mut nums: Vec<_> = (0..2 * len).collect(); 147 | XorShiftRng::new_unseeded().shuffle(&mut nums); 148 | b.iter(|| { 149 | for _ in 0..1 { 150 | for i in nums.iter() { 151 | test::black_box(map.find(i)); 152 | } 153 | } 154 | }); 155 | b.bytes = nums.len() as u64; 156 | } 157 | 158 | // TODO Replace these with a macro when #12249 is solved 159 | #[bench] 160 | #[inline(never)] 161 | fn random_integer_lookup_100(b: &mut Bencher) { 162 | random_integer_lookup(100.0, b, INTEGERS); 163 | } 164 | 165 | #[bench] 166 | #[inline(never)] 167 | fn random_integer_lookup_95(b: &mut Bencher) { 168 | random_integer_lookup(95.0, b, INTEGERS); 169 | } 170 | 171 | #[bench] 172 | #[inline(never)] 173 | fn random_integer_lookup_50(b: &mut Bencher) { 174 | random_integer_lookup(50.0, b, INTEGERS); 175 | } 176 | 177 | #[bench] 178 | #[inline(never)] 179 | fn random_integer_lookup_5(b: &mut Bencher) { 180 | random_integer_lookup(5.0, b, INTEGERS); 181 | } 182 | 183 | #[bench] 184 | #[inline(never)] 185 | fn random_integer_lookup_0(b: &mut Bencher) { 186 | random_integer_lookup(0.0, b, INTEGERS); 187 | } 188 | 189 | #[bench] 190 | #[inline(never)] 191 | fn random_integer_lookup_95_huge(b: &mut Bencher) { 192 | random_integer_lookup(95.0, b, INTEGERS * 100); 193 | } 194 | 195 | #[bench] 196 | #[inline(never)] 197 | fn random_string_lookup_95_huge(b: &mut Bencher) { 198 | random_string_lookup(95.0, b, INTEGERS * 100); 199 | } 200 | 201 | fn random_integer_lookup(hit_rate: f64, b: &mut Bencher, count: u32) { 202 | let mut rng = weak_rng(); 203 | let map = new_map!(u32); 204 | for i in 0..count { 205 | map.insert(i, 0); 206 | } 207 | let base_n = 1000; 208 | let n = max(1, base_n - (0.99 * base_n as f64 * (1.0 - hit_rate / 100.0)) as u32); 209 | let (min, max) = if hit_rate > 0.0 { 210 | (0, (count as f64 / (hit_rate / 100.0)) as u32) 211 | } else { 212 | 
(count, 2 * count) 213 | }; 214 | let keys: Vec<_> = (0..n).map(|_| rng.gen_range(min, max)).collect(); 215 | b.iter(|| 216 | for key in keys.iter() { 217 | test::black_box(map.find(key)); 218 | } 219 | ); 220 | b.bytes = n as u64 as u64; 221 | } 222 | 223 | fn random_string_lookup(hit_rate: f64, b: &mut Bencher, count: u32) { 224 | let mut rng = weak_rng(); 225 | let map = new_map!(String); 226 | for i in 0..count { 227 | map.insert(format!("____{}____", i), 0); 228 | } 229 | let keys: Vec<_> = map.iter() 230 | .map(|(k, _)| if rng.gen::() < hit_rate { k.to_string() } else { "miss".to_string() }) 231 | .collect(); 232 | b.iter(|| 233 | for key in keys.iter() { 234 | test::black_box(map.find(key)); 235 | } 236 | ); 237 | b.bytes = count as u64 as u64; 238 | } 239 | 240 | #[bench] 241 | #[inline(never)] 242 | fn random_integer_lookup_100_std(b: &mut Bencher) { 243 | random_integer_lookup_std(100.0, b); 244 | } 245 | 246 | #[bench] 247 | #[inline(never)] 248 | fn random_integer_lookup_95_std(b: &mut Bencher) { 249 | random_integer_lookup_std(95.0, b); 250 | } 251 | 252 | #[bench] 253 | #[inline(never)] 254 | fn random_integer_lookup_50_std(b: &mut Bencher) { 255 | random_integer_lookup_std(50.0, b); 256 | } 257 | 258 | #[bench] 259 | #[inline(never)] 260 | fn random_integer_lookup_5_std(b: &mut Bencher) { 261 | random_integer_lookup_std(5.0, b); 262 | } 263 | 264 | #[ignore] 265 | #[bench] 266 | #[inline(never)] 267 | fn random_integer_lookup_0_std(b: &mut Bencher) { 268 | random_integer_lookup_std(0.0, b); 269 | } 270 | 271 | fn random_integer_lookup_std(hit_rate: f64, b: &mut Bencher) { 272 | let mut rng = weak_rng(); 273 | let mut map = ::std::collections::HashMap::new(); 274 | for i in 0..INTEGERS { 275 | map.insert(i, 0); 276 | } 277 | let base_n = 1000; 278 | let n = max(1, base_n - (0.99 * base_n as f64 * (1.0 - hit_rate / 100.0)) as u32); 279 | let (min, max) = if hit_rate > 0.0 { 280 | (0, (INTEGERS as f64 / (hit_rate / 100.0)) as u32) 281 | } else { 282 | 
(INTEGERS, 2 * INTEGERS) 283 | }; 284 | let keys: Vec<_> = (0..n).map(|_| rng.gen_range(min, max)).collect(); 285 | b.iter(|| 286 | for key in keys.iter() { 287 | test::black_box(map.get(key)); 288 | } 289 | ); 290 | b.bytes = n as u64 as u64; 291 | } 292 | -------------------------------------------------------------------------------- /examples/readme.rs: -------------------------------------------------------------------------------- 1 | extern crate concurrent_hashmap; 2 | 3 | use concurrent_hashmap::*; 4 | 5 | fn main() { 6 | // Create a table mapping u32 to u32, using defaults 7 | let map = ConcHashMap::::new(); 8 | map.insert(1, 2); 9 | map.insert(30, 12); 10 | if let Some(mut val) = map.find_mut(&30) { 11 | // Update a value in-place if it exists 12 | // This mapping can not be modified while we have a reference to it 13 | *val.get() += 3; 14 | } 15 | // Update the value with key 129, or insert a default (3) 16 | map.upsert(129, 3, &|x| *x *= 3); // 129 => 3 17 | map.upsert(129, 3, &|x| *x *= 3); // 129 => 9 18 | map.remove(&1); 19 | for (&k, &v) in map.iter() { 20 | println!("{} => {}", k, v); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/wordcount.rs: -------------------------------------------------------------------------------- 1 | #![feature(step_by)] 2 | extern crate concurrent_hashmap; 3 | 4 | use std::io::Read; 5 | use std::io; 6 | use std::cmp; 7 | use std::thread; 8 | use std::default::Default; 9 | use std::sync::Arc; 10 | use concurrent_hashmap::*; 11 | 12 | fn main() { 13 | let words = Arc::new(read_words()); 14 | let word_counts: Arc> = Default::default(); 15 | count_words(words.clone(), word_counts.clone(), 4); 16 | let mut counts: Vec<(String, u32)> = word_counts.iter().map(|(s, &n)| (s.clone(), n)).collect(); 17 | counts.sort_by(|&(_, a), &(_, b)| a.cmp(&b)); 18 | for &(ref word, count) in counts.iter() { 19 | println!("{}\t{}", word, count); 20 | } 21 | } 22 | 23 | fn 
read_words() -> Vec { 24 | let mut input = String::new(); 25 | io::stdin().read_to_string(&mut input).unwrap(); 26 | input.split_whitespace() 27 | .map(|w| w.trim_matches(|c| ['.', '"', ':', ';', ',', '!', '?', ')', '(', '_'] 28 | .contains(&c))) 29 | .map(|w| w.to_lowercase()) 30 | .filter(|w| !w.is_empty()) 31 | .collect() 32 | } 33 | 34 | fn count_words(words: Arc>, word_counts: Arc>, nthreads: usize) { 35 | let mut threads = Vec::with_capacity(nthreads); 36 | let chunk_size = words.len() / nthreads; 37 | for chunk_index in (0..words.len()).step_by(chunk_size) { 38 | let words = words.clone(); 39 | let word_counts = word_counts.clone(); 40 | threads.push(thread::spawn(move || { 41 | for word in &words[chunk_index..cmp::min(words.len(), chunk_index + chunk_size)] { 42 | // It would be nice to be able to pass a &K to .upsert() 43 | // and have it clone as needed instead of passing a K. 44 | word_counts.upsert(word.to_owned(), 1, &|count| *count += 1); 45 | } 46 | })); 47 | } 48 | for thread in threads { 49 | thread.join().unwrap(); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate spin; 2 | 3 | mod table; 4 | mod map; 5 | 6 | pub use map::*; 7 | pub use table::Accessor; 8 | -------------------------------------------------------------------------------- /src/map.rs: -------------------------------------------------------------------------------- 1 | use std::hash::{Hasher, Hash}; 2 | use std::hash::BuildHasher; 3 | use std::collections::hash_map::RandomState; 4 | use spin::{Mutex, MutexGuard}; 5 | use std::default::Default; 6 | use std::mem::swap; 7 | use std::cmp::min; 8 | use std::u16; 9 | use std::borrow::Borrow; 10 | use std::iter::{FromIterator, IntoIterator}; 11 | use table::*; 12 | 13 | // This is the user-facing part of the implementation. 
14 | // ConcHashMap wraps a couple of actual hash tables (Table) with locks around them. 15 | // It uses the top bits of the hash to decide which Table to access for a given key. 16 | // The size of an invidual Table is limited (to a still unreasonably large value) so 17 | // that it will never use the forementioned to bits of the hash. 18 | // That means that resizing a Table will never cause a key to cross between Tables. 19 | // Therefore each table can be resized independently. 20 | 21 | /// A concurrent hashmap using sharding 22 | pub struct ConcHashMap where K: Send + Sync, V: Send + Sync { 23 | tables: Vec>>, 24 | hasher_factory: H, 25 | table_shift: u64, 26 | table_mask: u64, 27 | } 28 | 29 | impl ConcHashMap 30 | where K: Hash + Eq + Send + Sync, V: Send + Sync, H: BuildHasher { 31 | 32 | /// Creates a new hashmap using default options. 33 | pub fn new() -> ConcHashMap { 34 | Default::default() 35 | } 36 | 37 | /// Creates a new hashmap with custom options. 38 | pub fn with_options(opts: Options) -> ConcHashMap { 39 | let conc = opts.concurrency as usize; 40 | let partitions = conc.checked_next_power_of_two().unwrap_or((conc / 2).next_power_of_two()); 41 | let capacity = f64_to_usize(opts.capacity as f64 / 0.92).expect("capacity overflow"); 42 | let reserve = div_ceil(capacity, partitions); 43 | let mut tables = Vec::with_capacity(partitions); 44 | for _ in 0..partitions { 45 | tables.push(Mutex::new(Table::new(reserve))); 46 | } 47 | ConcHashMap { 48 | tables: tables, 49 | hasher_factory: opts.hasher_factory, 50 | table_shift: if partitions == 1 { 0 } else { 64 - partitions.trailing_zeros() as u64 }, 51 | table_mask: partitions as u64 - 1 52 | } 53 | } 54 | 55 | /// Searches for a key, returning an accessor to the mapped values (or `None` if no mapping 56 | /// exists). 57 | /// 58 | /// Note that as long as the `Accessor` lives, a lock is held. 
59 | /// 60 | /// # Examples 61 | /// 62 | /// Printing a value if it exists: 63 | /// 64 | /// ``` 65 | /// # use concurrent_hashmap::*; 66 | /// # let map = ConcHashMap::::new(); 67 | /// map.insert(100, 1); 68 | /// if let Some(val) = map.find(&100) { 69 | /// println!("100 => {}", val.get()); 70 | /// } 71 | /// # println!("workaround"); 72 | /// ``` 73 | #[inline(never)] 74 | pub fn find<'a, Q: ?Sized>(&'a self, key: &Q) -> Option> 75 | where K: Borrow + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync { 76 | let hash = self.hash(key); 77 | let table_idx = self.table_for(hash); 78 | let table = self.tables[table_idx].lock(); 79 | match table.lookup(hash, |k| k.borrow() == key) { 80 | Some(idx) => Some(Accessor::new(table, idx)), 81 | None => None 82 | } 83 | } 84 | 85 | /// Searches for a key, returning a mutable accessor to the mapped value 86 | /// (or `None` if no mapping exists). 87 | /// 88 | /// Note that as long as the `MutAccessor` lives, a lock is held. 89 | /// 90 | /// # Examples 91 | /// 92 | /// Adding 2 to a value if it exists: 93 | /// 94 | /// ``` 95 | /// # use concurrent_hashmap::*; 96 | /// # let map = ConcHashMap::::new(); 97 | /// map.insert(100, 1); 98 | /// if let Some(mut val) = map.find_mut(&100) { 99 | /// *val.get() += 2; 100 | /// } 101 | /// # println!("workaround"); 102 | /// ``` 103 | #[inline(never)] 104 | pub fn find_mut<'a, Q: ?Sized>(&'a self, key: &Q) -> Option> 105 | where K: Borrow + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync { 106 | let hash = self.hash(key); 107 | let table_idx = self.table_for(hash); 108 | let table = self.tables[table_idx].lock(); 109 | match table.lookup(hash, |k| k.borrow() == key) { 110 | Some(idx) => Some(MutAccessor::new(table, idx)), 111 | None => None 112 | } 113 | } 114 | 115 | /// Inserts a new mapping from `key` to `value`. 116 | /// If a previous mapping existed for `key`, it is returned. 
117 | #[inline(never)] 118 | pub fn insert(&self, key: K, value: V) -> Option { 119 | let hash = self.hash(&key); 120 | let table_idx = self.table_for(hash); 121 | let mut table = self.tables[table_idx].lock(); 122 | table.put(key, value, hash, |old, mut new| { swap(old, &mut new); new }) 123 | } 124 | 125 | /// Performs on "upsert" operation: 126 | /// Updates the value currently mapped to `key` using `updater`, 127 | /// or maps `key` to `value` if no previous mapping existed. 128 | /// 129 | /// # Examples 130 | /// ``` 131 | /// # use concurrent_hashmap::*; 132 | /// # use std::string::String; 133 | /// let word_counts = ConcHashMap::::new(); 134 | /// let words = ["a", "car", "is", "a", "thing"]; 135 | /// for word in words.iter().map(|s| s.to_string()) { 136 | /// word_counts.upsert(word, 1, &|count| *count += 1); 137 | /// } 138 | /// // Map is now "a"=>2, "car"=>1, "thing"=>1 139 | /// ``` 140 | pub fn upsert(&self, key: K, value: V, updater: &U) { 141 | let hash = self.hash(&key); 142 | let table_idx = self.table_for(hash); 143 | let mut table = self.tables[table_idx].lock(); 144 | table.put(key, value, hash, |old, _| { updater(old); }); 145 | } 146 | 147 | /// Removes any mapping associated with `key`. 148 | /// 149 | /// If a mapping was removed, the mapped values is returned. 
150 | pub fn remove<'a, Q: ?Sized>(&'a self, key: &Q) -> Option 151 | where K: Borrow + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync { 152 | let hash = self.hash(key); 153 | let table_idx = self.table_for(hash); 154 | let mut table = self.tables[table_idx].lock(); 155 | table.remove(hash, |k| k.borrow() == key) 156 | } 157 | 158 | fn table_for(&self, hash: u64) -> usize { 159 | ((hash >> self.table_shift) & self.table_mask) as usize 160 | } 161 | 162 | fn hash(&self, key: &Q) -> u64 163 | where K: Borrow + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync { 164 | let mut hasher = self.hasher_factory.build_hasher(); 165 | key.hash(&mut hasher); 166 | hasher.finish() 167 | } 168 | } 169 | 170 | impl Clone for ConcHashMap 171 | where K: Hash + Eq + Send + Sync + Clone, V: Send + Sync + Clone, H: BuildHasher + Clone { 172 | /// Clones the hashmap, returning a new map with the same mappings and hasher. 173 | /// 174 | /// If a consistent snapshot is desired, external synchronization is required. 175 | /// In the absence of external synchronization, this method has the same consistency guarantees 176 | /// as .iter(). 
177 | fn clone(&self) -> ConcHashMap { 178 | let clone = ConcHashMap::::with_options(Options { 179 | capacity: 16, // TODO 180 | hasher_factory: self.hasher_factory.clone(), 181 | concurrency: min(u16::MAX as usize, self.tables.len()) as u16 182 | }); 183 | for (k, v) in self.iter() { 184 | clone.insert(k.clone(), v.clone()); 185 | } 186 | return clone; 187 | } 188 | } 189 | 190 | impl FromIterator<(K, V)> for ConcHashMap 191 | where K: Eq + Hash + Send + Sync, V: Send + Sync, H: BuildHasher + Default { 192 | fn from_iter(iterator: T) -> Self where T: IntoIterator { 193 | let iterator = iterator.into_iter(); 194 | let mut options: Options = Default::default(); 195 | if let (_, Some(bound)) = iterator.size_hint() { 196 | options.capacity = bound; 197 | } 198 | let map = ConcHashMap::with_options(options); 199 | for (k, v) in iterator { 200 | map.insert(k, v); 201 | } 202 | return map; 203 | } 204 | } 205 | 206 | impl ConcHashMap where K: Send + Sync, V: Send + Sync { 207 | /// Iterates over all mappings. 208 | /// 209 | /// This method does not provide a consistent snapshot of the map. 210 | /// All mappings returned must have been in the map at some point, but updates performed during 211 | /// the iteration may or may not be reflected. 212 | /// 213 | /// Iterating may block writers. 214 | pub fn iter<'a>(&'a self) -> Entries<'a, K, V, H> { 215 | Entries { 216 | map: self, 217 | table: self.tables[0].lock(), 218 | table_idx: 0, 219 | bucket: 0 220 | } 221 | } 222 | 223 | /// Removes all mappings. 224 | /// 225 | /// In the absence of external synchronization, the map can not be guaranteed to have been empty 226 | /// at any point during or after the `.clear()` call. 227 | pub fn clear(&self) { 228 | for table in self.tables.iter() { 229 | table.lock().clear(); 230 | } 231 | } 232 | } 233 | 234 | impl Default for ConcHashMap 235 | where K: Hash + Eq + Send + Sync, V: Send + Sync, H: BuildHasher + Default { 236 | /// Equivalent to `ConcHashMap::new()`. 
237 | fn default() -> ConcHashMap { 238 | ConcHashMap::with_options(Default::default()) 239 | } 240 | } 241 | 242 | /// Iterator over the hashmap's mappings. 243 | pub struct Entries<'a, K, V, H> where K: 'a + Send + Sync, V: 'a + Send + Sync, H: 'a { 244 | map: &'a ConcHashMap, 245 | table: MutexGuard<'a, Table>, 246 | table_idx: usize, 247 | bucket: usize, 248 | } 249 | 250 | impl <'a, K, V, H> Entries<'a, K, V, H> where K: Send + Sync, V: Send + Sync { 251 | fn next_table(&mut self) { 252 | self.table_idx += 1; 253 | self.table = self.map.tables[self.table_idx].lock(); 254 | self.bucket = 0; 255 | } 256 | } 257 | 258 | impl <'a, K, V, H> Iterator for Entries<'a, K, V, H> where K: Send + Sync, V: Send + Sync { 259 | type Item = (&'a K, &'a V); 260 | 261 | fn next(&mut self) -> Option<(&'a K, &'a V)> { 262 | loop { 263 | if self.bucket == self.table.capacity() { 264 | if self.table_idx + 1 == self.map.tables.len() { 265 | return None; 266 | } 267 | self.next_table(); 268 | } 269 | let res: Option<(&'a K, &'a V)> = unsafe { ::std::mem::transmute(self.table.iter_advance(&mut self.bucket)) }; 270 | match res { 271 | Some(e) => return Some(e), 272 | None => { 273 | if self.table_idx + 1 == self.map.tables.len() { 274 | return None; 275 | } 276 | self.next_table() 277 | } 278 | } 279 | } 280 | } 281 | } 282 | 283 | /// Options used when creating a hashmap. 284 | pub struct Options { 285 | /// Number of mappings to preallocate space for. 286 | /// 287 | /// The map will always grow as needed, but preallocating space can improve performance. 288 | /// This value applies to the entire map. 289 | /// By default, no space is preallocated. 290 | pub capacity: usize, 291 | /// Factory for the hasher used for hashing keys. 292 | pub hasher_factory: H, 293 | /// Expected level of concurrency. 294 | /// 295 | /// This value controls the number of partitions used internally in the map. 296 | /// A higher value leads to less contention, but also greater memory overhead. 
297 | /// The default value is 16. 298 | pub concurrency: u16, 299 | } 300 | 301 | impl Default for Options where H: BuildHasher+Default { 302 | fn default() -> Options { 303 | Options { 304 | capacity: 0, 305 | hasher_factory: Default::default(), 306 | concurrency: 16 307 | } 308 | } 309 | } 310 | 311 | fn div_ceil(n: usize, d: usize) -> usize { 312 | if n == 0 { 313 | 0 314 | } else { 315 | n/d + if n % d == 0 { 1 } else { 0 } 316 | } 317 | } 318 | 319 | fn f64_to_usize(f: f64) -> Option { 320 | if f.is_nan() || f.is_sign_negative() || f > ::std::usize::MAX as f64 { 321 | None 322 | } else { 323 | Some(f as usize) 324 | } 325 | } 326 | 327 | #[cfg(test)] 328 | mod test { 329 | use std::hash::Hash; 330 | use std::hash::{BuildHasher, Hasher, BuildHasherDefault}; 331 | use std::default::Default; 332 | use std::fmt::Debug; 333 | use std::thread; 334 | use std::sync::Arc; 335 | use super::*; 336 | 337 | struct BadHasher; 338 | 339 | impl Hasher for BadHasher { 340 | fn write(&mut self, _: &[u8]) { } 341 | 342 | fn finish(&self) -> u64 { 0 } 343 | } 344 | 345 | impl Default for BadHasher { 346 | fn default() -> BadHasher { BadHasher } 347 | } 348 | 349 | struct OneAtATimeHasher { 350 | state: u64 351 | } 352 | 353 | impl Hasher for OneAtATimeHasher { 354 | fn write(&mut self, bytes: &[u8]) { 355 | for &b in bytes.iter() { 356 | self.state = self.state.wrapping_add(b as u64); 357 | self.state = self.state.wrapping_add(self.state << 10); 358 | self.state ^= self.state >> 6; 359 | } 360 | } 361 | 362 | fn finish(&self) -> u64 { 363 | let mut hash = self.state; 364 | hash = hash.wrapping_add(hash << 3); 365 | hash ^= hash >> 11; 366 | hash = hash.wrapping_add(hash << 15); 367 | hash 368 | } 369 | } 370 | 371 | impl Default for OneAtATimeHasher { 372 | fn default() -> OneAtATimeHasher { 373 | OneAtATimeHasher { state: 0x124C494467744825 } 374 | } 375 | } 376 | 377 | #[test] 378 | fn insert_is_found() { 379 | let map: ConcHashMap = Default::default(); 380 | 
assert!(map.find(&1).is_none()); 381 | map.insert(1, 2); 382 | assert_eq!(map.find(&1).unwrap().get(), &2); 383 | assert!(map.find(&2).is_none()); 384 | map.insert(2, 4); 385 | assert_eq!(map.find(&2).unwrap().get(), &4); 386 | } 387 | 388 | #[test] 389 | fn insert_replace() { 390 | let map: ConcHashMap = Default::default(); 391 | assert!(map.find(&1).is_none()); 392 | map.insert(1, &"old"); 393 | assert_eq!(map.find(&1).unwrap().get(), &"old"); 394 | let old = map.insert(1, &"new"); 395 | assert_eq!(Some("old"), old); 396 | assert_eq!(map.find(&1).unwrap().get(), &"new"); 397 | } 398 | 399 | #[test] 400 | fn insert_lots() { 401 | let map: ConcHashMap> = Default::default(); 402 | for i in 0..1000 { 403 | if i % 2 == 0 { 404 | map.insert(i, i * 2); 405 | } 406 | } 407 | for i in 0..1000 { 408 | if i % 2 == 0 { 409 | find_assert(&map, &i, &(i * 2)); 410 | } else { 411 | assert!(map.find(&i).is_none()); 412 | } 413 | } 414 | } 415 | 416 | #[test] 417 | fn insert_bad_hash_lots() { 418 | let map: ConcHashMap> = Default::default(); 419 | for i in 0..100 { 420 | if i % 2 == 0 { 421 | map.insert(i, i * 2); 422 | } 423 | } 424 | for i in 0..100 { 425 | if i % 2 == 0 { 426 | find_assert(&map, &i, &(i * 2)); 427 | } else { 428 | assert!(map.find(&i).is_none()); 429 | } 430 | } 431 | } 432 | 433 | #[test] 434 | fn find_none_on_empty() { 435 | let map: ConcHashMap = Default::default(); 436 | assert!(map.find(&1).is_none()); 437 | } 438 | 439 | #[test] 440 | fn test_clone() { 441 | let orig: ConcHashMap = Default::default(); 442 | for i in 0..100 { 443 | orig.insert(i, i * i); 444 | } 445 | let clone = orig.clone(); 446 | for i in 0..100 { 447 | assert_eq!(orig.find(&i).unwrap().get(), clone.find(&i).unwrap().get()); 448 | } 449 | } 450 | 451 | #[test] 452 | fn test_clear() { 453 | let map: ConcHashMap = Default::default(); 454 | for i in 0..100 { 455 | map.insert(i, i * i); 456 | } 457 | map.clear(); 458 | for i in 0..100 { 459 | assert!(map.find(&i).is_none()); 460 | } 461 | } 
462 | 463 | #[test] 464 | fn test_remove() { 465 | let map: ConcHashMap = Default::default(); 466 | map.insert(1, "one".to_string()); 467 | map.insert(2, "two".to_string()); 468 | map.insert(3, "three".to_string()); 469 | assert_eq!(Some("two".to_string()), map.remove(&2)); 470 | assert_eq!("one", map.find(&1).unwrap().get()); 471 | assert!(map.find(&2).is_none()); 472 | assert_eq!("three", map.find(&3).unwrap().get()); 473 | } 474 | 475 | #[test] 476 | fn test_remove_many() { 477 | let map: ConcHashMap = Default::default(); 478 | for i in 0..100 { 479 | map.insert(i, (i * i).to_string()); 480 | } 481 | for i in 0..100 { 482 | if i % 2 == 0 { 483 | assert_eq!(Some((i * i).to_string()), map.remove(&i)); 484 | } 485 | } 486 | for i in 0..100 { 487 | let x = map.find(&i); 488 | if i % 2 == 0 { 489 | assert!(x.is_none()); 490 | } else { 491 | assert_eq!(&(i * i).to_string(), x.unwrap().get()); 492 | } 493 | } 494 | } 495 | 496 | #[test] 497 | fn test_remove_insert() { 498 | let map: ConcHashMap = Default::default(); 499 | for i in 0..100 { 500 | map.insert(i, (i * i).to_string()); 501 | } 502 | for i in 0..100 { 503 | if i % 2 == 0 { 504 | assert_eq!(Some((i * i).to_string()), map.remove(&i)); 505 | } 506 | } 507 | for i in 0..100 { 508 | if i % 4 == 0 { 509 | map.insert(i, i.to_string()); 510 | } 511 | } 512 | for i in 0..100 { 513 | let x = map.find(&i); 514 | if i % 4 == 0 { 515 | assert_eq!(&i.to_string(), x.unwrap().get()); 516 | } else if i % 2 == 0 { 517 | assert!(x.is_none()); 518 | } else { 519 | assert_eq!(&(i * i).to_string(), x.unwrap().get()); 520 | } 521 | } 522 | } 523 | 524 | #[test] 525 | fn test_from_iterator() { 526 | let vec: Vec<(u32, u32)> = (0..100).map(|i| (i, i * i)).collect(); 527 | let map: ConcHashMap = vec.iter().map(|x| *x).collect(); 528 | for &(k, v) in vec.iter() { 529 | find_assert(&map, &k, &v); 530 | } 531 | } 532 | 533 | #[test] 534 | fn mut_modify() { 535 | let map: ConcHashMap = Default::default(); 536 | map.insert(1, 0); 537 | 
let mut e = map.find_mut(&1).unwrap().get(); 538 | *e += 1; 539 | assert_eq!(&1, map.find(&1).unwrap().get()); 540 | } 541 | 542 | #[test] 543 | fn conc_mut_modify() { 544 | let mmap: Arc> = Arc::new(Default::default()); 545 | let map = mmap.clone(); 546 | let range = 10000; 547 | for i in 0..range { 548 | map.insert(i, i*i); 549 | } 550 | 551 | let tl_map = mmap.clone(); 552 | let reader = thread::spawn(move || { 553 | for i in 0..range { 554 | tl_map.find(&i).unwrap().get(); 555 | } 556 | }); 557 | 558 | let tl_map = mmap.clone(); 559 | let writer = thread::spawn(move || { 560 | for i in 0..range { 561 | let mut e = tl_map.find_mut(&i).unwrap().get(); 562 | *e += 1; 563 | } 564 | }); 565 | 566 | reader.join().unwrap(); 567 | writer.join().unwrap(); 568 | for i in 0..range { 569 | assert_eq!(map.find(&i).unwrap().get(), &(i*i+1)); 570 | } 571 | } 572 | 573 | fn find_assert (map: &ConcHashMap, key: &K, expected_val: &V) 574 | where K: Eq + Hash + Debug + Send + Sync, V: Eq + Debug + Send + Sync, H: BuildHasher { 575 | match map.find(key) { 576 | None => panic!("missing key {:?} should map to {:?}", key, expected_val), 577 | Some(v) => assert_eq!(*v.get(), *expected_val) 578 | } 579 | } 580 | } 581 | -------------------------------------------------------------------------------- /src/table.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | use spin::MutexGuard; 3 | use std::ptr::{self, drop_in_place}; 4 | use std::mem; 5 | use std::cmp::max; 6 | use std::mem::size_of; 7 | use std::marker::{Send, Sync}; 8 | 9 | // This is the actual hash table implementation. 10 | // The Table struct does not have any synchronization; that is handled by the ConHashMap wrapper. 11 | // It uses open addressing with quadratic probing, with a bitmap for tracking bucket occupancy, 12 | // and uses tombstones to track deleted entries. 13 | 14 | // Minimum size of table when resizing. 
15 | // Initially, zero-sized tables are allowed to avoid allocation. 16 | // When they need to reallocate, this is the smallest size used. 17 | const MIN_CAPACITY: usize = 1 << 5; 18 | 19 | // Largest number of elements in a table. 20 | // We want to be able to use the top 16 bits of the hash for choosing the partition. 21 | // If we limit the size of the partition to 47 bits, elements will never change partition. 22 | // Thus we can resize each partition individually. 23 | const MAX_CAPACITY: u64 = (1 << 48) - 1; 24 | 25 | // This masks out the metadata bits of the hash field. 26 | const HASH_MASK: u64 = 0x0000FFFFFFFFFFFF; 27 | 28 | // If this bit is in a stored hash, the entry entry has been removed. 29 | const TOMBSTONE: u64 = 0x0001000000000000; 30 | 31 | // If this bit is in a stored hash, the entry entry is present. 32 | const PRESENT: u64 = 0x1000000000000000; 33 | 34 | // The proper heap API is only available in nightlies 35 | unsafe fn alloc(count: usize, zero: bool) -> *mut T { 36 | let mut dummy: Vec = Vec::with_capacity(count); 37 | let ptr = dummy.as_mut_ptr(); 38 | if zero { 39 | ptr::write_bytes(ptr, 0, count); 40 | } 41 | mem::forget(dummy); 42 | return ptr; 43 | } 44 | 45 | unsafe fn dealloc(p: *mut T, count: usize) { 46 | let _dummy: Vec = Vec::from_raw_parts(p, 0, count); 47 | // Dummy is dropped and the memory is freed 48 | } 49 | 50 | pub struct Table { 51 | hashes: *mut u64, 52 | keys: *mut K, 53 | values: *mut V, 54 | capacity: usize, 55 | len: usize, 56 | } 57 | 58 | /// A handle to a particular mapping. 59 | /// 60 | /// Note that this acts as a lock guard to a part of the map. 61 | pub struct Accessor<'a, K: 'a, V: 'a> { 62 | table: MutexGuard<'a, Table>, 63 | idx: usize 64 | } 65 | 66 | /// A mutable handle to a particular mapping. 67 | /// 68 | /// Note that this acts as a lock guard to a part of the map. 
69 | pub struct MutAccessor<'a, K: 'a, V: 'a> { 70 | table: MutexGuard<'a, Table>, 71 | idx: usize 72 | } 73 | 74 | impl <'a, K, V> Accessor<'a, K, V> { 75 | pub fn new(table: MutexGuard<'a, Table>, idx: usize) -> Accessor<'a, K, V> { 76 | Accessor { 77 | table: table, 78 | idx: idx 79 | } 80 | } 81 | 82 | pub fn get(&self) -> &'a V { 83 | debug_assert!(self.table.is_present(self.idx)); 84 | unsafe { 85 | &*self.table.values.offset(self.idx as isize) 86 | } 87 | } 88 | } 89 | 90 | impl <'a, K, V> MutAccessor<'a, K, V> { 91 | pub fn new(table: MutexGuard<'a, Table>, idx: usize) -> MutAccessor<'a, K, V> { 92 | MutAccessor { 93 | table: table, 94 | idx: idx 95 | } 96 | } 97 | 98 | pub fn get(&mut self) -> &'a mut V { 99 | debug_assert!(self.table.is_present(self.idx)); 100 | unsafe { 101 | &mut *self.table.values.offset(self.idx as isize) 102 | } 103 | } 104 | } 105 | 106 | impl Table where K: Hash + Eq { 107 | pub fn new(capacity: usize) -> Table { 108 | assert!(size_of::() > 0 && size_of::() > 0, "zero-size types not yet supported"); 109 | let capacity = if capacity == 0 { 0 } else { capacity.next_power_of_two() }; 110 | Table { 111 | capacity: capacity, 112 | len: 0, 113 | hashes: unsafe { alloc(capacity, true) }, 114 | keys: unsafe { alloc(capacity, false) }, 115 | values: unsafe { alloc(capacity, false) } 116 | } 117 | } 118 | 119 | pub fn lookup(&self, hash: u64, eq: C) -> Option where C: Fn(&K) -> bool { 120 | let len = self.capacity; 121 | if len == 0 { 122 | return None; 123 | } 124 | let mask = len - 1; 125 | let hash = hash & HASH_MASK; 126 | let mut i = hash as usize & mask; 127 | let mut j = 0; 128 | loop { 129 | if self.is_present(i) && self.compare_key_at(&eq, i) { 130 | return Some(i); 131 | } 132 | if !self.is_present(i) && !self.is_deleted(i) { 133 | // The key we're searching for would have been placed here if it existed 134 | return None; 135 | } 136 | if i == len - 1 { return None; } 137 | j += 1; 138 | i = (i + j) & mask; 139 | } 140 | } 141 | 
142 | pub fn put T>(&mut self, key: K, value: V, hash: u64, update: U) -> Option { 143 | if self.capacity == 0 { 144 | self.resize(); 145 | } 146 | loop { 147 | let len = self.capacity; 148 | let hash = hash & HASH_MASK; 149 | let mask = len - 1; 150 | let mut i = (hash as usize) & mask; 151 | let mut j = 0; 152 | loop { 153 | if !self.is_present(i) { 154 | unsafe { self.put_at_empty(i, key, value, hash); } 155 | self.len += 1; 156 | return None; 157 | } else if self.compare_key_at(&|k| k == &key, i) { 158 | let old_value = unsafe { &mut *self.values.offset(i as isize) }; 159 | return Some(update(old_value, value)); 160 | } 161 | if i == len - 1 { break; } 162 | j += 1; 163 | i = (i + j) & mask; 164 | } 165 | self.resize(); 166 | } 167 | } 168 | 169 | pub fn remove(&mut self, hash: u64, eq: C) -> Option where C: Fn(&K) -> bool { 170 | let i = match self.lookup(hash, eq) { 171 | Some(i) => i, 172 | None => return None 173 | }; 174 | unsafe { 175 | drop_in_place::(self.keys.offset(i as isize)); 176 | *self.hashes.offset(i as isize) = TOMBSTONE; 177 | self.len -= 1; 178 | let value = ptr::read(self.values.offset(i as isize)); 179 | return Some(value); 180 | } 181 | } 182 | 183 | #[inline] 184 | fn compare_key_at(&self, eq: &C, idx: usize) -> bool where C: Fn(&K) -> bool { 185 | assert!(self.is_present(idx)); 186 | unsafe { eq(&*self.keys.offset(idx as isize)) } 187 | } 188 | 189 | unsafe fn put_at_empty(&mut self, idx: usize, key: K, value: V, hash: u64) { 190 | let i = idx as isize; 191 | *self.hashes.offset(i) = hash | PRESENT; 192 | ptr::write(self.keys.offset(i), key); 193 | ptr::write(self.values.offset(i), value); 194 | } 195 | 196 | fn resize(&mut self) { 197 | let new_capacity = max(self.capacity.checked_add(self.capacity).expect("size overflow"), MIN_CAPACITY); 198 | if new_capacity as u64 > MAX_CAPACITY { 199 | panic!("requested size: {}, max size: {}", new_capacity, MAX_CAPACITY); 200 | } 201 | let mut new_table = Table::new(new_capacity); 202 | unsafe { 
203 | self.foreach_present_idx(|i| { 204 | let hash: u64 = *self.hashes.offset(i as isize); 205 | new_table.put(ptr::read(self.keys.offset(i as isize)), 206 | ptr::read(self.values.offset(i as isize)), 207 | hash, |_, _| { }); 208 | }); 209 | dealloc(self.hashes, self.capacity); 210 | dealloc(self.keys, self.capacity); 211 | dealloc(self.values, self.capacity); 212 | // This is checked in drop() to see that this instance is already "dropped" 213 | self.hashes = ptr::null_mut(); 214 | } 215 | mem::swap(self, &mut new_table); 216 | } 217 | 218 | // fn _dump_table(&self) { 219 | // unsafe { 220 | // let table = ::std::slice::from_raw_parts(self.buckets, self.capacity); 221 | // for (i, e) in table.iter().enumerate() { 222 | // if self.present[i] { 223 | // println!("{}:\t{:?}\t=>\t{:?}", 224 | // i, e.key, e.value,); 225 | // } else { 226 | // println!("{}:\tempty", i); 227 | // } 228 | // } 229 | // } 230 | // } 231 | } 232 | 233 | impl Table { 234 | pub fn capacity(&self) -> usize { self.capacity } 235 | 236 | /// Used to implement iteration. 237 | /// Search for a present bucket >= idx. 238 | /// If one is found, Some(..) is returned and idx is set to a value 239 | /// that can be passed back to iter_advance to look for the next bucket. 240 | /// When all bucket have been scanned, idx is set to self.capacity. 
241 | pub fn iter_advance<'a>(&'a self, idx: &mut usize) -> Option<(&'a K, &'a V)> { 242 | if *idx >= self.capacity { 243 | return None; 244 | } 245 | for i in *idx..self.capacity { 246 | if self.is_present(i) { 247 | *idx = i + 1; 248 | let entry = unsafe { 249 | let key = self.keys.offset(i as isize); 250 | let value = self.values.offset(i as isize); 251 | (&*key, &*value) 252 | }; 253 | return Some(entry); 254 | } 255 | } 256 | *idx = self.capacity; 257 | return None; 258 | } 259 | 260 | pub fn clear(&mut self) { 261 | self.foreach_present_idx(|i| { 262 | unsafe { 263 | drop_in_place::(self.keys.offset(i as isize)); 264 | drop_in_place::(self.values.offset(i as isize)); 265 | } 266 | }); 267 | unsafe { 268 | ptr::write_bytes(self.hashes, 0, self.capacity); 269 | } 270 | self.len = 0; 271 | } 272 | 273 | fn is_present(&self, idx: usize) -> bool { 274 | assert!(idx < self.capacity); 275 | self.hash_at(idx) & PRESENT != 0 276 | } 277 | 278 | fn is_deleted(&self, idx: usize) -> bool { 279 | assert!(idx < self.capacity); 280 | !self.is_present(idx) && self.hash_at(idx) & TOMBSTONE != 0 281 | } 282 | 283 | fn hash_at(&self, idx: usize) -> u64 { 284 | assert!(idx < self.capacity); 285 | unsafe { *self.hashes.offset(idx as isize) } 286 | } 287 | 288 | fn foreach_present_idx(&self, mut f: F) where F: FnMut(usize) { 289 | let mut seen = 0; 290 | for i in 0..self.capacity { 291 | if seen == self.len { 292 | return; 293 | } 294 | if self.is_present(i) { 295 | seen += 1; 296 | f(i); 297 | } 298 | } 299 | } 300 | } 301 | 302 | impl Drop for Table { 303 | fn drop(&mut self) { 304 | if self.hashes.is_null() { 305 | // "Dying" instance that has been resized 306 | return; 307 | } 308 | self.foreach_present_idx(|i| { 309 | unsafe { 310 | drop_in_place::(self.keys.offset(i as isize)); 311 | drop_in_place::(self.values.offset(i as isize)); 312 | } 313 | }); 314 | unsafe { 315 | dealloc(self.hashes, self.capacity); 316 | dealloc(self.keys, self.capacity); 317 | dealloc(self.values, 
self.capacity); 318 | } 319 | } 320 | } 321 | 322 | unsafe impl Sync for Table where K: Send + Sync, V: Send + Sync { } 323 | 324 | unsafe impl Send for Table where K: Send, V: Send { } 325 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | extern crate rand; 2 | extern crate concurrent_hashmap; 3 | 4 | use std::collections::HashMap; 5 | use std::thread; 6 | use std::default::Default; 7 | use std::sync::Arc; 8 | use rand::{Rng, weak_rng}; 9 | use concurrent_hashmap::*; 10 | 11 | /// Spawn a lot of threads that update the map conccurently at different ranges. 12 | /// Checks that random numbers in the total range are either empty or have correct values. 13 | #[test] 14 | fn many_threads() { 15 | let mut threads = Vec::new(); 16 | let map: Arc> = Arc::new(Default::default()); 17 | let n = 1500; 18 | let nthreads = 30; 19 | let max = nthreads * n; 20 | for t in 0..nthreads { 21 | let map = map.clone(); 22 | threads.push(thread::spawn(move || { 23 | let mut rng = weak_rng(); 24 | let s = t * n; 25 | for i in s..s + n { 26 | map.insert(i, t); 27 | let x = rng.gen_range(0, max); 28 | match map.find(&x) { 29 | Some(ref y) if x / n != *y.get() => return Err(format!("{} => {}", x, *y.get())), 30 | _ => { } 31 | } 32 | } 33 | Ok(()) 34 | })); 35 | } 36 | for thread in threads { 37 | assert_eq!(thread.join().unwrap(), Ok(())); 38 | } 39 | } 40 | 41 | /// Count elements in a list both sequentially and parallel, then verify that the results are the same. 
42 | #[test] 43 | fn count_compare_with_sequential() { 44 | let n = 10000; 45 | let max = 100; 46 | let mut rng = weak_rng(); 47 | let nums: Vec<_> = (0..n).map(|_| rng.gen_range(0, max)).collect(); 48 | 49 | let seq = count_seq(&nums); 50 | let par = count_par(&nums); 51 | 52 | for k in 0..max { 53 | let seq_v = seq.get(&k); 54 | let par_v = par.find(&k); 55 | if seq_v.is_none() && par_v.is_none() { 56 | continue; 57 | } 58 | assert_eq!(seq_v.unwrap(), par_v.unwrap().get()); 59 | } 60 | 61 | fn count_seq(nums: &[u32]) -> HashMap { 62 | let mut map = HashMap::new(); 63 | for &num in nums { 64 | *map.entry(num).or_insert(0) += 1; 65 | } 66 | return map; 67 | } 68 | 69 | fn count_par(nums: &[u32]) -> Arc> { 70 | let map: Arc> = Default::default(); 71 | let mut threads = Vec::new(); 72 | for ns in nums.chunks(nums.len() / 4) { 73 | let map = map.clone(); 74 | let ns = ns.iter().cloned().collect::>(); 75 | threads.push(thread::spawn(move || { 76 | for &num in ns.iter() { 77 | map.upsert(num, 1, &|count| *count += 1); 78 | } 79 | })); 80 | } 81 | for thread in threads { 82 | thread.join().unwrap(); 83 | } 84 | map 85 | } 86 | } --------------------------------------------------------------------------------