├── .gitignore
├── Cargo.toml
├── README.md
├── benches
    ├── concurrent.rs
    └── single_threaded.rs
├── examples
    ├── readme.rs
    └── wordcount.rs
├── src
    ├── lib.rs
    ├── map.rs
    └── table.rs
└── tests
    └── tests.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | *.swp
4 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "concurrent-hashmap"
 3 | version = "0.2.2"
 4 | authors = ["Viktor Dahl <pazaconyoman@gmail.com>"]
 5 | license = "MIT/Apache-2.0"
 6 | repository = "https://github.com/veddan/rust-concurrent-hashmap.git"
 7 | documentation = "https://veddan.github.io/rustdoc/concurrent-hashmap/concurrent_hashmap/index.html"
 8 | description = "A concurrent hashmap library."
 9 | 
10 | [dependencies.spin]
11 | version = '0.4.5'
12 | default-features = false
13 | 
14 | [features]
15 | unstable = ["spin/asm"]
16 | default = ["unstable"]
17 | 
18 | [dev-dependencies]
19 | rand = '0.3.11'
20 | 
21 | [profile.test]
22 | opt-level = 1
23 | 
24 | [profile.bench]
25 | debug = true
26 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # rust-concurrent-hashmap
 2 | 
 3 | [![Crates.io](https://img.shields.io/crates/v/concurrent-hashmap.svg)](https://crates.io/crates/concurrent-hashmap)
 4 | 
 5 | [Documentation](https://veddan.github.io/rustdoc/concurrent-hashmap/concurrent_hashmap/index.html)
 6 | 
 7 | This is a Rust library implementing a concurrent hashmap.
 8 | 
 9 | The crate works on stable Rust if default features are disabled:
10 | ```toml
11 | [dependencies.concurrent-hashmap]
12 | version = "0.2.1"
13 | default-features = false
14 | ```
15 | However, performance is better with nightly rustc due to use of unstable `#![feature]`s.
16 | 
17 | ## Usage
18 | ```rust
19 | extern crate concurrent_hashmap;
20 | 
21 | use concurrent_hashmap::*;
22 | 
23 | fn main() {
24 |     // Create a table mapping u32 to u32, using defaults
25 |     let map = ConcHashMap::<u32, u32>::new();
26 |     map.insert(1, 2);
27 |     map.insert(30, 12);
28 |     if let Some(mut val) = map.find_mut(&30) {
29 |         // Update a value in-place if it exists
30 |         // This mapping can not be modified while we have a reference to it
31 |         *val.get() += 3;
32 |     }
33 |     // Update the value with key 129, or insert a default (3)
34 |     map.upsert(129, 3, &|x| *x *= 3);  // 129 => 3
35 |     map.upsert(129, 3, &|x| *x *= 3);  // 129 => 9
36 |     map.remove(&1);
37 |     for (&k, &v) in map.iter() {
38 |         println!("{} => {}", k, v);
39 |     }
40 | }
41 | ```
42 | 
43 | For sharing a map between threads, you typically want to put it in an `Arc`.
44 | A less artificial (and actually multi-threaded) example can be found in `examples/wordcount.rs`.
45 | 
46 | ## Implementation
47 | This hashtable works by partitioning the keys between several independent hashtables based on
48 |  the initial bits of their hash values.
49 | Each of these partitions is protected by its own lock, so accessing a key in one partition
50 |  does not block access to keys in other partitions.
51 | Under the assumption that the hash function uniformly distributes keys across partitions,
52 |  contention is reduced by a factor equal to the number of partitions.
53 | A key will never move between partitions, so they can be resized independently and without
54 |  locking other partitions.
55 | 
56 | Each partition is an open-addressed hashtable, using quadratic probing.
57 | Deletion is handled by tombstones and bucket occupancy is tracked by a bitmap.
58 | 
59 | Single-threaded insertion performance is similar to or better than `std::collections::HashMap`,
60 |  while read performance is worse.
61 | 
62 | ## Concurrency notes
63 | This is not a lock-free hashtable.
64 | To achieve good performance, minimal work should be done while holding locks.
65 | Cases where locks are held include using the result of `.find()`/`.find_mut()`,
66 |  running the updating closure in `.upsert()`, and iterating over the map.
67 | To reduce contention, the `ConcHashMap::with_options()` constructor can be used
68 |  to set the `concurrency` parameter to the expected number of threads concurrently
69 |  accessing the table.
70 | 
71 | Iterating does not provide a consistent snapshot of the table's contents.
72 | Updates performed while iterating over the table may or may not be reflected in the iteration.
73 | Iterating works by locking one partition at a time.
74 | 
75 | 


--------------------------------------------------------------------------------
/benches/concurrent.rs:
--------------------------------------------------------------------------------
  1 | #![feature(test)]
  2 | 
  3 | extern crate test;
  4 | extern crate rand;
  5 | extern crate concurrent_hashmap;
  6 | use std::thread;
  7 | use std::sync::{Barrier, Arc};
  8 | use test::Bencher;
  9 | use rand::{Rng, SeedableRng, XorShiftRng};
 10 | use concurrent_hashmap::*;
 11 | 
 12 | const OPS: u32 = 10000;
 13 | 
 14 | #[bench]
 15 | fn concurrent_ops_50_reads_2_threads(b: &mut Bencher) {
 16 |     bench(b, 0.50, 2);
 17 | }
 18 | 
 19 | #[bench]
 20 | fn concurrent_ops_50_reads_4_threads(b: &mut Bencher) {
 21 |     bench(b, 0.50, 4);
 22 | }
 23 | 
 24 | #[bench]
 25 | fn concurrent_ops_50_reads_8_threads(b: &mut Bencher) {
 26 |     bench(b, 0.50, 8);
 27 | }
 28 | 
 29 | #[bench]
 30 | fn concurrent_ops_50_reads_16_threads(b: &mut Bencher) {
 31 |     bench(b, 0.50, 16);
 32 | }
 33 | 
 34 | #[bench]
 35 | fn concurrent_ops_50_reads_32_threads(b: &mut Bencher) {
 36 |     bench(b, 0.50, 32);
 37 | }
 38 | 
 39 | #[ignore]
 40 | #[bench]
 41 | fn concurrent_ops_50_reads_64_threads(b: &mut Bencher) {
 42 |     bench(b, 0.50, 64);  // 50% reads, like the other `*_50_reads_*` benchmarks
 43 | }
 44 | 
 45 | #[bench]
 46 | fn concurrent_ops_95_reads_2_threads(b: &mut Bencher) {
 47 |     bench(b, 0.95, 2);
 48 | }
 49 | 
 50 | #[bench]
 51 | fn concurrent_ops_95_reads_4_threads(b: &mut Bencher) {
 52 |     bench(b, 0.95, 4);
 53 | }
 54 | 
 55 | #[bench]
 56 | fn concurrent_ops_95_reads_8_threads(b: &mut Bencher) {
 57 |     bench(b, 0.95, 8);
 58 | }
 59 | 
 60 | #[bench]
 61 | fn concurrent_ops_95_reads_16_threads(b: &mut Bencher) {
 62 |     bench(b, 0.95, 16);
 63 | }
 64 | 
 65 | #[bench]
 66 | fn concurrent_ops_95_reads_32_threads(b: &mut Bencher) {
 67 |     bench(b, 0.95, 32);
 68 | }
 69 | 
 70 | #[ignore]
 71 | #[bench]
 72 | fn concurrent_ops_95_reads_64_threads(b: &mut Bencher) {
 73 |     bench(b, 0.95, 64);
 74 | }
 75 | 
 76 | #[bench]
 77 | fn concurrent_ops_100_reads_2_threads(b: &mut Bencher) {
 78 |     bench(b, 1.00, 2);
 79 | }
 80 | 
 81 | #[bench]
 82 | fn concurrent_ops_100_reads_4_threads(b: &mut Bencher) {
 83 |     bench(b, 1.00, 4);
 84 | }
 85 | 
 86 | #[bench]
 87 | fn concurrent_ops_100_reads_8_threads(b: &mut Bencher) {
 88 |     bench(b, 1.00, 8);
 89 | }
 90 | 
 91 | #[bench]
 92 | fn concurrent_ops_100_reads_16_threads(b: &mut Bencher) {
 93 |     bench(b, 1.00, 16);
 94 | }
 95 | 
 96 | #[bench]
 97 | fn concurrent_ops_100_reads_32_threads(b: &mut Bencher) {
 98 |     bench(b, 1.00, 32);
 99 | }
100 | 
101 | #[bench]
102 | fn concurrent_ops_100_reads_64_threads(b: &mut Bencher) {
103 |     bench(b, 1.00, 64);
104 | }
105 | 
106 | fn bench(b: &mut Bencher, reads: f64, nthreads: u32) {
107 |     b.iter(|| do_bench(reads, nthreads));
108 |     b.bytes = nthreads as u64 * OPS as u64;
109 | }
110 | 
111 | fn do_bench(reads: f64, nthreads: u32) {
112 |     assert!(reads >= 0.0 && reads <= 1.0);
113 |     let map: Arc<ConcHashMap<u32, u32>> = Arc::new(Default::default());
114 |     let nthreads = nthreads as usize;
115 |     {
116 |         let mut threads = Vec::new();
117 |         let start_barrier = Arc::new(Barrier::new(nthreads));
118 |         for _ in 0..nthreads {
119 |             let map = map.clone();
120 |             let start_barrier = start_barrier.clone();
121 |             threads.push(thread::spawn(move || {
122 |                 let mut rng: XorShiftRng = SeedableRng::from_seed([1, 2, 3, 4]);
123 |                 let mut read = 0;
124 |                 start_barrier.wait();
125 |                 for i in 0..OPS {
126 |                     if rng.gen::<f64>() < reads {
127 |                         map.find(&i).map(|x| read += *x.get());
128 |                     } else {
129 |                         map.insert(i, i * i);
130 |                     }
131 |                 }
132 |             }));
133 |         }
134 |         for thread in threads {
135 |             thread.join().unwrap();
136 |         }
137 |     }
138 | }
139 | 


--------------------------------------------------------------------------------
/benches/single_threaded.rs:
--------------------------------------------------------------------------------
  1 | #![feature(test)]
  2 | 
  3 | extern crate rand;
  4 | extern crate test;
  5 | extern crate concurrent_hashmap;
  6 | 
  7 | use std::default::Default;
  8 | use std::cmp::max;
  9 | use test::Bencher;
 10 | use rand::{Rng, weak_rng, XorShiftRng};
 11 | use concurrent_hashmap::*;
 12 | 
 13 | const INTEGERS: u32 = 100_000;
 14 | 
 15 | macro_rules! new_map (
 16 |     ($typ: ty) => ({
 17 |         let mut options: Options<::std::collections::hash_map::RandomState> = Default::default();
 18 |         options.concurrency = 4;
 19 |         ConcHashMap::<$typ, usize, _>::with_options(options)
 20 |     })
 21 | );
 22 | 
 23 | #[bench]
 24 | #[inline(never)]
 25 | fn insert_sequential_integers(b: &mut Bencher) {
 26 |     b.iter(|| {
 27 |         let map = new_map!(u32);
 28 |         for i in 0..INTEGERS {
 29 |             map.insert(i, 0);
 30 |         }
 31 |         map
 32 |     });
 33 |     b.bytes = INTEGERS as u64;
 34 | }
 35 | 
 36 | #[bench]
 37 | #[inline(never)]
 38 | fn insert_random_integers(b: &mut Bencher) {
 39 |     let mut integers: Vec<_> = (0..INTEGERS).collect();
 40 |     weak_rng().shuffle(&mut integers);
 41 |     b.iter(|| {
 42 |         let map = new_map!(u32);
 43 |         for &i in integers.iter() {
 44 |             map.insert(i, 0);
 45 |         }
 46 |         map
 47 |     });
 48 |     b.bytes = INTEGERS as u64;
 49 | }
 50 | 
 51 | #[bench]
 52 | #[inline(never)]
 53 | fn insert_sequential_strings(b: &mut Bencher) {
 54 |     let strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect();
 55 |     b.iter(|| {
 56 |         let map = new_map!(&str);
 57 |         for i in strings.iter() {
 58 |             map.insert(i, 0);
 59 |         }
 60 |         map
 61 |     });
 62 |     b.bytes = INTEGERS as u64;
 63 | }
 64 | 
 65 | #[bench]
 66 | #[inline(never)]
 67 | fn insert_random_strings(b: &mut Bencher) {
 68 |     let mut strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect();
 69 |     weak_rng().shuffle(&mut strings);
 70 |     b.iter(|| {
 71 |         let map = new_map!(&str);
 72 |         for i in strings.iter() {
 73 |             map.insert(i, 0);
 74 |         }
 75 |         map
 76 |     });
 77 |     b.bytes = INTEGERS as u64;
 78 | }
 79 | 
 80 | #[bench]
 81 | #[inline(never)]
 82 | fn insert_sequential_integers_std(b: &mut Bencher) {
 83 |     b.iter(|| {
 84 |         let mut map = ::std::collections::HashMap::<u32, i8>::new();
 85 |         for i in 0..INTEGERS {
 86 |             map.insert(i, 0);
 87 |         }
 88 |         map
 89 |     });
 90 |     b.bytes = INTEGERS as u64;
 91 | }
 92 | 
 93 | #[bench]
 94 | #[inline(never)]
 95 | fn insert_random_integers_std(b: &mut Bencher) {
 96 |     let mut integers: Vec<_> = (0..INTEGERS).collect();
 97 |     weak_rng().shuffle(&mut integers);
 98 |     b.iter(|| {
 99 |         let mut map = ::std::collections::HashMap::<u32, i8>::new();
100 |         for &i in integers.iter() {
101 |             map.insert(i, 0);
102 |         }
103 |         map
104 |     });
105 |     b.bytes = INTEGERS as u64;
106 | }
107 | 
108 | #[bench]
109 | #[inline(never)]
110 | fn insert_sequential_strings_std(b: &mut Bencher) {
111 |     let strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect();
112 |     b.iter(|| {
113 |         let mut map = ::std::collections::HashMap::<String, i8>::new();
114 |         for i in strings.iter() {
115 |             map.insert(i.clone(), 0);
116 |         }
117 |         map
118 |     });
119 |     b.bytes = INTEGERS as u64;
120 | }
121 | 
122 | #[bench]
123 | #[inline(never)]
124 | fn insert_random_strings_std(b: &mut Bencher) {
125 |     let mut strings: Vec<_> = (0..INTEGERS as u64).map(|i| (i * i).to_string()).collect();
126 |     weak_rng().shuffle(&mut strings);
127 |     b.iter(|| {
128 |         let mut map = ::std::collections::HashMap::<String, i8>::new();
129 |         for i in strings.iter() {
130 |             map.insert(i.clone(), 0);
131 |         }
132 |         map
133 |     });
134 |     b.bytes = INTEGERS as u64;
135 | }
136 | 
137 | #[ignore]
138 | #[bench]
139 | #[inline(never)]
140 | fn random_integer_lookup_50_large(b: &mut Bencher) {
141 |     let map = new_map!(u64);
142 |     let len = 1000_000;
143 |     for i in 0..len {
144 |         map.insert(i, 0);
145 |     }
146 |     let mut nums: Vec<_> = (0..2 * len).collect();
147 |     XorShiftRng::new_unseeded().shuffle(&mut nums);
148 |     b.iter(|| {
149 |         for _ in 0..1 {
150 |             for i in nums.iter() {
151 |                 test::black_box(map.find(i));
152 |             }
153 |         }
154 |     });
155 |     b.bytes = nums.len() as u64;
156 | }
157 | 
158 | // TODO Replace these with a macro when #12249 is solved
159 | #[bench]
160 | #[inline(never)]
161 | fn random_integer_lookup_100(b: &mut Bencher) {
162 |     random_integer_lookup(100.0, b, INTEGERS);
163 | }
164 | 
165 | #[bench]
166 | #[inline(never)]
167 | fn random_integer_lookup_95(b: &mut Bencher) {
168 |     random_integer_lookup(95.0, b, INTEGERS);
169 | }
170 | 
171 | #[bench]
172 | #[inline(never)]
173 | fn random_integer_lookup_50(b: &mut Bencher) {
174 |     random_integer_lookup(50.0, b, INTEGERS);
175 | }
176 | 
177 | #[bench]
178 | #[inline(never)]
179 | fn random_integer_lookup_5(b: &mut Bencher) {
180 |     random_integer_lookup(5.0, b, INTEGERS);
181 | }
182 | 
183 | #[bench]
184 | #[inline(never)]
185 | fn random_integer_lookup_0(b: &mut Bencher) {
186 |     random_integer_lookup(0.0, b, INTEGERS);
187 | }
188 | 
189 | #[bench]
190 | #[inline(never)]
191 | fn random_integer_lookup_95_huge(b: &mut Bencher) {
192 |     random_integer_lookup(95.0, b, INTEGERS * 100);
193 | }
194 | 
195 | #[bench]
196 | #[inline(never)]
197 | fn random_string_lookup_95_huge(b: &mut Bencher) {
198 |     random_string_lookup(95.0, b, INTEGERS * 100);
199 | }
200 | 
201 | fn random_integer_lookup(hit_rate: f64, b: &mut Bencher, count: u32) {
202 |     let mut rng = weak_rng();
203 |     let map = new_map!(u32);
204 |     for i in 0..count {
205 |         map.insert(i, 0);
206 |     }
207 |     let base_n = 1000;
208 |     let n = max(1, base_n - (0.99 * base_n as f64 * (1.0 - hit_rate / 100.0)) as u32);
209 |     let (min, max) = if hit_rate > 0.0 {
210 |         (0, (count as f64 / (hit_rate / 100.0)) as u32)
211 |     } else {
212 |         (count, 2 * count)
213 |     };
214 |     let keys: Vec<_> = (0..n).map(|_| rng.gen_range(min, max)).collect();
215 |     b.iter(||
216 |         for key in keys.iter() {
217 |             test::black_box(map.find(key));
218 |         }
219 |     );
220 |     b.bytes = n as u64 as u64;
221 | }
222 | 
223 | fn random_string_lookup(hit_rate: f64, b: &mut Bencher, count: u32) {
224 |     let mut rng = weak_rng();
225 |     let map = new_map!(String);
226 |     for i in 0..count {
227 |         map.insert(format!("____{}____", i), 0);
228 |     }
229 |     let keys: Vec<_> = map.iter()  // `hit_rate` is a percentage, so scale it to a fraction
230 |         .map(|(k, _)| if rng.gen::<f64>() < hit_rate / 100.0 { k.to_string() } else { "miss".to_string() })
231 |         .collect();
232 |     b.iter(||
233 |         for key in keys.iter() {
234 |             test::black_box(map.find(key));
235 |         }
236 |     );
237 |     b.bytes = count as u64;
238 | }
239 | 
240 | #[bench]
241 | #[inline(never)]
242 | fn random_integer_lookup_100_std(b: &mut Bencher) {
243 |     random_integer_lookup_std(100.0, b);
244 | }
245 | 
246 | #[bench]
247 | #[inline(never)]
248 | fn random_integer_lookup_95_std(b: &mut Bencher) {
249 |     random_integer_lookup_std(95.0, b);
250 | }
251 | 
252 | #[bench]
253 | #[inline(never)]
254 | fn random_integer_lookup_50_std(b: &mut Bencher) {
255 |     random_integer_lookup_std(50.0, b);
256 | }
257 | 
258 | #[bench]
259 | #[inline(never)]
260 | fn random_integer_lookup_5_std(b: &mut Bencher) {
261 |     random_integer_lookup_std(5.0, b);
262 | }
263 | 
264 | #[ignore]
265 | #[bench]
266 | #[inline(never)]
267 | fn random_integer_lookup_0_std(b: &mut Bencher) {
268 |     random_integer_lookup_std(0.0, b);
269 | }
270 | 
271 | fn random_integer_lookup_std(hit_rate: f64, b: &mut Bencher) {
272 |     let mut rng = weak_rng();
273 |     let mut map = ::std::collections::HashMap::new();
274 |     for i in 0..INTEGERS {
275 |         map.insert(i, 0);
276 |     }
277 |     let base_n = 1000;
278 |     let n = max(1, base_n - (0.99 * base_n as f64 * (1.0 - hit_rate / 100.0)) as u32);
279 |     let (min, max) = if hit_rate > 0.0 {
280 |         (0, (INTEGERS as f64 / (hit_rate / 100.0)) as u32)
281 |     } else {
282 |         (INTEGERS, 2 * INTEGERS)
283 |     };
284 |     let keys: Vec<_> = (0..n).map(|_| rng.gen_range(min, max)).collect();
285 |     b.iter(||
286 |         for key in keys.iter() {
287 |             test::black_box(map.get(key));
288 |         }
289 |     );
290 |     b.bytes = n as u64 as u64;
291 | }
292 | 


--------------------------------------------------------------------------------
/examples/readme.rs:
--------------------------------------------------------------------------------
 1 | extern crate concurrent_hashmap;
 2 | 
 3 | use concurrent_hashmap::*;
 4 | 
 5 | fn main() {
 6 |     // Create a table mapping u32 to u32, using defaults
 7 |     let map = ConcHashMap::<u32, u32>::new();
 8 |     map.insert(1, 2);
 9 |     map.insert(30, 12);
10 |     if let Some(mut val) = map.find_mut(&30) {
11 |         // Update a value in-place if it exists
12 |         // This mapping can not be modified while we have a reference to it
13 |         *val.get() += 3;
14 |     }
15 |     // Update the value with key 129, or insert a default (3)
16 |     map.upsert(129, 3, &|x| *x *= 3);  // 129 => 3
17 |     map.upsert(129, 3, &|x| *x *= 3);  // 129 => 9
18 |     map.remove(&1);
19 |     for (&k, &v) in map.iter() {
20 |         println!("{} => {}", k, v);
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/examples/wordcount.rs:
--------------------------------------------------------------------------------
 1 | #![feature(step_by)]
 2 | extern crate concurrent_hashmap;
 3 | 
 4 | use std::io::Read;
 5 | use std::io;
 6 | use std::cmp;
 7 | use std::thread;
 8 | use std::default::Default;
 9 | use std::sync::Arc;
10 | use concurrent_hashmap::*;
11 | 
12 | fn main() {
13 |     let words = Arc::new(read_words());
14 |     let word_counts: Arc<ConcHashMap<String, u32>> = Default::default();
15 |     count_words(words.clone(), word_counts.clone(), 4);
16 |     let mut counts: Vec<(String, u32)> = word_counts.iter().map(|(s, &n)| (s.clone(), n)).collect();
17 |     counts.sort_by(|&(_, a), &(_, b)| a.cmp(&b));
18 |     for &(ref word, count) in counts.iter() {
19 |         println!("{}\t{}", word, count);
20 |     }
21 | }
22 | 
23 | fn read_words() -> Vec<String> {
24 |     let mut input = String::new();
25 |     io::stdin().read_to_string(&mut input).unwrap();
26 |     input.split_whitespace()
27 |         .map(|w| w.trim_matches(|c| ['.', '"', ':', ';', ',', '!', '?', ')', '(', '_']
28 |                   .contains(&c)))
29 |         .map(|w| w.to_lowercase())
30 |         .filter(|w| !w.is_empty())
31 |         .collect()
32 | }
33 | 
34 | fn count_words(words: Arc<Vec<String>>, word_counts: Arc<ConcHashMap<String, u32>>, nthreads: usize) {
35 |     let mut threads = Vec::with_capacity(nthreads);
36 |     // Round up and clamp to 1: at most `nthreads` chunks, and the step is never 0 for short input.
37 |     let chunk_size = cmp::max(1, (words.len() + nthreads - 1) / nthreads);
38 |     for chunk_index in (0..words.len()).step_by(chunk_size) {
39 |         let words = words.clone();
40 |         let word_counts = word_counts.clone();
41 |         threads.push(thread::spawn(move || {
42 |             for word in &words[chunk_index..cmp::min(words.len(), chunk_index + chunk_size)] {
43 |                 // It would be nice if .upsert() took a &K and cloned only as needed.
44 |                 word_counts.upsert(word.to_owned(), 1, &|count| *count += 1);
45 |             }
46 |         }));
47 |     }
48 |     for thread in threads {
49 |         thread.join().unwrap();
50 |     }
51 | }
52 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | extern crate spin;
2 | 
3 | mod table;
4 | mod map;
5 | 
6 | pub use map::*;
7 | pub use table::Accessor;
8 | 


--------------------------------------------------------------------------------
/src/map.rs:
--------------------------------------------------------------------------------
  1 | use std::hash::{Hasher, Hash};
  2 | use std::hash::BuildHasher;
  3 | use std::collections::hash_map::RandomState;
  4 | use spin::{Mutex, MutexGuard};
  5 | use std::default::Default;
  6 | use std::mem::swap;
  7 | use std::cmp::min;
  8 | use std::u16;
  9 | use std::borrow::Borrow;
 10 | use std::iter::{FromIterator, IntoIterator};
 11 | use table::*;
 12 | 
 13 | // This is the user-facing part of the implementation.
 14 | // ConcHashMap wraps a couple of actual hash tables (Table) with locks around them.
 15 | // It uses the top bits of the hash to decide which Table to access for a given key.
 16 | // The size of an individual Table is limited (to a still unreasonably large value) so
 17 | // that it will never use the aforementioned top bits of the hash.
 18 | // That means that resizing a Table will never cause a key to cross between Tables.
 19 | // Therefore each table can be resized independently.
 20 | 
 21 | /// A concurrent hashmap using sharding
 22 | pub struct ConcHashMap<K, V, H=RandomState> where K: Send + Sync, V: Send + Sync {
 23 |     tables: Vec<Mutex<Table<K, V>>>,
 24 |     hasher_factory: H,
 25 |     table_shift: u64,
 26 |     table_mask: u64,
 27 | }
 28 | 
 29 | impl <K, V, H> ConcHashMap<K, V, H>
 30 |         where K: Hash + Eq + Send + Sync, V: Send + Sync, H: BuildHasher {
 31 | 
 32 |     /// Creates a new hashmap using default options.
 33 |     pub fn new() -> ConcHashMap<K, V> {
 34 |         Default::default()
 35 |     }
 36 | 
 37 |     /// Creates a new hashmap with custom options.
 38 |     pub fn with_options(opts: Options<H>) -> ConcHashMap<K, V, H> {
 39 |         let conc = opts.concurrency as usize;
 40 |         let partitions = conc.checked_next_power_of_two().unwrap_or((conc / 2).next_power_of_two());
 41 |         let capacity = f64_to_usize(opts.capacity as f64 / 0.92).expect("capacity overflow");
 42 |         let reserve = div_ceil(capacity, partitions);
 43 |         let mut tables = Vec::with_capacity(partitions);
 44 |         for _ in 0..partitions {
 45 |             tables.push(Mutex::new(Table::new(reserve)));
 46 |         }
 47 |         ConcHashMap {
 48 |             tables: tables,
 49 |             hasher_factory: opts.hasher_factory,
 50 |             table_shift: if partitions == 1 { 0 } else { 64 - partitions.trailing_zeros() as u64 },
 51 |             table_mask: partitions as u64 - 1
 52 |         }
 53 |     }
 54 | 
 55 |     /// Searches for a key, returning an accessor to the mapped values (or `None` if no mapping
 56 |     /// exists).
 57 |     ///
 58 |     /// Note that as long as the `Accessor` lives, a lock is held.
 59 |     ///
 60 |     /// # Examples
 61 |     ///
 62 |     /// Printing a value if it exists:
 63 |     ///
 64 |     /// ```
 65 |     /// # use concurrent_hashmap::*;
 66 |     /// # let map = ConcHashMap::<u32, u32>::new();
 67 |     /// map.insert(100, 1);
 68 |     /// if let Some(val) = map.find(&100) {
 69 |     ///     println!("100 => {}", val.get());
 70 |     /// }
 71 |     /// # println!("workaround");
 72 |     /// ```
 73 |     #[inline(never)]
 74 |     pub fn find<'a, Q: ?Sized>(&'a self, key: &Q) -> Option<Accessor<'a, K, V>>
 75 |             where K: Borrow<Q> + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync {
 76 |         let key_hash = self.hash(key);
 77 |         // Lock only the partition selected by this hash.
 78 |         let guard = self.tables[self.table_for(key_hash)].lock();
 79 |         let slot = guard.lookup(key_hash, |k| k.borrow() == key);
 80 |         // On a hit the guard moves into the accessor, so the lock is held while it lives;
 81 |         // on a miss the guard is dropped on return.
 82 |         slot.map(|idx| Accessor::new(guard, idx))
 83 |     }
 84 | 
 85 |     /// Searches for a key, returning a mutable accessor to the mapped value
 86 |     /// (or `None` if no mapping exists).
 87 |     ///
 88 |     /// Note that as long as the `MutAccessor` lives, a lock is held.
 89 |     ///
 90 |     /// # Examples
 91 |     ///
 92 |     /// Adding 2 to a value if it exists:
 93 |     ///
 94 |     /// ```
 95 |     /// # use concurrent_hashmap::*;
 96 |     /// # let map = ConcHashMap::<u32, u32>::new();
 97 |     /// map.insert(100, 1);
 98 |     /// if let Some(mut val) = map.find_mut(&100) {
 99 |     ///     *val.get() += 2;
100 |     /// }
101 |     /// # println!("workaround");
102 |     /// ```
103 |     #[inline(never)]
104 |     pub fn find_mut<'a, Q: ?Sized>(&'a self, key: &Q) -> Option<MutAccessor<'a, K, V>>
105 |             where K: Borrow<Q> + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync {
106 |         let key_hash = self.hash(key);
107 |         // Lock only the partition selected by this hash.
108 |         let guard = self.tables[self.table_for(key_hash)].lock();
109 |         let slot = guard.lookup(key_hash, |k| k.borrow() == key);
110 |         // On a hit the guard moves into the mutable accessor, so the lock is held while it
111 |         // lives; on a miss the guard is dropped on return.
112 |         slot.map(|idx| MutAccessor::new(guard, idx))
113 |     }
114 | 
115 |     /// Inserts a new mapping from `key` to `value`.
116 |     /// If a previous mapping existed for `key`, it is returned.
117 |     #[inline(never)]
118 |     pub fn insert(&self, key: K, value: V) -> Option<V> {
119 |         let hash = self.hash(&key);
120 |         let table_idx = self.table_for(hash);
121 |         let mut table = self.tables[table_idx].lock();
122 |         table.put(key, value, hash, |old, mut new| { swap(old, &mut new); new })
123 |     }
124 | 
125 |     /// Performs an "upsert" operation:
126 |     /// Updates the value currently mapped to `key` using `updater`,
127 |     /// or maps `key` to `value` if no previous mapping existed.
128 |     ///
129 |     /// # Examples
130 |     /// ```
131 |     /// # use concurrent_hashmap::*;
132 |     /// # use std::string::String;
133 |     /// let word_counts = ConcHashMap::<String, u32>::new();
134 |     /// let words = ["a", "car", "is", "a", "thing"];
135 |     /// for word in words.iter().map(|s| s.to_string()) {
136 |     ///     word_counts.upsert(word, 1, &|count| *count += 1);
137 |     /// }
138 |     /// // Map is now "a"=>2, "car"=>1, "is"=>1, "thing"=>1
139 |     /// ```
140 |     pub fn upsert<U: Fn(&mut V)>(&self, key: K, value: V, updater: &U) {
141 |         let hash = self.hash(&key);
142 |         let table_idx = self.table_for(hash);
143 |         let mut table = self.tables[table_idx].lock();
144 |         table.put(key, value, hash, |old, _| { updater(old); });
145 |     }
146 | 
147 |     /// Removes any mapping associated with `key`.
148 |     ///
149 |     /// If a mapping was removed, the mapped value is returned.
150 |     pub fn remove<'a, Q: ?Sized>(&'a self, key: &Q) -> Option<V>
151 |             where K: Borrow<Q> + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync {
152 |         let hash = self.hash(key);
153 |         let table_idx = self.table_for(hash);
154 |         let mut table = self.tables[table_idx].lock();
155 |         table.remove(hash, |k| k.borrow() == key)
156 |     }
157 | 
158 |     fn table_for(&self, hash: u64) -> usize {
159 |         ((hash >> self.table_shift) & self.table_mask) as usize
160 |     }
161 | 
162 |     fn hash<Q: ?Sized>(&self, key: &Q) -> u64
163 |             where K: Borrow<Q> + Hash + Eq + Send + Sync, Q: Hash + Eq + Sync {
164 |         let mut hasher = self.hasher_factory.build_hasher();
165 |         key.hash(&mut hasher);
166 |         hasher.finish()
167 |     }
168 | }
169 | 
170 | impl <K, V, H> Clone for ConcHashMap<K, V, H>
171 |         where K: Hash + Eq + Send + Sync + Clone, V: Send + Sync + Clone, H: BuildHasher + Clone {
172 |     /// Builds an independent map holding copies of this map's mappings and its hasher.
173 |     ///
174 |     /// The copy is filled from `.iter()`, so without external synchronization it offers only
175 |     /// the consistency guarantees of `.iter()`; synchronize externally if a consistent
176 |     /// snapshot is needed.
177 |     fn clone(&self) -> ConcHashMap<K, V, H> {
178 |         let copy = ConcHashMap::<K, V, H>::with_options(Options {
179 |             capacity: 16,  // TODO
180 |             hasher_factory: self.hasher_factory.clone(),
181 |             concurrency: min(u16::MAX as usize, self.tables.len()) as u16
182 |         });
183 |         for (key, value) in self.iter() {
184 |             copy.insert(key.clone(), value.clone());
185 |         }
186 |         copy
187 |     }
188 | }
189 | 
190 | impl <K, V, H> FromIterator<(K, V)> for ConcHashMap<K, V, H>
191 |         where K: Eq + Hash + Send + Sync, V: Send + Sync, H: BuildHasher + Default {
192 |     fn from_iter<T>(iterator: T) -> Self where T: IntoIterator<Item=(K, V)> {
193 |         let iterator = iterator.into_iter();
194 |         let mut options: Options<H> = Default::default();
195 |         // Preallocate from the lower bound only; the upper bound may vastly overestimate.
196 |         let (lower, _) = iterator.size_hint();
197 |         if lower > options.capacity { options.capacity = lower; }
198 |         let map = ConcHashMap::with_options(options);
199 |         for (k, v) in iterator {
200 |             map.insert(k, v);
201 |         }
202 |         map
203 |     }
204 | }
205 | 
206 | impl <K, V, H> ConcHashMap<K, V, H> where K: Send + Sync, V: Send + Sync {
207 |     /// Iterates over all mappings.
208 |     ///
209 |     /// This method does not provide a consistent snapshot of the map.
210 |     /// All mappings returned must have been in the map at some point, but updates performed during
211 |     /// the iteration may or may not be reflected.
212 |     ///
213 |     /// Iterating may block writers.
214 |     pub fn iter<'a>(&'a self) -> Entries<'a, K, V, H> {
215 |        Entries {
216 |            map: self,
217 |            table: self.tables[0].lock(),
218 |            table_idx: 0,
219 |            bucket: 0
220 |        }
221 |     }
222 | 
223 |     /// Removes all mappings.
224 |     ///
225 |     /// In the absence of external synchronization, the map can not be guaranteed to have been empty
226 |     /// at any point during or after the `.clear()` call.
227 |     pub fn clear(&self) {
228 |         for table in self.tables.iter() {
229 |             table.lock().clear();
230 |         }
231 |     }
232 | }
233 | 
234 | impl <K, V, H> Default for ConcHashMap<K, V, H>
235 |         where K: Hash + Eq + Send + Sync, V: Send + Sync, H: BuildHasher + Default {
236 |     /// Equivalent to `ConcHashMap::new()`.
237 |     fn default() -> ConcHashMap<K, V, H> {
238 |         ConcHashMap::with_options(Default::default())
239 |     }
240 | }
241 | 
/// Iterator over the hashmap's mappings.
///
/// Created by `ConcHashMap::iter()`. It walks the partitions in index order and keeps the
/// lock of the partition it is currently scanning.
pub struct Entries<'a, K, V, H> where K: 'a + Send + Sync, V: 'a + Send + Sync, H: 'a {
    // The map being iterated.
    map: &'a ConcHashMap<K, V, H>,
    // Lock guard for the partition currently being scanned.
    table: MutexGuard<'a, Table<K, V>>,
    // Index into `map.tables` of the partition guarded by `table`.
    table_idx: usize,
    // Next bucket index to examine within the current partition.
    bucket: usize,
}
249 | 
250 | impl <'a, K, V, H> Entries<'a, K, V, H> where K: Send + Sync, V: Send + Sync  {
251 |     fn next_table(&mut self) {
252 |         self.table_idx += 1;
253 |         self.table = self.map.tables[self.table_idx].lock();
254 |         self.bucket = 0;
255 |     }
256 | }
257 | 
/// Yields `(&key, &value)` pairs partition by partition, bucket by bucket.
impl <'a, K, V, H> Iterator for Entries<'a, K, V, H> where K: Send + Sync, V: Send + Sync {
    type Item = (&'a K, &'a V);

    /// Returns the next present mapping, or `None` once the last partition is exhausted.
    fn next(&mut self) -> Option<(&'a K, &'a V)> {
        loop {
            // The current partition has been fully scanned: stop if it was the last one,
            // otherwise switch to the next partition.
            if self.bucket == self.table.capacity() {
                if self.table_idx + 1 == self.map.tables.len() {
                    return None;
                }
                self.next_table();
            }
            // `iter_advance` returns references borrowed from the guard held in `self.table`;
            // the transmute widens their lifetime to `'a`.
            // NOTE(review): the guard is released by `next_table()` (and when the iterator is
            // dropped), so references handed out earlier can outlive the lock that protected
            // them — confirm callers do not rely on them after advancing.
            let res: Option<(&'a K, &'a V)> = unsafe { ::std::mem::transmute(self.table.iter_advance(&mut self.bucket)) };
            match res {
                Some(e) => return Some(e),
                None    => {
                    // No further present bucket in this partition.
                    if self.table_idx + 1 == self.map.tables.len() {
                        return None;
                    }
                    self.next_table()
                }
            }
        }
    }
}
282 | 
/// Options used when creating a hashmap.
///
/// The `Default` implementation yields `capacity: 0`, a default-constructed `hasher_factory`
/// and `concurrency: 16`.
pub struct Options<H> {
    /// Number of mappings to preallocate space for.
    ///
    /// The map will always grow as needed, but preallocating space can improve performance.
    /// This value applies to the entire map.
    /// By default, no space is preallocated.
    pub capacity: usize,
    /// Factory for the hasher used for hashing keys.
    pub hasher_factory: H,
    /// Expected level of concurrency.
    ///
    /// This value controls the number of partitions used internally in the map.
    /// A higher value leads to less contention, but also greater memory overhead.
    /// The default value is 16.
    pub concurrency: u16,
}
300 | 
301 | impl <H> Default for Options<H> where H: BuildHasher+Default {
302 |     fn default() -> Options<H> {
303 |         Options {
304 |             capacity: 0,
305 |             hasher_factory: Default::default(),
306 |             concurrency: 16
307 |         }
308 |     }
309 | }
310 | 
/// Divides `n` by `d`, rounding the result up to the next integer.
///
/// Returns 0 when `n` is 0 (without looking at `d`).
///
/// # Panics
/// Panics if `n` is non-zero and `d` is 0.
fn div_ceil(n: usize, d: usize) -> usize {
    if n == 0 {
        0
    } else {
        // Round up only when the division leaves a remainder.
        n / d + if n % d != 0 { 1 } else { 0 }
    }
}
318 | 
/// Converts `f` to `usize`, truncating towards zero.
///
/// Returns `None` when `f` is NaN, has its sign bit set, or is too large to be represented
/// as a `usize` (this includes positive infinity).
fn f64_to_usize(f: f64) -> Option<usize> {
    // `usize::MAX as f64` is not always exact: on 64-bit targets it rounds up to 2^64, which
    // is itself out of range. Adding 1.0 gives exactly 2^BITS on both 32- and 64-bit targets,
    // so every value below `limit` truncates to a valid `usize`.
    let limit = (::std::usize::MAX as f64) + 1.0;
    if f.is_nan() || f.is_sign_negative() || f >= limit {
        None
    } else {
        Some(f as usize)
    }
}
326 | 
/// Unit tests for the map: single-threaded insert/find/remove/clear behaviour plus one
/// concurrent reader/writer scenario.
#[cfg(test)]
mod test {
    use std::hash::Hash;
    use std::hash::{BuildHasher, Hasher, BuildHasherDefault};
    use std::default::Default;
    use std::fmt::Debug;
    use std::thread;
    use std::sync::Arc;
    use super::*;

    /// Hasher that maps every key to 0, forcing all keys to collide.
    struct BadHasher;

    impl Hasher for BadHasher {
        fn write(&mut self, _: &[u8]) { }

        fn finish(&self) -> u64 { 0 }
    }

    impl Default for BadHasher {
        fn default() -> BadHasher { BadHasher }
    }

    /// Simple deterministic byte-at-a-time hasher used to get reproducible hashes in tests.
    struct OneAtATimeHasher {
        state: u64
    }

    impl Hasher for OneAtATimeHasher {
        fn write(&mut self, bytes: &[u8]) {
            for &b in bytes.iter() {
                self.state = self.state.wrapping_add(b as u64);
                self.state = self.state.wrapping_add(self.state << 10);
                self.state ^= self.state >> 6;
            }
        }

        fn finish(&self) -> u64 {
            let mut hash = self.state;
            hash = hash.wrapping_add(hash << 3);
            hash ^= hash >> 11;
            hash = hash.wrapping_add(hash << 15);
            hash
        }
    }

    impl Default for OneAtATimeHasher {
        fn default() -> OneAtATimeHasher {
            OneAtATimeHasher { state: 0x124C494467744825 }
        }
    }

    /// Inserted keys are found; keys never inserted are not.
    #[test]
    fn insert_is_found() {
        let map: ConcHashMap<i32, i32> = Default::default();
        assert!(map.find(&1).is_none());
        map.insert(1, 2);
        assert_eq!(map.find(&1).unwrap().get(), &2);
        assert!(map.find(&2).is_none());
        map.insert(2, 4);
        assert_eq!(map.find(&2).unwrap().get(), &4);
    }

    /// Inserting an existing key replaces the value and returns the old one.
    #[test]
    fn insert_replace() {
        let map: ConcHashMap<i32, &'static str> = Default::default();
        assert!(map.find(&1).is_none());
        map.insert(1, &"old");
        assert_eq!(map.find(&1).unwrap().get(), &"old");
        let old = map.insert(1, &"new");
        assert_eq!(Some("old"), old);
        assert_eq!(map.find(&1).unwrap().get(), &"new");
    }

    /// Many inserts (forcing table growth) keep exactly the inserted keys findable.
    #[test]
    fn insert_lots() {
        let map: ConcHashMap<i32, i32, BuildHasherDefault<OneAtATimeHasher>> = Default::default();
        for i in 0..1000 {
            if i % 2 == 0 {
                map.insert(i, i * 2);
            }
        }
        for i in 0..1000 {
            if i % 2 == 0 {
                find_assert(&map, &i, &(i * 2));
            } else {
                assert!(map.find(&i).is_none());
            }
        }
    }

    /// Same as `insert_lots`, but with every key hashing to the same value.
    #[test]
    fn insert_bad_hash_lots() {
        let map: ConcHashMap<i32, i32, BuildHasherDefault<BadHasher>> = Default::default();
        for i in 0..100 {
            if i % 2 == 0 {
                map.insert(i, i * 2);
            }
        }
        for i in 0..100 {
            if i % 2 == 0 {
                find_assert(&map, &i, &(i * 2));
            } else {
                assert!(map.find(&i).is_none());
            }
        }
    }

    /// Looking up a key in a freshly created map yields nothing.
    #[test]
    fn find_none_on_empty() {
        let map: ConcHashMap<i32, i32> = Default::default();
        assert!(map.find(&1).is_none());
    }

    /// A clone contains the same mappings as the original.
    #[test]
    fn test_clone() {
        let orig: ConcHashMap<i32, i32> = Default::default();
        for i in 0..100 {
            orig.insert(i, i * i);
        }
        let clone = orig.clone();
        for i in 0..100 {
            assert_eq!(orig.find(&i).unwrap().get(), clone.find(&i).unwrap().get());
        }
    }

    /// After `clear()` none of the previously inserted keys is found.
    #[test]
    fn test_clear() {
        let map: ConcHashMap<i32, i32> = Default::default();
        for i in 0..100 {
            map.insert(i, i * i);
        }
        map.clear();
        for i in 0..100 {
            assert!(map.find(&i).is_none());
        }
    }

    /// Removing a key returns its value and leaves the other keys untouched.
    #[test]
    fn test_remove() {
        let map: ConcHashMap<i32, String> = Default::default();
        map.insert(1, "one".to_string());
        map.insert(2, "two".to_string());
        map.insert(3, "three".to_string());
        assert_eq!(Some("two".to_string()), map.remove(&2));
        assert_eq!("one", map.find(&1).unwrap().get());
        assert!(map.find(&2).is_none());
        assert_eq!("three", map.find(&3).unwrap().get());
    }

    /// Removing every second key leaves exactly the remaining keys findable.
    #[test]
    fn test_remove_many() {
        let map: ConcHashMap<i32, String> = Default::default();
        for i in 0..100 {
            map.insert(i, (i * i).to_string());
        }
        for i in 0..100 {
            if i % 2 == 0 {
                assert_eq!(Some((i * i).to_string()), map.remove(&i));
            }
        }
        for i in 0..100 {
            let x = map.find(&i);
            if i % 2 == 0 {
                assert!(x.is_none());
            } else {
                assert_eq!(&(i * i).to_string(), x.unwrap().get());
            }
        }
    }

    /// Keys can be re-inserted after removal and then carry the new value.
    #[test]
    fn test_remove_insert() {
        let map: ConcHashMap<i32, String> = Default::default();
        for i in 0..100 {
            map.insert(i, (i * i).to_string());
        }
        for i in 0..100 {
            if i % 2 == 0 {
                assert_eq!(Some((i * i).to_string()), map.remove(&i));
            }
        }
        for i in 0..100 {
            if i % 4 == 0 {
                map.insert(i, i.to_string());
            }
        }
        for i in 0..100 {
            let x = map.find(&i);
            if i % 4 == 0 {
                assert_eq!(&i.to_string(), x.unwrap().get());
            } else if i % 2 == 0 {
                assert!(x.is_none());
            } else {
                assert_eq!(&(i * i).to_string(), x.unwrap().get());
            }
        }
    }

    /// Collecting an iterator of pairs produces a map containing all of them.
    #[test]
    fn test_from_iterator() {
        let vec: Vec<(u32, u32)> = (0..100).map(|i| (i, i * i)).collect();
        let map: ConcHashMap<u32, u32> = vec.iter().cloned().collect();
        for &(k, v) in vec.iter() {
            find_assert(&map, &k, &v);
        }
    }

    /// A value modified through `find_mut` is visible to later lookups.
    #[test]
    fn mut_modify() {
        let map: ConcHashMap<u32, u32> = Default::default();
        map.insert(1, 0);
        // The accessor is a temporary, so its lock is released at the end of this statement.
        let e = map.find_mut(&1).unwrap().get();
        *e += 1;
        assert_eq!(&1, map.find(&1).unwrap().get());
    }

    /// One thread reads every key while another increments every value; afterwards every
    /// value must have been incremented exactly once.
    #[test]
    fn conc_mut_modify() {
        let mmap: Arc<ConcHashMap<u32, u32>> = Arc::new(Default::default());
        let map = mmap.clone();
        let range = 10000;
        for i in 0..range {
            map.insert(i, i*i);
        }

        let tl_map = mmap.clone();
        let reader = thread::spawn(move || {
            for i in 0..range {
                tl_map.find(&i).unwrap().get();
            }
        });

        let tl_map = mmap.clone();
        let writer = thread::spawn(move || {
            for i in 0..range {
                let e = tl_map.find_mut(&i).unwrap().get();
                *e += 1;
            }
        });

        reader.join().unwrap();
        writer.join().unwrap();
        for i in 0..range {
            assert_eq!(map.find(&i).unwrap().get(), &(i*i+1));
        }
    }

    /// Asserts that `key` is present in `map` and maps to `expected_val`; panics otherwise.
    fn find_assert<K, V, H> (map: &ConcHashMap<K, V, H>, key: &K,  expected_val: &V)
            where K: Eq + Hash + Debug + Send + Sync, V: Eq + Debug + Send + Sync, H: BuildHasher {
        match map.find(key) {
            None    => panic!("missing key {:?} should map to {:?}", key, expected_val),
            Some(v) => assert_eq!(*v.get(), *expected_val)
        }
    }
}
581 | 


--------------------------------------------------------------------------------
/src/table.rs:
--------------------------------------------------------------------------------
  1 | use std::hash::Hash;
  2 | use spin::MutexGuard;
  3 | use std::ptr::{self, drop_in_place};
  4 | use std::mem;
  5 | use std::cmp::max;
  6 | use std::mem::size_of;
  7 | use std::marker::{Send, Sync};
  8 | 
// This is the actual hash table implementation.
// The Table struct does not have any synchronization; that is handled by the ConcHashMap wrapper.
// It uses open addressing with quadratic probing, with metadata bits in each stored hash for
// tracking bucket occupancy, and uses tombstones to track deleted entries.

// Minimum size of table when resizing.
// Initially, zero-sized tables are allowed to avoid allocation.
// When they need to reallocate, this is the smallest size used.
const MIN_CAPACITY: usize = 1 << 5;

// Largest number of elements in a table.
// We want to be able to use the top 16 bits of the hash for choosing the partition.
// If we limit the size of the partition to 47 bits, elements will never change partition.
// Thus we can resize each partition individually.
const MAX_CAPACITY: u64 = (1 << 48) - 1;

// This masks out the metadata bits of the hash field, keeping the low 48 bits.
const HASH_MASK: u64 = 0x0000FFFFFFFFFFFF;

// If this bit is in a stored hash, the entry has been removed.
const TOMBSTONE: u64 = 0x0001000000000000;

// If this bit is in a stored hash, the entry is present.
const PRESENT: u64 = 0x1000000000000000;
 33 | 
 34 | // The proper heap API is only available in nightlies
 35 | unsafe fn alloc<T>(count: usize, zero: bool) -> *mut T {
 36 |     let mut dummy: Vec<T> = Vec::with_capacity(count);
 37 |     let ptr = dummy.as_mut_ptr();
 38 |     if zero {
 39 |         ptr::write_bytes(ptr, 0, count);
 40 |     }
 41 |     mem::forget(dummy);
 42 |     return ptr;
 43 | }
 44 | 
 45 | unsafe fn dealloc<T>(p: *mut T, count: usize) {
 46 |     let _dummy: Vec<T> = Vec::from_raw_parts(p, 0, count);
 47 |     // Dummy is dropped and the memory is freed
 48 | }
 49 | 
/// Unsynchronized open-addressing hash table stored as three parallel arrays.
pub struct Table<K, V> {
    // Per-bucket stored hash combined with the PRESENT / TOMBSTONE metadata bits;
    // a zero word marks a bucket that has never been used.
    hashes: *mut u64,
    // Keys; only initialized for buckets whose PRESENT bit is set.
    keys: *mut K,
    // Values; only initialized for buckets whose PRESENT bit is set.
    values: *mut V,
    // Number of buckets: zero or a power of two.
    capacity: usize,
    // Number of present entries.
    len: usize,
}
 57 | 
/// A handle to a particular mapping.
///
/// Note that this acts as a lock guard to a part of the map.
pub struct Accessor<'a, K: 'a, V: 'a> {
    // Guard keeping the table that holds the mapping locked.
    table: MutexGuard<'a, Table<K, V>>,
    // Bucket index of the mapping inside `table`.
    idx: usize
}
 65 | 
/// A mutable handle to a particular mapping.
///
/// Note that this acts as a lock guard to a part of the map.
pub struct MutAccessor<'a, K: 'a, V: 'a> {
    // Guard keeping the table that holds the mapping locked.
    table: MutexGuard<'a, Table<K, V>>,
    // Bucket index of the mapping inside `table`.
    idx: usize
}
 73 | 
 74 | impl <'a, K, V> Accessor<'a, K, V> {
 75 |     pub fn new(table: MutexGuard<'a, Table<K, V>>, idx: usize) -> Accessor<'a, K, V> {
 76 |         Accessor {
 77 |             table: table,
 78 |             idx: idx
 79 |         }
 80 |     }
 81 | 
 82 |     pub fn get(&self) -> &'a V {
 83 |         debug_assert!(self.table.is_present(self.idx));
 84 |         unsafe {
 85 |             &*self.table.values.offset(self.idx as isize)
 86 |         }
 87 |     }
 88 | }
 89 | 
 90 | impl <'a, K, V> MutAccessor<'a, K, V> {
 91 |     pub fn new(table: MutexGuard<'a, Table<K, V>>, idx: usize) -> MutAccessor<'a, K, V> {
 92 |         MutAccessor {
 93 |             table: table,
 94 |             idx: idx
 95 |         }
 96 |     }
 97 | 
 98 |     pub fn get(&mut self) -> &'a mut V {
 99 |         debug_assert!(self.table.is_present(self.idx));
100 |         unsafe {
101 |             &mut *self.table.values.offset(self.idx as isize)
102 |         }
103 |     }
104 | }
105 | 
106 | impl <K, V> Table<K, V> where K: Hash + Eq {
107 |     pub fn new(capacity: usize) -> Table<K, V> {
108 |         assert!(size_of::<K>() > 0 && size_of::<V>() > 0, "zero-size types not yet supported");
109 |         let capacity = if capacity == 0 { 0 } else { capacity.next_power_of_two() };
110 |         Table {
111 |             capacity: capacity,
112 |             len: 0,
113 |             hashes: unsafe { alloc(capacity, true) },
114 |             keys: unsafe { alloc(capacity, false) },
115 |             values: unsafe { alloc(capacity, false) }
116 |         }
117 |     }
118 | 
119 |     pub fn lookup<C>(&self, hash: u64, eq: C) -> Option<usize> where C: Fn(&K) -> bool {
120 |         let len = self.capacity;
121 |         if len == 0 {
122 |             return None;
123 |         }
124 |         let mask = len - 1;
125 |         let hash = hash & HASH_MASK;
126 |         let mut i = hash as usize & mask;
127 |         let mut j = 0;
128 |         loop {
129 |             if self.is_present(i) && self.compare_key_at(&eq, i) {
130 |                 return Some(i);
131 |             }
132 |             if !self.is_present(i) && !self.is_deleted(i) {
133 |                 // The key we're searching for would have been placed here if it existed
134 |                 return None;
135 |             }
136 |             if i == len - 1 { return None; }
137 |             j += 1;
138 |             i = (i + j) & mask;
139 |         }
140 |     }
141 | 
142 |     pub fn put<T, U: Fn(&mut V, V)-> T>(&mut self, key: K, value: V, hash: u64, update: U) -> Option<T> {
143 |         if self.capacity == 0 {
144 |             self.resize();
145 |         }
146 |         loop {
147 |             let len = self.capacity;
148 |             let hash = hash & HASH_MASK;
149 |             let mask = len - 1;
150 |             let mut i = (hash as usize) & mask;
151 |             let mut j = 0;
152 |             loop {
153 |                 if !self.is_present(i) {
154 |                     unsafe { self.put_at_empty(i, key, value, hash); }
155 |                     self.len += 1;
156 |                     return None;
157 |                 } else if self.compare_key_at(&|k| k == &key, i) {
158 |                     let old_value = unsafe { &mut *self.values.offset(i as isize) };
159 |                     return Some(update(old_value, value));
160 |                 }
161 |                 if i == len - 1 { break; }
162 |                 j += 1;
163 |                 i = (i + j) & mask;
164 |             }
165 |             self.resize();
166 |         }
167 |     }
168 | 
169 |     pub fn remove<C>(&mut self, hash: u64, eq: C) -> Option<V> where C: Fn(&K) -> bool {
170 |         let i = match self.lookup(hash, eq) {
171 |             Some(i) => i,
172 |             None    => return None
173 |         };
174 |         unsafe {
175 |             drop_in_place::<K>(self.keys.offset(i as isize));
176 |             *self.hashes.offset(i as isize) = TOMBSTONE;
177 |             self.len -= 1;
178 |             let value = ptr::read(self.values.offset(i as isize));
179 |             return Some(value);
180 |         }
181 |     }
182 | 
183 |     #[inline]
184 |     fn compare_key_at<C>(&self, eq: &C, idx: usize) -> bool where C: Fn(&K) -> bool {
185 |         assert!(self.is_present(idx));
186 |         unsafe { eq(&*self.keys.offset(idx as isize)) }
187 |     }
188 | 
189 |     unsafe fn put_at_empty(&mut self, idx: usize, key: K, value: V, hash: u64) {
190 |         let i = idx as isize;
191 |         *self.hashes.offset(i) = hash | PRESENT;
192 |         ptr::write(self.keys.offset(i), key);
193 |         ptr::write(self.values.offset(i), value);
194 |     }
195 | 
196 |     fn resize(&mut self) {
197 |         let new_capacity = max(self.capacity.checked_add(self.capacity).expect("size overflow"), MIN_CAPACITY);
198 |         if new_capacity as u64 > MAX_CAPACITY {
199 |             panic!("requested size: {}, max size: {}", new_capacity, MAX_CAPACITY);
200 |         }
201 |         let mut new_table = Table::new(new_capacity);
202 |         unsafe {
203 |             self.foreach_present_idx(|i| {
204 |                 let hash: u64 = *self.hashes.offset(i as isize);
205 |                 new_table.put(ptr::read(self.keys.offset(i as isize)),
206 |                               ptr::read(self.values.offset(i as isize)),
207 |                               hash, |_, _| { });
208 |             });
209 |             dealloc(self.hashes, self.capacity);
210 |             dealloc(self.keys, self.capacity);
211 |             dealloc(self.values, self.capacity);
212 |             // This is checked in drop() to see that this instance is already "dropped"
213 |             self.hashes = ptr::null_mut();
214 |         }
215 |         mem::swap(self, &mut new_table);
216 |     }
217 | 
218 | //     fn _dump_table(&self) {
219 | //         unsafe {
220 | //             let table = ::std::slice::from_raw_parts(self.buckets, self.capacity);
221 | //             for (i, e) in table.iter().enumerate() {
222 | //                 if self.present[i] {
223 | //                     println!("{}:\t{:?}\t=>\t{:?}",
224 | //                             i, e.key, e.value,);
225 | //                 } else {
226 | //                     println!("{}:\tempty", i);
227 | //                 }
228 | //             }
229 | //         }
230 | //     }
231 | }
232 | 
233 | impl <K, V> Table<K, V> {
234 |     pub fn capacity(&self) -> usize { self.capacity }
235 | 
236 |     /// Used to implement iteration.
237 |     /// Search for a present bucket >= idx.
238 |     /// If one is found, Some(..) is returned and idx is set to a value
239 |     /// that can be passed back to iter_advance to look for the next bucket.
240 |     /// When all bucket have been scanned, idx is set to self.capacity.
241 |     pub fn iter_advance<'a>(&'a self, idx: &mut usize) -> Option<(&'a K, &'a V)> {
242 |         if *idx >= self.capacity {
243 |             return None;
244 |         }
245 |         for i in *idx..self.capacity {
246 |             if self.is_present(i) {
247 |                 *idx = i + 1;
248 |                 let entry = unsafe {
249 |                     let key = self.keys.offset(i as isize);
250 |                     let value = self.values.offset(i as isize);
251 |                     (&*key, &*value)
252 |                 };
253 |                 return Some(entry);
254 |             }
255 |         }
256 |         *idx = self.capacity;
257 |         return None;
258 |     }
259 | 
260 |     pub fn clear(&mut self) {
261 |         self.foreach_present_idx(|i| {
262 |             unsafe {
263 |                 drop_in_place::<K>(self.keys.offset(i as isize));
264 |                 drop_in_place::<V>(self.values.offset(i as isize));
265 |             }
266 |         });
267 |         unsafe {
268 |             ptr::write_bytes(self.hashes, 0, self.capacity);
269 |         }
270 |         self.len = 0;
271 |     }
272 | 
273 |     fn is_present(&self, idx: usize) -> bool {
274 |         assert!(idx < self.capacity);
275 |         self.hash_at(idx) & PRESENT != 0
276 |     }
277 | 
278 |     fn is_deleted(&self, idx: usize) -> bool {
279 |         assert!(idx < self.capacity);
280 |         !self.is_present(idx) && self.hash_at(idx) & TOMBSTONE != 0
281 |     }
282 | 
283 |     fn hash_at(&self, idx: usize) -> u64 {
284 |         assert!(idx < self.capacity);
285 |         unsafe { *self.hashes.offset(idx as isize) }
286 |     }
287 | 
288 |     fn foreach_present_idx<F>(&self, mut f: F) where F: FnMut(usize) {
289 |         let mut seen = 0;
290 |         for i in 0..self.capacity {
291 |             if seen == self.len {
292 |                 return;
293 |             }
294 |             if self.is_present(i) {
295 |                 seen += 1;
296 |                 f(i);
297 |             }
298 |         }
299 |     }
300 | }
301 | 
302 | impl <K, V> Drop for Table<K, V> {
303 |     fn drop(&mut self) {
304 |         if self.hashes.is_null() {
305 |             // "Dying" instance that has been resized
306 |             return;
307 |         }
308 |         self.foreach_present_idx(|i| {
309 |             unsafe {
310 |                 drop_in_place::<K>(self.keys.offset(i as isize));
311 |                 drop_in_place::<V>(self.values.offset(i as isize));
312 |             }
313 |         });
314 |         unsafe {
315 |             dealloc(self.hashes, self.capacity);
316 |             dealloc(self.keys, self.capacity);
317 |             dealloc(self.values, self.capacity);
318 |         }
319 |     }
320 | }
321 | 
// `Table` stores raw pointers, which are neither `Send` nor `Sync` automatically, so both
// traits are implemented by hand. The table owns the keys and values behind those pointers,
// hence the bounds on `K` and `V`.
unsafe impl <K, V> Sync for Table<K, V> where K: Send + Sync, V: Send + Sync { }

unsafe impl <K, V> Send for Table<K, V> where K: Send, V: Send { }
325 | 


--------------------------------------------------------------------------------
/tests/tests.rs:
--------------------------------------------------------------------------------
 1 | extern crate rand;
 2 | extern crate concurrent_hashmap;
 3 | 
 4 | use std::collections::HashMap;
 5 | use std::thread;
 6 | use std::default::Default;
 7 | use std::sync::Arc;
 8 | use rand::{Rng, weak_rng};
 9 | use concurrent_hashmap::*;
10 | 
/// Spawn a lot of threads that update the map concurrently, each in its own key range.
/// While inserting, every thread also looks up random keys from the total range and checks
/// that each is either absent or mapped to the id of the thread owning that range.
#[test]
fn many_threads() {
    let shared: Arc<ConcHashMap<i32, i32>> = Arc::new(Default::default());
    let per_thread = 1500;
    let nthreads = 30;
    let upper = nthreads * per_thread;
    let handles: Vec<_> = (0..nthreads).map(|t| {
        let map = shared.clone();
        thread::spawn(move || {
            let mut rng = weak_rng();
            let start = t * per_thread;
            for key in start..start + per_thread {
                map.insert(key, t);
                let probe = rng.gen_range(0, upper);
                if let Some(ref found) = map.find(&probe) {
                    if probe / per_thread != *found.get() {
                        return Err(format!("{} => {}", probe, *found.get()));
                    }
                }
            }
            Ok(())
        })
    }).collect();
    for handle in handles {
        assert_eq!(handle.join().unwrap(), Ok(()));
    }
}
40 | 
/// Count the occurrences of random numbers both sequentially (std `HashMap`) and in parallel
/// (four worker threads sharing one `ConcHashMap`), then verify that both counts agree.
#[test]
fn count_compare_with_sequential() {
    /// Sequential reference count.
    fn count_seq(nums: &[u32]) -> HashMap<u32, u32> {
        let mut counts = HashMap::new();
        for &num in nums {
            *counts.entry(num).or_insert(0) += 1;
        }
        counts
    }

    /// Parallel count: the input is split into chunks, one worker thread per chunk.
    fn count_par(nums: &[u32]) -> Arc<ConcHashMap<u32, u32>> {
        let map: Arc<ConcHashMap<u32, u32>> = Default::default();
        let workers: Vec<_> = nums.chunks(nums.len() / 4).map(|chunk| {
            let map = map.clone();
            let owned = chunk.to_vec();
            thread::spawn(move || {
                for &num in owned.iter() {
                    map.upsert(num, 1, &|count| *count += 1);
                }
            })
        }).collect();
        for worker in workers {
            worker.join().unwrap();
        }
        map
    }

    let n = 10000;
    let max = 100;
    let mut rng = weak_rng();
    let nums: Vec<_> = (0..n).map(|_| rng.gen_range(0, max)).collect();

    let seq = count_seq(&nums);
    let par = count_par(&nums);

    for k in 0..max {
        match (seq.get(&k), par.find(&k)) {
            // The number never occurred in the input.
            (None, None) => continue,
            (seq_v, par_v) => assert_eq!(seq_v.unwrap(), par_v.unwrap().get()),
        }
    }
}


--------------------------------------------------------------------------------