├── .gitignore ├── Cargo.toml ├── LICENSE └── src └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mph-rs" 3 | version = "0.1.0" 4 | authors = ["Damian Gryski "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Damian Gryski 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
// /src/lib.rs
//
// A minimal perfect hash table over a fixed set of string keys, built with a
// "hash, displace" scheme: keys are bucketed by a first-level hash, and every
// bucket holding more than one key gets a seed that sends all of its keys to
// distinct free slots. Buckets holding a single key store the slot directly.

use std::vec;

use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};

/// Lookup table mapping each key it was built from to that key's position in
/// the original key slice.
///
/// Both vectors have the same power-of-two length.
#[derive(Debug, Clone)]
pub struct Table {
    /// Slot -> index of the key stored there (0 for slots that were never claimed).
    values: Vec<i32>,
    /// First-level bucket -> seed. A positive value is the displacement seed for
    /// a multi-key bucket; a negative value `-(slot + 1)` points straight at the
    /// slot of a single-key bucket; 0 means the bucket is empty.
    seeds: Vec<i32>,
}

/// A key waiting to be placed during construction.
struct Entry {
    /// Index of the key in the input slice, plus one (0 marks a free slot).
    idx: i32,
    /// First-level hash of the key.
    hash: u64,
}

impl Table {
    /// Builds a table for `keys`.
    ///
    /// After construction, `query(keys[i]) == i` for every `i`.
    ///
    /// # Panics
    ///
    /// * if two keys are equal (or collide on all 64 bits of their hash), since
    ///   no seed could ever separate them;
    /// * if the table would need more slots than fit in an `i32`.
    pub fn new(keys: &[&str]) -> Table {
        let size = keys.len().next_power_of_two();
        assert!(
            size <= i32::MAX as usize,
            "too many keys: table positions are stored as i32"
        );
        // `size` is a power of two (at least 1), so masking equals `% size`.
        let mask = size as u64 - 1;

        let mut buckets: Vec<Vec<Entry>> = (0..size).map(|_| Vec::new()).collect();
        for (idx, k) in keys.iter().enumerate() {
            let hash = calculate_hash(*k);
            // idx + 1 so we can identify empty entries in the table with 0
            buckets[(hash & mask) as usize].push(Entry {
                idx: (idx + 1) as i32,
                hash,
            });
        }

        // Largest buckets first: they are the hardest to place, so they get
        // first pick of the free slots.
        buckets.sort_by(|a, b| b.len().cmp(&a.len()));

        let mut values = vec![0i32; size];
        let mut seeds = vec![0i32; size];

        // Thanks to the sort, every multi-key bucket precedes every single-key one.
        let multi = buckets.iter().take_while(|b| b.len() > 1).count();
        let mut claimed: HashMap<usize, i32> = HashMap::new();

        for bucket in &buckets[..multi] {
            // Two entries with the same hash land on the same slot for every
            // seed, so the search below would never terminate.
            for (n, e) in bucket.iter().enumerate() {
                if let Some(p) = bucket[..n].iter().find(|p| p.hash == e.hash) {
                    panic!(
                        "keys[{}] ({:?}) and keys[{}] ({:?}) are duplicates or collide on all 64 hash bits",
                        p.idx - 1,
                        keys[(p.idx - 1) as usize],
                        e.idx - 1,
                        keys[(e.idx - 1) as usize]
                    );
                }
            }

            // Try seeds 1, 2, 3, ... until every key of the bucket lands on a
            // distinct slot that no earlier bucket has claimed.
            let mut seed = 0u64;
            loop {
                seed += 1;
                claimed.clear();
                let placed = bucket.iter().all(|e| {
                    // wrapping_add: the hash uses the whole u64 range, so a plain
                    // `+` could overflow (and panic in debug builds).
                    let slot = (xorshift_mult64(e.hash.wrapping_add(seed)) & mask) as usize;
                    values[slot] == 0 && claimed.insert(slot, e.idx).is_none()
                });
                if placed {
                    break;
                }
            }
            assert!(
                seed <= i32::MAX as u64,
                "seed search exhausted the i32 range"
            );

            // mark subkey spaces as claimed
            for (&slot, &idx) in &claimed {
                values[slot] = idx;
            }

            // and assign this seed value for every subkey
            seeds[(bucket[0].hash & mask) as usize] = seed as i32;
        }

        // find the unassigned entries in the table
        let mut free: Vec<usize> = Vec::new();
        for (slot, v) in values.iter_mut().enumerate() {
            if *v == 0 {
                free.push(slot);
            } else {
                // decrement idx as this is now the final value for the table
                *v -= 1;
            }
        }

        // Single-key buckets need no seed search: hand each one a free slot.
        for bucket in buckets[multi..].iter().take_while(|b| !b.is_empty()) {
            let e = &bucket[0];

            // There are at least as many slots as keys, so a free one remains.
            let dst = free.pop().expect("fewer free slots than remaining keys");

            // claim it; -1 because of the +1 at the start
            values[dst] = e.idx - 1;

            // store offset in seed as a negative; -1 so even slot 0 is negative
            seeds[(e.hash & mask) as usize] = -(dst as i32 + 1);
        }

        Table { values, seeds }
    }

    /// Looks up `k` and returns its index in the slice the table was built from.
    ///
    /// The table does not store the keys themselves, so a key that was not part
    /// of the original set still yields some index; callers that need a
    /// membership test must compare `k` against the key stored at that index.
    pub fn query(&self, k: &str) -> usize {
        let mask = self.values.len() as u64 - 1;
        let hash = calculate_hash(k);
        let seed = self.seeds[(hash & mask) as usize];

        let slot = if seed < 0 {
            // Single-key bucket: the seed encodes the slot as -(slot + 1).
            -(seed + 1) as usize
        } else {
            (xorshift_mult64(hash.wrapping_add(seed as u64)) & mask) as usize
        };
        self.values[slot] as usize
    }
}

/// Mixes `x` with three xor-shift steps (12, 25, 27) followed by a wrapping
/// multiplication by a fixed odd constant. Maps 0 to 0.
fn xorshift_mult64(x: u64) -> u64 {
    let mut x = x;
    x ^= x >> 12; // a
    x ^= x << 25; // b
    x ^= x >> 27; // c
    x.wrapping_mul(2_685_821_657_736_338_717)
}

/// Hashes `t` with a freshly created `DefaultHasher`.
///
/// `?Sized` lets callers pass a `&str` directly instead of allocating a `String`.
fn calculate_hash<T: Hash + ?Sized>(t: &T) -> u64 {
    let mut s = DefaultHasher::new();
    t.hash(&mut s);
    s.finish()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_works() {
        let keys = vec!["foo", "bar", "baz", "qux", "zot", "frob", "zork", "zeek"];

        let t = Table::new(&keys);

        for (i, k) in keys.iter().enumerate() {
            assert_eq!(t.query(k), i);
        }
    }
}
--------------------------------------------------------------------------------