// Repository layout
// -----------------
// ├── .gitignore
// ├── readme.md
// ├── Cargo.toml
// └── src
//     └── lib.rs
//
// /.gitignore
// -----------
// /target
// **/*.rs.bk
// Cargo.lock
//
// /readme.md
// ----------
// ## hashmap
// A simple, primitive hash table.
//
// ## Running the tests
// `cargo +nightly test --release`
//
// `cargo +nightly bench`
//
// /Cargo.toml
// -----------
// [package]
// name = "hashmap"
// version = "0.1.0"
// authors = ["ZOTTCE "]
// edition = "2018"
//
// [dependencies]
//
// [dev-dependencies]
// rand = "0.6.5"
//
// [profile.bench]
// opt-level = 3
// debug = false
// rpath = false
// lto = false
// debug-assertions = false
// codegen-units = 16
// panic = 'unwind'
// incremental = false
// overflow-checks = false
//
// /src/lib.rs
// -----------

//! A deliberately simple open-addressing hash table keyed by `usize`.
//!
//! Collisions are resolved with linear probing (step 1) and removals leave
//! tombstones that are reclaimed as soon as it is provably safe to do so.

// The unstable `test` crate is only needed for the `#[bench]` functions, so the
// feature gate is enabled for test/bench builds only; the library itself builds
// on a stable toolchain.
#![cfg_attr(test, feature(test))]

use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
use std::marker::PhantomData;
use std::mem::{self, MaybeUninit};
use std::ptr::{self, NonNull};

/// The slot has never held an entry (this is the all-zero bit pattern, so a
/// freshly zero-allocated table consists of `EMPTY` slots only).
const EMPTY: u8 = 0;
/// The slot currently holds a live key/value pair.
const TAKEN: u8 = 1;
/// The slot held an entry that was removed (a tombstone); probing must continue past it.
const DELETED: u8 = 2;

/// One cell of the table.
struct Slot<T> {
    /// One of `EMPTY`, `TAKEN` or `DELETED`.
    // Not the most memory-efficient choice: struct alignment pads this single byte.
    flag: u8,
    /// The key stored in this slot; meaningful only while `flag == TAKEN`.
    key: usize,
    /// The stored value; initialised if and only if `flag == TAKEN`.
    value: MaybeUninit<T>,
}

/// The simplest possible hash table: keys are `usize` values and the hash
/// function is `key % capacity`.
pub struct HashMap<V> {
    /// Pointer to `capacity` contiguous slots; dangling while `capacity == 0`.
    slots: NonNull<Slot<V>>,
    /// Number of `TAKEN` slots.
    items: usize,
    /// Number of allocated slots: zero, or a power of two.
    capacity: usize,
    /// Tells the compiler that the map logically owns values of type `V`.
    marker: PhantomData<V>,
}

impl<V> HashMap<V> {
    /// Creates an empty map without allocating.
    pub fn new() -> HashMap<V> {
        HashMap {
            slots: NonNull::dangling(),
            items: 0,
            capacity: 0,
            marker: PhantomData,
        }
    }

    /// Allocates a zero-initialised table with room for at least `capacity`
    /// entries, rounded up to the next power of two (a request of `0` yields
    /// one slot).
    ///
    /// # Panics
    ///
    /// Panics if the rounded capacity or the resulting allocation size
    /// overflows; aborts through `handle_alloc_error` if allocation fails.
    fn new_inner(capacity: usize) -> HashMap<V> {
        let capacity = capacity
            .checked_next_power_of_two()
            .expect("capacity overflow");
        let layout = Layout::array::<Slot<V>>(capacity).expect("capacity overflow");

        // SAFETY: `capacity >= 1` and `Slot<V>` contains a `usize`, so the
        // layout has a non-zero size as `alloc_zeroed` requires.
        let raw = unsafe { alloc_zeroed(layout) };

        let slots = match NonNull::new(raw as *mut Slot<V>) {
            Some(slots) => slots,
            None => handle_alloc_error(layout),
        };

        HashMap {
            slots,
            capacity,
            items: 0,
            marker: PhantomData,
        }
    }

    /// Creates an empty map with room for at least `capacity` entries.
    ///
    /// The actual capacity is `capacity` rounded up to the next power of two.
    ///
    /// # Panics
    ///
    /// Panics if the rounded capacity or the allocation size overflows.
    pub fn with_capacity(capacity: usize) -> HashMap<V> {
        Self::new_inner(capacity)
    }

    /// Shared access to the slot at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index >= capacity`.
    fn slot(&self, index: usize) -> &Slot<V> {
        assert!(index < self.capacity, "slot index out of bounds");
        // SAFETY: the index was just bounds-checked, a non-zero capacity means
        // `slots` points to a live allocation of `capacity` slots, and every
        // slot is a valid `Slot<V>` because the table is zero-initialised and
        // `value` is a `MaybeUninit`.
        unsafe { &*self.slots.as_ptr().add(index) }
    }

    /// Exclusive access to the slot at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index >= capacity`.
    fn slot_mut(&mut self, index: usize) -> &mut Slot<V> {
        assert!(index < self.capacity, "slot index out of bounds");
        // SAFETY: same reasoning as in `slot`; `&mut self` guarantees uniqueness.
        unsafe { &mut *self.slots.as_ptr().add(index) }
    }

    /// Linear probing with a step of one: yields every slot index exactly
    /// once, starting at `hash` and wrapping around the end of the table.
    fn prob_seq(&self, hash: usize) -> impl Iterator<Item = usize> {
        let capacity = self.capacity;
        (0..capacity).map(move |offset| (hash + offset) % capacity)
    }

    /// Returns the index of the `TAKEN` slot holding `key`, if any.
    fn find_index(&self, key: usize) -> Option<usize> {
        if self.capacity == 0 {
            return None;
        }

        for index in self.prob_seq(key % self.capacity) {
            let slot = self.slot(index);

            match slot.flag {
                // A never-used slot terminates every probe chain.
                EMPTY => return None,
                TAKEN if slot.key == key => return Some(index),
                // Tombstone or a different key: keep probing.
                _ => {}
            }
        }

        None
    }

    /// Returns the first slot in the probe sequence of `hash` that does not
    /// hold a live entry.
    ///
    /// # Panics
    ///
    /// Panics if every slot is `TAKEN`; callers reserve space beforehand.
    fn find_insert_slot(&self, hash: usize) -> usize {
        self.prob_seq(hash)
            .find(|&index| self.slot(index).flag != TAKEN)
            .expect("no free slot: space must be reserved before inserting")
    }

    /// Returns a reference to the value stored under `key`.
    pub fn get(&self, key: usize) -> Option<&V> {
        let index = self.find_index(key)?;
        // SAFETY: `find_index` only returns `TAKEN` slots, whose value is initialised.
        Some(unsafe { &*self.slot(index).value.as_ptr() })
    }

    /// Returns a mutable reference to the value stored under `key`.
    pub fn get_mut(&mut self, key: usize) -> Option<&mut V> {
        let index = self.find_index(key)?;
        // SAFETY: `find_index` only returns `TAKEN` slots, whose value is initialised.
        Some(unsafe { &mut *self.slot_mut(index).value.as_mut_ptr() })
    }

    /// Stores `value` under `key`, returning the value it replaced, if any.
    ///
    /// # Panics
    ///
    /// Panics if the table has to grow and the new capacity overflows.
    pub fn insert(&mut self, key: usize, value: V) -> Option<V> {
        if let Some(existing) = self.get_mut(key) {
            return Some(mem::replace(existing, value));
        }

        self.reserve(1);
        self.insert_inner(key, value);
        None
    }

    /// Writes a new entry into the first free slot of its probe sequence.
    ///
    /// The caller guarantees that `key` is not present yet and that at least
    /// one slot is not `TAKEN`.
    fn insert_inner(&mut self, key: usize, value: V) {
        let index = self.find_insert_slot(key % self.capacity);

        // Plain assignment is fine: `Slot` has no drop glue, and a non-`TAKEN`
        // slot holds no live value that would need dropping.
        *self.slot_mut(index) = Slot {
            flag: TAKEN,
            key,
            value: MaybeUninit::new(value),
        };

        self.items += 1;
    }

    /// Removes `key` from the map and returns its value, if it was present.
    pub fn remove(&mut self, key: usize) -> Option<V> {
        let index = self.find_index(key)?;
        let capacity = self.capacity;

        let slot = self.slot_mut(index);
        // SAFETY: the slot is `TAKEN`, so the value is initialised. The flag is
        // changed right below, so the moved-out value is never touched again.
        let value = unsafe { ptr::read(slot.value.as_ptr()) };
        slot.flag = DELETED;
        self.items -= 1;

        // A tombstone may become `EMPTY` only when the slot after it is `EMPTY`:
        // then no probe chain can continue through it to a later entry. The same
        // argument applies to the run of tombstones directly before it, so sweep
        // backwards. With a single slot the "next" slot is the removed one itself.
        let next = (index + 1) % capacity;
        if next == index || self.slot(next).flag == EMPTY {
            let mut cursor = index;
            while self.slot(cursor).flag == DELETED {
                self.slot_mut(cursor).flag = EMPTY;
                cursor = (cursor + capacity - 1) % capacity;
            }
        }

        Some(value)
    }

    /// Makes sure there is room for `additional` more entries.
    ///
    /// # Panics
    ///
    /// Panics if the required capacity overflows `usize`.
    pub fn reserve(&mut self, additional: usize) {
        let required = self
            .items
            .checked_add(additional)
            .expect("capacity overflow");

        if required > self.capacity {
            self.resize(required);
        }
    }

    /// Rebuilds the table with room for at least `new_size` entries (rounded
    /// up to a power of two), re-inserting every live entry and discarding
    /// all tombstones.
    ///
    /// # Panics
    ///
    /// Panics if `new_size` is smaller than the number of stored entries or
    /// if the new capacity overflows.
    pub fn resize(&mut self, new_size: usize) {
        assert!(
            new_size >= self.items,
            "the new size is less than count of items"
        );

        let mut rebuilt = Self::new_inner(new_size);

        for index in 0..self.capacity {
            let old = self.slot_mut(index);
            if old.flag != TAKEN {
                continue;
            }

            // Release the old slot first so the old table never drops the
            // value that is being moved into the new one.
            old.flag = EMPTY;
            let key = old.key;
            // SAFETY: the slot was `TAKEN`, so the value is initialised, and
            // ownership moves to `rebuilt` because the flag was reset above.
            let value = unsafe { ptr::read(old.value.as_ptr()) };

            rebuilt.insert_inner(key, value);
        }

        // `rebuilt` now holds the old, fully emptied allocation and frees it on drop.
        mem::swap(self, &mut rebuilt);
    }

    /// Number of allocated slots (zero, or a power of two).
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Number of entries stored in the map.
    pub fn len(&self) -> usize {
        self.items
    }

    /// Returns `true` when the map holds no entries.
    pub fn is_empty(&self) -> bool {
        self.items == 0
    }
}

impl<V> Default for HashMap<V> {
    /// Equivalent to [`HashMap::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl<V> Drop for HashMap<V> {
    fn drop(&mut self) {
        // Nothing was allocated for a map that never grew.
        if self.capacity == 0 {
            return;
        }

        if mem::needs_drop::<V>() {
            for index in 0..self.capacity {
                let slot = self.slot_mut(index);
                if slot.flag == TAKEN {
                    // SAFETY: `TAKEN` slots hold an initialised value, and each
                    // slot is visited exactly once.
                    unsafe { ptr::drop_in_place(slot.value.as_mut_ptr()) };
                }
            }
        }

        let layout = Layout::array::<Slot<V>>(self.capacity)
            .expect("layout was valid when the table was allocated");
        // SAFETY: `slots` was allocated in `new_inner` with exactly this layout
        // and has not been freed yet.
        unsafe { dealloc(self.slots.as_ptr() as *mut u8, layout) };
    }
}

// SAFETY: the map exclusively owns its allocation and the values inside it, so
// moving the map to another thread is sound whenever the values may be moved.
unsafe impl<V: Send> Send for HashMap<V> {}

#[cfg(test)]
mod tests {
    extern crate test;

    use super::HashMap;
    use rand::random;
    use std::collections::HashMap as StdMap;
    use test::Bencher;

    #[test]
    fn empty_hashmap() {
        let hashmap: HashMap<f64> = HashMap::new();
        assert_eq!(hashmap.capacity(), 0);
        assert_eq!(hashmap.get(0), None);
        assert!(hashmap.is_empty());
    }

    #[test]
    fn resize() {
        let mut hashmap: HashMap<f64> = HashMap::with_capacity(1);
        hashmap.insert(0, 0.1);
        hashmap.insert(1, 0.2);
        assert_eq!(hashmap.get(0).copied(), Some(0.1));
        assert_eq!(hashmap.get(1).copied(), Some(0.2));
    }

    #[test]
    fn capacity() {
        let mut hashmap: HashMap<f64> = HashMap::with_capacity(12);
        assert_eq!(hashmap.capacity(), 16);
        hashmap.insert(15, 0.21);
        assert_eq!(hashmap.capacity(), 16);
    }

    #[test]
    fn collision() {
        let mut hashmap: HashMap<f64> = HashMap::with_capacity(2);
        hashmap.insert(2, 0.1); // 2 % 2 == 0
        hashmap.insert(4, 0.2); // 4 % 2 == 0

        assert_eq!(hashmap.get(2).copied(), Some(0.1));
        assert_eq!(hashmap.get(4).copied(), Some(0.2));
    }

    #[test]
    fn collision_remove() {
        let mut hashmap: HashMap<f64> = HashMap::with_capacity(4);
        hashmap.insert(4, 0.1); // 4 % 4 == 0
        hashmap.insert(8, 0.2); // 8 % 4 == 0
        hashmap.insert(12, 0.3); // 12 % 4 == 0

        hashmap.remove(8);

        assert_eq!(hashmap.get(4).copied(), Some(0.1));
        assert_eq!(hashmap.get(12).copied(), Some(0.3));
    }

    #[test]
    fn remove_keeps_other_probe_chains_reachable() {
        let mut hashmap: HashMap<f64> = HashMap::with_capacity(4);
        hashmap.insert(0, 0.1); // 0 % 4 == 0 -> slot 0
        hashmap.insert(4, 0.2); // 4 % 4 == 0 -> slot 1
        hashmap.insert(1, 0.3); // 1 % 4 == 1, slot 1 is taken -> slot 2

        hashmap.remove(0);
        hashmap.remove(4);

        // Slot 1 must stay a tombstone, otherwise key 1 becomes unreachable.
        assert_eq!(hashmap.get(1).copied(), Some(0.3));
        assert_eq!(hashmap.len(), 1);
    }

    #[test]
    fn remove_after_wrap_around() {
        let mut hashmap: HashMap<f64> = HashMap::with_capacity(4);
        hashmap.insert(3, 0.1); // 3 % 4 == 3 -> slot 3
        hashmap.insert(7, 0.2); // 7 % 4 == 3, wraps around -> slot 0

        assert_eq!(hashmap.remove(7), Some(0.2));
        assert_eq!(hashmap.get(3).copied(), Some(0.1));
        assert_eq!(hashmap.get(7), None);
    }

    #[test]
    fn remove_with_single_slot() {
        let mut hashmap: HashMap<f64> = HashMap::with_capacity(1);
        hashmap.insert(5, 0.5);

        assert_eq!(hashmap.remove(5), Some(0.5));
        assert_eq!(hashmap.get(5), None);
        assert!(hashmap.is_empty());
    }

    #[test]
    fn owned_values() {
        let mut hashmap: HashMap<String> = HashMap::new();
        hashmap.insert(1, "one".to_string());
        hashmap.insert(2, "two".to_string());

        assert_eq!(hashmap.remove(1), Some("one".to_string()));
        assert_eq!(hashmap.get(2).map(String::as_str), Some("two"));
    }

    #[test]
    fn double_insert() {
        let mut hashmap: HashMap<f64> = HashMap::new();
        hashmap.insert(10, 0.1);
        assert_eq!(hashmap.insert(10, 0.2), Some(0.1));
        assert_eq!(hashmap.get(10).copied(), Some(0.2));
        assert_eq!(hashmap.len(), 1);
    }

    #[test]
    fn dont_die_please() {
        let mut hashmap: HashMap<f64> = HashMap::new();
        let mut array = vec![];

        for key in 0..1_000_000 {
            let value = random();
            hashmap.insert(key, value);
            array.push(value);
        }

        for (key, value) in array.iter().enumerate() {
            assert_eq!(hashmap.get(key), Some(value));
            assert_eq!(hashmap.remove(key), Some(*value));
        }

        for key in 0..1_000_000 {
            assert_eq!(hashmap.get(key), None);
        }
    }

    #[bench]
    fn my_hashmap(b: &mut Bencher) {
        let mut hashmap: HashMap<f64> = HashMap::new();

        b.iter(|| {
            for key in 0..500_000 {
                let value = random();
                hashmap.insert(key, value);
            }
        });
    }

    #[bench]
    fn std_hashmap(b: &mut Bencher) {
        let mut stdmap: StdMap<usize, f64> = StdMap::new();

        b.iter(|| {
            for key in 0..500_000 {
                let value = random();
                stdmap.insert(key, value);
            }
        });
    }
}