├── .gitignore ├── README.md ├── benchmarks ├── hashmap.zig └── martinus_map.zig ├── build.zig ├── deps ├── bench.zig └── wyhash.zig ├── hashmap.zig ├── monolithic_array.zig └── sliceable_hashmap.zig /.gitignore: -------------------------------------------------------------------------------- 1 | zig-cache/ 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A set of containers for Zig 2 | 3 | May turn useful, or not. 4 | 5 | This is mostly a testbed for a few containers, especially `hashmap.zig` which ended up in the standard library. 6 | 7 | # Benchmarks 8 | 9 | A couple benchmarks are available (and can be tinkered with): 10 | ```shell 11 | zig build bench -Drelease-fast 12 | ``` 13 | and more interestingly, a simplified implementation in Zig of [this one](https://github.com/martinus/map_benchmark/) which ended up in [gotta go fast](https://github.com/ziglang/gotta-go-fast/tree/master/benchmarks/std-hash-map). 14 | ```shell 15 | zig build martinus -Drelease-fast 16 | ``` 17 | 18 | The `martinus` benchmark directly uses the standard library's hashmap, and supports `-Doverride-lib-dir=path/to/lib` for in-place testing. 19 | -------------------------------------------------------------------------------- /benchmarks/hashmap.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const builtin = @import("builtin"); 3 | const meta = std.meta; 4 | 5 | const TypeId = builtin.TypeId; 6 | 7 | const ArrayList = std.ArrayList; 8 | 9 | const bench = @import("bench"); 10 | const benchmark = bench.benchmark; 11 | const benchmarkArgs = bench.benchmarkArgs; 12 | const clobberMemory = bench.clobberMemory; 13 | const doNotOptimize = bench.doNotOptimize; 14 | const Context = bench.Context; 15 | 16 | const HashMap = @import("hashmap").HashMap; 17 | const SliceableHashMap = @import("sliceable_hashmap").HashMap; 18 | 19 | var heap = std.heap.HeapAllocator.init(); 20 | var allocator = &heap.allocator; 21 | 22 | pub fn eqlu32(x: u32, y: u32) bool { 23 | return x == y; 24 | } 25 | 26 | fn putHelper(map: anytype, key: anytype, value: anytype) void { 27 | const put_type = @TypeOf(map.put); 28 | const return_type = @typeInfo(put_type).BoundFn.return_type.?; 29 | const payload_type = @typeInfo(return_type).ErrorUnion.payload; 30 | 31 | if (payload_type == void) { 32 | map.put(key, value) catch unreachable; 33 | } else { 34 | _ = map.put(key, value) catch unreachable; 35 | } 36 | } 37 | 38 | fn removeHelper(map: anytype, key: anytype) void { 39 | doNotOptimize(map.remove(key)); 40 | } 41 | 42 | fn reserveHelper(map: anytype, size: u32) void { 43 | const Map = @TypeOf(map); 44 | 45 | if (comptime meta.trait.hasFn("reserve")(Map)) { 46 | map.reserve(size) catch unreachable; 47 | } else { 48 | var i: u32 = 0; 49 | while (i < size) : (i += 1) { 50 | putHelper(map, i, 0); 51 | } 52 | map.clear(); 53 | } 54 | } 55 | 56 | const MapBenchFn = fn (c: *Context, n: u32) void; 57 | 58 | /// Insert sequential integers from 0 to n-1. 
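/// (Hedged note, not from the original docs: each of these generator
/// functions returns a `MapBenchFn` closure that `compareFlatAndStd` feeds to
/// `benchmarkArgs` together with the `sizes` array, roughly:
///     benchmarkArgs("insert Std ", comptime insertSequential(Std), &sizes);
/// Any `Map` with `init`, `deinit` and a `put`-style method works here,
/// because `putHelper` absorbs the differing `put` return types at comptime.)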
59 | fn insertSequential(comptime Map: type) MapBenchFn { 60 | const Closure = struct { 61 | pub fn bench(ctx: *Context, n: u32) void { 62 | while (ctx.runExplicitTiming()) { 63 | var map = Map.init(allocator); 64 | defer map.deinit(); 65 | 66 | ctx.startTimer(); 67 | defer ctx.stopTimer(); 68 | 69 | var i: u32 = 0; 70 | while (i < n) : (i += 1) { 71 | putHelper(&map, i, i); 72 | } 73 | clobberMemory(); 74 | } 75 | } 76 | }; 77 | 78 | return Closure.bench; 79 | } 80 | 81 | /// Insert sequential integers from 0 to n-1, and sequentially check if the map contains them. 82 | fn successfulContains(comptime Map: type) MapBenchFn { 83 | const Closure = struct { 84 | pub fn bench(ctx: *Context, n: u32) void { 85 | var map = Map.init(allocator); 86 | defer map.deinit(); 87 | 88 | { 89 | var i: u32 = 0; 90 | while (i < n) : (i += 1) { 91 | putHelper(&map, i, i); 92 | } 93 | } 94 | 95 | while (ctx.run()) { 96 | var i: u32 = n; 97 | while (i > 0) : (i -= 1) { 98 | doNotOptimize(map.contains(i)); 99 | } 100 | } 101 | } 102 | }; 103 | 104 | return Closure.bench; 105 | } 106 | 107 | /// Insert sequential integers from 0 to n-1, and check if the map contains sequential integers from n to 2n. 108 | fn unsuccessfulContains(comptime Map: type) MapBenchFn { 109 | const Closure = struct { 110 | pub fn bench(ctx: *Context, n: u32) void { 111 | var map = Map.init(allocator); 112 | defer map.deinit(); 113 | 114 | { 115 | var i: u32 = 0; 116 | while (i < n) : (i += 1) { 117 | putHelper(&map, i, i); 118 | } 119 | } 120 | 121 | while (ctx.run()) { 122 | var i: u32 = n; 123 | while (i < 2 * n) : (i += 1) { 124 | doNotOptimize(map.contains(i)); 125 | } 126 | } 127 | } 128 | }; 129 | 130 | return Closure.bench; 131 | } 132 | 133 | /// Insert sequential integers from 0 to n-1 and remove them in random order. 134 | fn eraseRandomOrder(comptime Map: type) MapBenchFn { 135 | const Closure = struct { 136 | pub fn bench(ctx: *Context, n: u32) void { 137 | var map = Map.init(allocator); 138 | defer map.deinit(); 139 | 140 | var keys = ArrayList(u32).init(allocator); 141 | { 142 | var i: u32 = 0; 143 | while (i < n) : (i += 1) { 144 | keys.append(i) catch unreachable; 145 | } 146 | } 147 | 148 | var rng = std.rand.DefaultPrng.init(0); 149 | std.rand.Random.shuffle(&rng.random, u32, keys.items); 150 | 151 | while (ctx.runExplicitTiming()) { 152 | var i: u32 = 0; 153 | while (i < n) : (i += 1) { 154 | putHelper(&map, i, i); 155 | } 156 | 157 | ctx.startTimer(); 158 | defer ctx.stopTimer(); 159 | for (keys.items) |key| { 160 | removeHelper(&map, key); 161 | } 162 | } 163 | } 164 | }; 165 | 166 | return Closure.bench; 167 | } 168 | 169 | /// Insert n integers and iterate through to sum them up. 
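/// (Hedged note: the sliceable branch below assumes a map exposing `toSlice`
/// and an `items` field, while the generic branch uses the iterator API, e.g.
///     var it = map.iterator();
///     while (it.next()) |kv| sum += kv.value;
/// Both variants sit behind a comptime `hasFn("toSlice")` check so the same
/// benchmark body covers either container.)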
170 | fn iterate(comptime Map: type) MapBenchFn { 171 | const Closure = struct { 172 | pub fn bench(ctx: *Context, n: u32) void { 173 | var map = Map.init(allocator); 174 | defer map.deinit(); 175 | 176 | var rng = std.rand.DefaultPrng.init(0); 177 | var keys = ArrayList(u32).init(allocator); 178 | var i: u32 = 0; 179 | while (i < n) : (i += 1) { 180 | keys.append(i) catch unreachable; 181 | } 182 | 183 | for (keys.items) |key| { 184 | putHelper(&map, key, key); 185 | } 186 | 187 | while (ctx.run()) { 188 | var sum: u64 = 0; 189 | if (comptime meta.trait.hasFn("toSlice")(Map)) { 190 | for (map.items) |kv| { 191 | sum += kv.value; 192 | } 193 | } else { 194 | var it = map.iterator(); 195 | while (it.next()) |kv| { 196 | sum += kv.value; 197 | } 198 | } 199 | 200 | doNotOptimize(sum); 201 | } 202 | } 203 | }; 204 | 205 | return Closure.bench; 206 | } 207 | 208 | const sizes = [_]u32{ 5, 25, 100, 500, 1000, 15000, 50000 }; 209 | 210 | const Flat = HashMap(u32, u32, wyhash, eqlu32, 80); 211 | //const Sliceable = SliceableHashMap(u32, u32, wyhash, eqlu32); 212 | const Std = std.HashMap(u32, u32, wyhash, eqlu32, 80); 213 | 214 | const wyhash = std.hash_map.getAutoHashFn(u32); 215 | const wyhash32 = struct { 216 | fn hash(key: u32) u64 { 217 | var hasher = std.hash.Wyhash.init(0); 218 | std.hash.autoHash(&hasher, key); 219 | return hasher.final(); 220 | } 221 | }.hash; 222 | 223 | const BenchFn = @TypeOf(insertSequential); 224 | fn compareFlatAndStd(comptime name: []const u8, comptime benchFn: BenchFn) void { 225 | //benchmarkArgs(name ++ " Flat", comptime benchFn(Flat), &sizes); 226 | //benchmarkArgs(name ++ " Slic", comptime benchFn(Sliceable), &sizes); 227 | benchmarkArgs(name ++ " Std ", comptime benchFn(Std), &sizes); 228 | } 229 | 230 | // TODO 231 | // - use better allocators 232 | 233 | pub fn main() void { 234 | compareFlatAndStd("insert", comptime insertSequential); 235 | compareFlatAndStd("contains", comptime successfulContains); 236 | compareFlatAndStd("!contains", comptime unsuccessfulContains); 237 | compareFlatAndStd("eraseRandomOrder", comptime eraseRandomOrder); 238 | compareFlatAndStd("iterate", comptime iterate); 239 | } 240 | -------------------------------------------------------------------------------- /benchmarks/martinus_map.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const assert = std.debug.assert; 3 | const time = std.time; 4 | const warn = std.debug.warn; 5 | const Timer = time.Timer; 6 | 7 | // Copy of std.rand.Sfc64 with a public next() function. The random API is 8 | // slower than just calling next() and these benchmarks only require getting 9 | // consecutive u64's. 
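// (Illustrative usage of the struct below, matching how these benchmarks use
// it: seed once, then draw raw 64-bit values directly, e.g.
//     var rng = Sfc64.init(123);
//     const key = rng.next();
// The embedded `random` field still provides the std.rand.Random interface
// for the few call sites that need it, such as uintLessThan in
// randomDistinct().)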
10 | pub const Sfc64 = struct { 11 | random: std.rand.Random, 12 | 13 | a: u64 = undefined, 14 | b: u64 = undefined, 15 | c: u64 = undefined, 16 | counter: u64 = undefined, 17 | 18 | const Rotation = 24; 19 | const RightShift = 11; 20 | const LeftShift = 3; 21 | 22 | pub fn init(init_s: u64) Sfc64 { 23 | var x = Sfc64{ 24 | .random = std.rand.Random{ .fillFn = fill }, 25 | }; 26 | 27 | x.seed(init_s); 28 | return x; 29 | } 30 | 31 | pub fn next(self: *Sfc64) u64 { 32 | const tmp = self.a +% self.b +% self.counter; 33 | self.counter += 1; 34 | self.a = self.b ^ (self.b >> RightShift); 35 | self.b = self.c +% (self.c << LeftShift); 36 | self.c = std.math.rotl(u64, self.c, Rotation) +% tmp; 37 | return tmp; 38 | } 39 | 40 | pub fn seed(self: *Sfc64, init_s: u64) void { 41 | self.a = init_s; 42 | self.b = init_s; 43 | self.c = init_s; 44 | self.counter = 1; 45 | var i: u32 = 0; 46 | while (i < 12) : (i += 1) { 47 | _ = self.next(); 48 | } 49 | } 50 | 51 | fn fill(r: *std.rand.Random, buf: []u8) void { 52 | const self = @fieldParentPtr(Sfc64, "random", r); 53 | 54 | var i: usize = 0; 55 | const aligned_len = buf.len - (buf.len & 7); 56 | 57 | // Complete 8 byte segments. 58 | while (i < aligned_len) : (i += 8) { 59 | var n = self.next(); 60 | comptime var j: usize = 0; 61 | inline while (j < 8) : (j += 1) { 62 | buf[i + j] = @truncate(u8, n); 63 | n >>= 8; 64 | } 65 | } 66 | 67 | // Remaining. (cuts the stream) 68 | if (i != buf.len) { 69 | var n = self.next(); 70 | while (i < buf.len) : (i += 1) { 71 | buf[i] = @truncate(u8, n); 72 | n >>= 8; 73 | } 74 | } 75 | } 76 | }; 77 | 78 | const AutoHashMap = std.AutoHashMap; 79 | 80 | fn iterate(allocator: anytype) void { 81 | const num_iters = 50000; 82 | 83 | var result: u64 = 0; 84 | var map = AutoHashMap(u64, u64).init(allocator); 85 | defer map.deinit(); 86 | 87 | const seed = 123; 88 | var rng = Sfc64.init(seed); 89 | 90 | warn("iterate while adding", .{}); 91 | var timer = Timer.start() catch unreachable; 92 | var i: u64 = 0; 93 | while (i < num_iters) : (i += 1) { 94 | const key = rng.next(); 95 | map.put(key, i) catch unreachable; 96 | var it = map.iterator(); 97 | while (it.next()) |kv| { 98 | result += kv.value; 99 | } 100 | } 101 | var elapsed = timer.read(); 102 | if (result != 20833333325000) std.os.abort(); 103 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 104 | 105 | rng.seed(seed); 106 | warn("iterate while removing", .{}); 107 | timer.reset(); 108 | i = 0; 109 | while (i < num_iters) : (i += 1) { 110 | _ = map.remove(rng.next()); 111 | var it = map.iterator(); 112 | while (it.next()) |kv| { 113 | result += kv.value; 114 | } 115 | } 116 | elapsed = timer.read(); 117 | assert(map.count() == 0); 118 | if (result != 62498750000000) std.os.abort(); 119 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 120 | } 121 | 122 | fn insert(allocator: anytype) void { 123 | const num_iters = 100 * 1000 * 1000; 124 | 125 | var rng = Sfc64.init(213); 126 | 127 | warn("insert 100M int", .{}); 128 | var timer = Timer.start() catch unreachable; 129 | var map = AutoHashMap(i32, i32).init(allocator); 130 | 131 | var i: i32 = 0; 132 | while (i < num_iters) : (i += 1) { 133 | const key = @bitCast(i32, @truncate(u32, rng.next())); 134 | map.put(key, 0) catch unreachable; 135 | } 136 | var elapsed = timer.read(); 137 | std.debug.assert(map.count() == 98841586); 138 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 139 | 140 | warn("clear 100M int", .{}); 141 | timer.reset(); 142 | 
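// (Hedged note: this phase times clearRetainingCapacity alone. In the hashmap
// implementation in this repo, that call re-zeroes the metadata bytes and
// resets the size while keeping the backing allocation, so the cost measured
// here is roughly a memset over one byte per slot rather than any allocator
// traffic; the std map used by this benchmark is assumed to behave the same
// way.)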
map.clearRetainingCapacity(); 143 | elapsed = timer.read(); 144 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 145 | 146 | const state = rng; 147 | warn("reinsert 100M int", .{}); 148 | timer.reset(); 149 | i = 0; 150 | while (i < num_iters) : (i += 1) { 151 | const key = @bitCast(i32, @truncate(u32, rng.next())); 152 | map.put(key, 0) catch unreachable; 153 | } 154 | elapsed = timer.read(); 155 | std.debug.assert(map.count() == 98843646); 156 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 157 | 158 | warn("remove 100M int", .{}); 159 | rng = state; 160 | timer.reset(); 161 | i = 0; 162 | while (i < num_iters) : (i += 1) { 163 | const key = @bitCast(i32, @truncate(u32, rng.next())); 164 | _ = map.remove(key); 165 | } 166 | elapsed = timer.read(); 167 | std.debug.assert(map.count() == 0); 168 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 169 | 170 | warn("reinsert 100M int", .{}); 171 | rng = state; 172 | map.clearRetainingCapacity(); 173 | timer.reset(); 174 | i = 0; 175 | while (i < num_iters) : (i += 1) { 176 | const key = @bitCast(i32, @truncate(u32, rng.next())); 177 | map.put(key, 0) catch unreachable; 178 | } 179 | elapsed = timer.read(); 180 | std.debug.assert(map.count() == 98843646); 181 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 182 | 183 | warn("deinit map", .{}); 184 | timer.reset(); 185 | map.deinit(); 186 | elapsed = timer.read(); 187 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 188 | } 189 | 190 | fn randomDistinct(allocator: anytype) void { 191 | const num_iters = 50 * 1000 * 1000; 192 | const _5distinct = num_iters / 20; 193 | const _25distinct = num_iters / 4; 194 | const _50distinct = num_iters / 2; 195 | 196 | const settings = .{ 197 | .{ .max = _5distinct, .txt = "5% distinct", .sum = 549980587 }, 198 | .{ .max = _25distinct, .txt = "25% distinct", .sum = 149995671 }, 199 | .{ .max = _50distinct, .txt = "50% distinct", .sum = 99996161 }, 200 | .{ .max = 0xFFFFFFFF, .txt = "100% distinct", .sum = 50291772 }, 201 | }; 202 | 203 | var rng = Sfc64.init(123); 204 | 205 | inline for (settings) |setting| { 206 | warn(setting.txt, .{}); 207 | 208 | var checksum: i32 = 0; 209 | var timer = Timer.start() catch unreachable; 210 | var map = AutoHashMap(i32, i32).init(allocator); 211 | defer map.deinit(); 212 | 213 | var i: u32 = 0; 214 | while (i < num_iters) : (i += 1) { 215 | const key = @bitCast(i32, rng.random.uintLessThan(u32, setting.max)); 216 | var res = map.getOrPutValue(key, 0) catch unreachable; 217 | res.value += 1; 218 | checksum += res.value; 219 | } 220 | 221 | const elapsed = timer.read(); 222 | 223 | std.debug.assert(checksum == setting.sum); 224 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 225 | } 226 | } 227 | 228 | fn randomInsertRemove(allocator: anytype) void { 229 | var rng = Sfc64.init(999); 230 | 231 | const masks = [_]u64{ 232 | 0b1001000000000000000000000000000000000000000100000000000000001000, 233 | 0b1001000000000010001100000000000000000000000101000000000000001000, 234 | 0b1001000000000110001100000000000000010000000101100000000000001001, 235 | 0b1001000000000110001100000001000000010000000101110000000100101001, 236 | 0b1101100000000110001100001001000000010000000101110001000100101001, 237 | 0b1101100000001110001100001001001000010000100101110001000100101011, 238 | }; 239 | const bit_count = [_]u32{ 4, 8, 12, 16, 20, 24 }; 240 | const expected_final_sizes = [_]u32{ 7, 141, 2303, 37938, 606489, 9783443 }; 241 | const max_n = 
50 * 1000 * 1000; 242 | 243 | var rnd_bit_idx: u32 = 0; 244 | 245 | var map = AutoHashMap(u64, u64).init(allocator); 246 | defer map.deinit(); 247 | 248 | var i: u32 = 0; 249 | while (i < 6) : (i += 1) { 250 | const bit_mask = masks[i]; 251 | var verifier: u64 = 0; 252 | warn("{} bits, {}M cycles", .{ bit_count[i], max_n / 1000000 }); 253 | 254 | var timer = Timer.start() catch unreachable; 255 | var j: u32 = 0; 256 | while (j < max_n) : (j += 1) { 257 | _ = map.getOrPut(rng.next() & bit_mask, j) catch unreachable; 258 | _ = map.remove(rng.next() & bit_mask); 259 | } 260 | const elapsed = timer.read(); 261 | std.debug.assert(map.count() == expected_final_sizes[i]); 262 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 263 | } 264 | } 265 | 266 | /// /!\ Leaks big amounts of memory !! 267 | fn randomInsertRemoveStrings(allocator: anytype, max_n: u64, length: u64, mask: u32, expected: u64) void { 268 | var rng = Sfc64.init(123); 269 | var verifier: u64 = 0; 270 | 271 | warn("{} bytes ", .{length}); 272 | 273 | var str = allocator.alloc(u8, length) catch unreachable; 274 | for (str) |*c| c.* = 'x'; 275 | const idx32 = (length / 4) - 1; 276 | const strData32 = @ptrToInt(@ptrCast(*u32, @alignCast(4, &str[0]))) + idx32 * @sizeOf(u32); 277 | 278 | var timer = Timer.start() catch unreachable; 279 | var map = AutoHashMap([]const u8, []const u8).init(allocator); 280 | defer map.deinit(); 281 | 282 | var i: u32 = 0; 283 | while (i < max_n) : (i += 1) { 284 | @intToPtr(*u32, strData32).* = @truncate(u32, rng.next()) & mask; 285 | 286 | // This leaks because we never release the strings we insert in the map. 287 | const new_str = allocator.alloc(u8, length) catch unreachable; 288 | for (str) |c, j| { 289 | new_str[j] = c; 290 | } 291 | 292 | _ = map.getOrPut(new_str, []const u8{}) catch unreachable; 293 | @intToPtr(*u32, strData32).* = @truncate(u32, rng.next()) & mask; 294 | if (map.remove(str)) verifier += 1; 295 | } 296 | const elapsed = timer.read(); 297 | std.debug.assert(expected == verifier); 298 | warn(" {d:.3}s\n", .{@intToFloat(f64, elapsed) / time.ns_per_s}); 299 | } 300 | 301 | fn randomFind(allocator: anytype, num_rand: u32, mask: u64, num_insert: u64, find_per_insert: u64, expected: u64) void { 302 | const total = 4; 303 | const sequential = total - num_rand; 304 | 305 | const find_per_iter = find_per_insert * total; 306 | 307 | warn("{}% success, {x} ", .{ (sequential * 100) / total, mask }); 308 | var rng = Sfc64.init(123); 309 | 310 | var num_found: u64 = 0; 311 | var insert_random = [_]bool{false} ** 4; 312 | for (insert_random[0..num_rand]) |*b| b.* = true; 313 | 314 | var other_rng = Sfc64.init(987654321); 315 | const state = other_rng; 316 | var find_rng = state; 317 | 318 | { 319 | var map = AutoHashMap(u64, u64).init(allocator); 320 | var i: u64 = 0; 321 | var find_count: u64 = 0; 322 | 323 | var timer = Timer.start() catch unreachable; 324 | while (i < num_insert) { 325 | // insert NumTotal entries: some random, some sequential. 
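// (Reading of the surrounding setup, hedged: `insert_random` holds `num_rand`
// true flags out of four and is reshuffled every round, so with num_rand == 1
// roughly three out of four inserted keys come from `other_rng`, whose
// sequence `find_rng` replays in the lookup loop below; those keys are
// findable, while keys drawn from `rng` generally are not, which is what the
// "{}% success" label refers to.)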
326 | std.rand.Random.shuffle(&rng.random, bool, insert_random[0..]); 327 | for (insert_random) |isRandomToInsert| { 328 | const val = other_rng.next(); 329 | if (isRandomToInsert) { 330 | map.put(rng.next() & mask, 1) catch unreachable; 331 | } else { 332 | map.put(val & mask, 1) catch unreachable; 333 | } 334 | i += 1; 335 | } 336 | 337 | var j: u64 = 0; 338 | while (j < find_per_iter) : (j += 1) { 339 | find_count += 1; 340 | if (find_count > i) { 341 | find_count = 0; 342 | find_rng = state; 343 | } 344 | const key = find_rng.next() & mask; 345 | if (map.get(key)) |val| num_found += val; 346 | } 347 | } 348 | 349 | const elapsed = timer.read(); 350 | std.debug.assert(expected == num_found); 351 | warn(" {d:.3}ns\n", .{@intToFloat(f64, elapsed) / @intToFloat(f64, num_insert * find_per_insert)}); 352 | } 353 | } 354 | 355 | pub fn main() void { 356 | const allocator = std.heap.c_allocator; 357 | //const allocator = std.heap.page_allocator; 358 | 359 | iterate(allocator); 360 | insert(allocator); 361 | randomDistinct(allocator); 362 | 363 | const lower32bit = 0x00000000FFFFFFFF; 364 | const upper32bit = 0xFFFFFFFF00000000; 365 | 366 | { 367 | const num_inserts = 2000; 368 | const find_per_insert = 500000; 369 | randomFind(allocator, 4, lower32bit, num_inserts, find_per_insert, 0); 370 | randomFind(allocator, 4, upper32bit, num_inserts, find_per_insert, 0); 371 | randomFind(allocator, 3, lower32bit, num_inserts, find_per_insert, 249194555); 372 | randomFind(allocator, 3, upper32bit, num_inserts, find_per_insert, 249194555); 373 | randomFind(allocator, 2, lower32bit, num_inserts, find_per_insert, 498389111); 374 | randomFind(allocator, 2, upper32bit, num_inserts, find_per_insert, 498389111); 375 | randomFind(allocator, 1, lower32bit, num_inserts, find_per_insert, 747583667); 376 | randomFind(allocator, 1, upper32bit, num_inserts, find_per_insert, 747583667); 377 | randomFind(allocator, 0, lower32bit, num_inserts, find_per_insert, 996778223); 378 | randomFind(allocator, 0, upper32bit, num_inserts, find_per_insert, 996778223); 379 | } 380 | 381 | // This is not very interesting to compare against the C++ version of the 382 | // benchmarks since std::string is very different from []const u8. 383 | // It would at least need to use a SSO-enabled Zig version. 
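// (Hedged note on the commented-out calls below: the arguments are
// (allocator, max_n, length, mask, expected), i.e. up to 20000000 cycles over
// keys of the given byte length, with a four-byte window of each key
// randomized under the mask, and the last argument being the number of
// successful removals the function checks at the end.)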
384 | // randomInsertRemoveStrings(allocator, 20000000, 7, 0xfffff, 10188986); 385 | // randomInsertRemoveStrings(allocator, 20000000, 8, 0xfffff, 10191449); 386 | // randomInsertRemoveStrings(allocator, 20000000, 13, 0xfffff, 10190593); 387 | // randomInsertRemoveStrings(allocator, 12000000, 100, 0x7ffff, 6144655); 388 | // randomInsertRemoveStrings(allocator, 6000000, 1000, 0x1ffff, 3109782); 389 | } 390 | -------------------------------------------------------------------------------- /build.zig: -------------------------------------------------------------------------------- 1 | const builtin = @import("builtin"); 2 | const Builder = @import("std").build.Builder; 3 | const tests = @import("tests.zig"); 4 | 5 | pub fn build(b: *Builder) void { 6 | const test_step = b.step("test", "Run tests"); 7 | test_step.dependOn(&b.addTest("hashmap.zig").step); 8 | 9 | const bench_exe = b.addExecutable("bench", "benchmarks/hashmap.zig"); 10 | bench_exe.addPackagePath("bench", "deps/bench.zig"); 11 | bench_exe.addPackagePath("sliceable_hashmap", "sliceable_hashmap.zig"); 12 | bench_exe.addPackagePath("hashmap", "hashmap.zig"); 13 | bench_exe.setBuildMode(.ReleaseFast); 14 | 15 | const bench_cmd = bench_exe.run(); 16 | const bench_step = b.step("bench", "Run benchmarks"); 17 | bench_step.dependOn(&bench_cmd.step); 18 | 19 | const martinus_exe = b.addExecutable("martinus", "benchmarks/martinus_map.zig"); 20 | martinus_exe.override_lib_dir = b.option([]const u8, "override-lib-dir", "override lib dir"); 21 | martinus_exe.addPackagePath("bench", "deps/bench.zig"); 22 | martinus_exe.addPackagePath("sliceable_hashmap", "sliceable_hashmap.zig"); 23 | martinus_exe.addPackagePath("hashmap", "hashmap.zig"); 24 | martinus_exe.linkSystemLibrary("c"); 25 | martinus_exe.setBuildMode(b.standardReleaseOptions()); 26 | 27 | const martinus_cmd = martinus_exe.run(); 28 | const martinus_step = b.step("martinus", "Run martinus map benchmarks"); 29 | martinus_step.dependOn(&martinus_cmd.step); 30 | } 31 | -------------------------------------------------------------------------------- /deps/bench.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const TypeId = @import("builtin").TypeId; 3 | const assert = std.debug.assert; 4 | const time = std.time; 5 | const warn = std.debug.warn; 6 | 7 | const Timer = time.Timer; 8 | 9 | const BenchFn = fn (*Context) void; 10 | 11 | pub const Context = struct { 12 | timer: Timer, 13 | iter: u32, 14 | count: u32, 15 | state: State, 16 | nanoseconds: u64, 17 | 18 | const HeatingTime = time.ns_per_s / 2; 19 | const RunTime = time.ns_per_s / 2; 20 | 21 | const State = enum { 22 | None, 23 | Heating, 24 | Running, 25 | Finished, 26 | }; 27 | 28 | pub fn init() Context { 29 | return Context{ .timer = Timer.start() catch unreachable, .iter = 0, .count = 0, .state = .None, .nanoseconds = 0 }; 30 | } 31 | 32 | pub fn run(self: *Context) bool { 33 | switch (self.state) { 34 | .None => { 35 | self.state = .Heating; 36 | self.timer.reset(); 37 | return true; 38 | }, 39 | .Heating => { 40 | self.count += 1; 41 | const elapsed = self.timer.read(); 42 | if (elapsed >= HeatingTime) { 43 | // Caches should be hot 44 | self.count = @intCast(u32, RunTime / (HeatingTime / self.count)); 45 | self.state = .Running; 46 | self.timer.reset(); 47 | } 48 | 49 | return true; 50 | }, 51 | .Running => { 52 | if (self.iter < self.count) { 53 | self.iter += 1; 54 | return true; 55 | } else { 56 | self.nanoseconds = self.timer.read(); 57 | 
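// (Hedged summary of the state machine at this point: .None starts the
// heating phase, .Heating spins for about HeatingTime to calibrate `count`,
// .Running executes exactly `count` timed iterations, and the transition to
// .Finished below is what lets averageTime() assert it is only read after a
// complete run. Typical caller shape, with workUnderTest() as a placeholder:
//     var ctx = Context.init();
//     while (ctx.run()) workUnderTest();
//     const ns = ctx.averageTime(1);
// )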
self.state = .Finished; 58 | return false; 59 | } 60 | }, 61 | .Finished => unreachable, 62 | } 63 | } 64 | 65 | pub fn startTimer(self: *Context) void { 66 | self.timer.reset(); 67 | } 68 | 69 | pub fn stopTimer(self: *Context) void { 70 | const elapsed = self.timer.read(); 71 | self.nanoseconds += elapsed; 72 | } 73 | 74 | pub fn runExplicitTiming(self: *Context) bool { 75 | switch (self.state) { 76 | .None => { 77 | self.state = .Heating; 78 | return true; 79 | }, 80 | .Heating => { 81 | self.count += 1; 82 | if (self.nanoseconds >= HeatingTime) { 83 | // Caches should be hot 84 | self.count = @intCast(u32, RunTime / (HeatingTime / self.count)); 85 | self.nanoseconds = 0; 86 | self.state = .Running; 87 | } 88 | 89 | return true; 90 | }, 91 | .Running => { 92 | if (self.iter < self.count) { 93 | self.iter += 1; 94 | return true; 95 | } else { 96 | self.state = .Finished; 97 | return false; 98 | } 99 | }, 100 | .Finished => unreachable, 101 | } 102 | } 103 | 104 | pub fn averageTime(self: *Context, unit: u64) f32 { 105 | assert(self.state == .Finished); 106 | return @intToFloat(f32, self.nanoseconds / unit) / @intToFloat(f32, self.iter); 107 | } 108 | }; 109 | 110 | pub fn benchmark(name: comptime []const u8, f: BenchFn) void { 111 | var ctx = Context.init(); 112 | @call(.{.never_inline}, f, &ctx); 113 | 114 | var unit: u64 = undefined; 115 | var unit_name: []const u8 = undefined; 116 | const avg_time = ctx.averageTime(1); 117 | assert(avg_time >= 0); 118 | 119 | if (avg_time <= time.microsecond) { 120 | unit = 1; 121 | unit_name = "ns"; 122 | } else if (avg_time <= time.ns_per_ms) { 123 | unit = time.microsecond; 124 | unit_name = "us"; 125 | } else { 126 | unit = time.ns_per_ms; 127 | unit_name = "ms"; 128 | } 129 | 130 | warn("{s}: avg {d:.3}{s} ({} iterations)\n", .{ name, ctx.averageTime(unit), unit_name, ctx.iter }); 131 | } 132 | 133 | fn benchArgFn(comptime argType: type) type { 134 | return fn (*Context, argType) void; 135 | } 136 | 137 | fn argTypeFromFn(comptime f: anytype) type { 138 | comptime const F = @TypeOf(f); 139 | if (@typeInfo(F) != TypeId.Fn) { 140 | @compileError("Argument must be a function."); 141 | } 142 | 143 | const fnInfo = @typeInfo(F).Fn; 144 | if (fnInfo.args.len != 2) { 145 | @compileError("Only functions taking 1 argument are accepted."); 146 | } 147 | 148 | return fnInfo.args[1].arg_type.?; 149 | } 150 | 151 | pub fn benchmarkArgs(comptime name: []const u8, comptime f: anytype, comptime args: []const argTypeFromFn(f)) void { 152 | inline for (args) |a| { 153 | var ctx = Context.init(); 154 | const m = std.builtin.CallOptions{ .modifier = .never_inline }; 155 | @call(m, f, .{ &ctx, a }); 156 | 157 | var unit: u64 = undefined; 158 | var unit_name: []const u8 = undefined; 159 | const avg_time = ctx.averageTime(1); 160 | assert(avg_time >= 0); 161 | 162 | if (avg_time <= time.ns_per_us) { 163 | unit = 1; 164 | unit_name = "ns"; 165 | } else if (avg_time <= time.ns_per_ms) { 166 | unit = time.ns_per_us; 167 | unit_name = "us"; 168 | } else { 169 | unit = time.ns_per_ms; 170 | unit_name = "ms"; 171 | } 172 | warn("{s} <{}>: avg {d:.3}{s} ({} iterations)\n", .{ 173 | name, 174 | if (@TypeOf(a) == type) @typeName(a) else a, 175 | ctx.averageTime(unit), 176 | unit_name, 177 | ctx.iter, 178 | }); 179 | } 180 | } 181 | 182 | pub fn doNotOptimize(value: anytype) void { 183 | // LLVM triggers an assert if we pass non-trivial types as inputs for the 184 | // asm volatile expression. 185 | // Workaround until asm support is better on Zig's end. 
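// (Hedged illustration: for scalar inputs the switch below emits an empty
// asm statement that only declares `value` as an input and clobbers memory,
//     asm volatile ("" : : [_] "r,m" (value) : "memory");
// the same barrier trick clobberMemory() uses without an operand, so the
// optimizer has to materialize the value even though no instructions are
// actually executed.)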
186 | const T = @TypeOf(value); 187 | const typeId = @typeInfo(T); 188 | switch (typeId) { 189 | .Bool, .Int, .Float => { 190 | asm volatile ("" 191 | : 192 | : [_] "r,m" (value) 193 | : "memory" 194 | ); 195 | }, 196 | .Optional => { 197 | if (value) |v| doNotOptimize(v); 198 | }, 199 | .Struct => { 200 | inline for (comptime std.meta.fields(T)) |field| { 201 | doNotOptimize(@field(value, field.name)); 202 | } 203 | }, 204 | .Type, .Void, .NoReturn, .ComptimeFloat, .ComptimeInt, .Undefined, .Null, .Fn, .BoundFn => @compileError("doNotOptimize makes no sense for " ++ @tagName(typeId)), 205 | else => @compileError("doNotOptimize is not implemented for " ++ @tagName(typeId)), 206 | } 207 | } 208 | 209 | pub fn clobberMemory() void { 210 | asm volatile ("" ::: "memory"); 211 | } 212 | 213 | test "benchmark" { 214 | const benchSleep57 = struct { 215 | fn benchSleep57(ctx: *Context) void { 216 | while (ctx.run()) { 217 | time.sleep(57 * time.ns_per_ms); 218 | } 219 | } 220 | }.benchSleep57; 221 | 222 | std.debug.warn("\n"); 223 | benchmark("Sleep57", benchSleep57); 224 | } 225 | 226 | test "benchmarkArgs" { 227 | const benchSleep = struct { 228 | fn benchSleep(ctx: *Context, ms: u32) void { 229 | while (ctx.run()) { 230 | time.sleep(ms * time.ns_per_ms); 231 | } 232 | } 233 | }.benchSleep; 234 | 235 | std.debug.warn("\n"); 236 | benchmarkArgs("Sleep", benchSleep, [_]u32{ 20, 30, 57 }); 237 | } 238 | 239 | test "benchmarkArgs types" { 240 | const benchMin = struct { 241 | fn benchMin(ctx: *Context, comptime intType: type) void { 242 | while (ctx.run()) { 243 | time.sleep(std.math.min(37, 48) * time.ns_per_ms); 244 | } 245 | } 246 | }.benchMin; 247 | 248 | std.debug.warn("\n"); 249 | benchmarkArgs("Min", benchMin, [_]type{ u32, u64 }); 250 | } 251 | 252 | test "benchmark custom timing" { 253 | const sleep = struct { 254 | fn sleep(ctx: *Context) void { 255 | while (ctx.runExplicitTiming()) { 256 | time.sleep(30 * time.ns_per_ms); 257 | ctx.startTimer(); 258 | defer ctx.stopTimer(); 259 | time.sleep(10 * time.ns_per_ms); 260 | } 261 | } 262 | }.sleep; 263 | 264 | std.debug.warn("\n"); 265 | benchmark("sleep", sleep); 266 | } 267 | -------------------------------------------------------------------------------- /deps/wyhash.zig: -------------------------------------------------------------------------------- 1 | // Author: Jan Halsema 2 | // Zig implementation of wyhash 3 | 4 | const std = @import("std"); 5 | const mem = std.mem; 6 | 7 | const primes = [_]u64{ 8 | 0xa0761d6478bd642f, 0xe7037ed1a0b428db, 9 | 0x8ebc6af09c88c6e3, 0x589965cc75374cc3, 10 | 0x1d8e4e27c47d124f, 0xeb44accab455d165, 11 | }; 12 | 13 | fn read_bytes(comptime bytes: u8, data: []const u8) u64 { 14 | return mem.readVarInt(u64, data[0..bytes], .Little); 15 | } 16 | 17 | fn read_8bytes_swapped(data: []const u8) u64 { 18 | return (read_bytes(4, data) << 32 | read_bytes(4, data[4..])); 19 | } 20 | 21 | fn mum(a: u64, b: u64) u64 { 22 | var r: u128 = @intCast(u128, a) * @intCast(u128, b); 23 | r = (r >> 64) ^ r; 24 | return @truncate(u64, r); 25 | } 26 | 27 | pub fn hash(key: []const u8, initial_seed: u64) u64 { 28 | const len = key.len; 29 | 30 | var seed = initial_seed; 31 | 32 | var i: usize = 0; 33 | while (i + 32 <= key.len) : (i += 32) { 34 | seed = mum(seed ^ primes[0], 35 | mum(read_bytes(8, key[i ..]) ^ primes[1], 36 | read_bytes(8, key[i + 8 ..]) ^ primes[2]) ^ 37 | mum(read_bytes(8, key[i + 16 ..]) ^ primes[3], 38 | read_bytes(8, key[i + 24 ..]) ^ primes[4])); 39 | } 40 | seed ^= primes[0]; 41 | 42 | const rem_len = 
@truncate(u5, len); 43 | if (rem_len != 0) { 44 | const rem_bits = @truncate(u3, rem_len % 8); 45 | const rem_bytes = @truncate(u2, (len - 1) / 8); 46 | const rem_key = key[i + @intCast(usize, rem_bytes) * 8 ..]; 47 | 48 | const rest = switch (rem_bits) { 49 | 0 => read_8bytes_swapped(rem_key), 50 | 1 => read_bytes(1, rem_key), 51 | 2 => read_bytes(2, rem_key), 52 | 3 => read_bytes(2, rem_key) << 8 | read_bytes(1, rem_key[2..]), 53 | 4 => read_bytes(4, rem_key), 54 | 5 => read_bytes(4, rem_key) << 8 | read_bytes(1, rem_key[4..]), 55 | 6 => read_bytes(4, rem_key) << 16 | read_bytes(2, rem_key[4..]), 56 | 7 => read_bytes(4, rem_key) << 24 | read_bytes(2, rem_key[4..]) << 8 | read_bytes(1, rem_key[6..]), 57 | } ^ primes[@intCast(usize, rem_bytes) + 1]; 58 | 59 | seed = switch (rem_bytes) { 60 | 0 => mum(seed, rest), 61 | 1 => mum(read_8bytes_swapped(key[i ..]) ^ seed, rest), 62 | 2 => mum(read_8bytes_swapped(key[i ..]) ^ seed, 63 | read_8bytes_swapped(key[i + 8 ..]) ^ primes[2]) ^ 64 | mum(seed, rest), 65 | 3 => mum(read_8bytes_swapped(key[i ..]) ^ seed, 66 | read_8bytes_swapped(key[i + 8 ..]) ^ primes[2]) ^ 67 | mum(read_8bytes_swapped(key[i + 16 ..]) ^ seed, rest), 68 | }; 69 | } 70 | 71 | return mum(seed, len ^ primes[5]); 72 | } 73 | 74 | pub fn rng(initial_seed: u64) u64 { 75 | var seed = initial_seed +% primes[0]; 76 | return mum(seed ^ primes[1], seed); 77 | } 78 | 79 | -------------------------------------------------------------------------------- /hashmap.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const builtin = @import("builtin"); 3 | const assert = debug.assert; 4 | const autoHash = std.hash.autoHash; 5 | const debug = std.debug; 6 | const warn = debug.warn; 7 | const math = std.math; 8 | const mem = std.mem; 9 | const meta = std.meta; 10 | const trait = meta.trait; 11 | const Allocator = mem.Allocator; 12 | const Wyhash = std.hash.Wyhash; 13 | 14 | pub fn getAutoHashFn(comptime K: type) (fn (K) u64) { 15 | return struct { 16 | fn hash(key: K) u64 { 17 | if (comptime trait.hasUniqueRepresentation(K)) { 18 | return Wyhash.hash(0, std.mem.asBytes(&key)); 19 | } else { 20 | var hasher = Wyhash.init(0); 21 | autoHash(&hasher, key); 22 | return hasher.final(); 23 | } 24 | } 25 | }.hash; 26 | } 27 | 28 | pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { 29 | return struct { 30 | fn eql(a: K, b: K) bool { 31 | return meta.eql(a, b); 32 | } 33 | }.eql; 34 | } 35 | 36 | pub fn AutoHashMap(comptime K: type, comptime V: type) type { 37 | return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), DefaultMaxLoadPercentage); 38 | } 39 | 40 | pub fn AutoHashMapUnmanaged(comptime K: type, comptime V: type) type { 41 | return HashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), DefaultMaxLoadPercentage); 42 | } 43 | 44 | /// Builtin hashmap for strings as keys. 
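/// A hedged usage sketch (the allocator name is illustrative):
///     var map = StringHashMap(u32).init(allocator);
///     defer map.deinit();
///     try map.put("some key", 42);
///     const v = map.get("some key"); // ?u32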
45 | pub fn StringHashMap(comptime V: type) type { 46 | return HashMap([]const u8, V, hashString, eqlString, DefaultMaxLoadPercentage); 47 | } 48 | 49 | pub fn StringHashMapUnmanaged(comptime V: type) type { 50 | return HashMapUnmanaged([]const u8, V, hashString, eqlString, DefaultMaxLoadPercentage); 51 | } 52 | 53 | pub fn eqlString(a: []const u8, b: []const u8) bool { 54 | return mem.eql(u8, a, b); 55 | } 56 | 57 | pub fn hashString(s: []const u8) u64 { 58 | return std.hash.Wyhash.hash(0, s); 59 | } 60 | 61 | pub const DefaultMaxLoadPercentage = 80; 62 | 63 | pub fn HashMap( 64 | comptime K: type, 65 | comptime V: type, 66 | comptime hashFn: fn (key: K) u64, 67 | comptime eqlFn: fn (a: K, b: K) bool, 68 | comptime MaxLoadPercentage: u64, 69 | ) type { 70 | return struct { 71 | unmanaged: Unmanaged, 72 | allocator: *Allocator, 73 | 74 | pub const Unmanaged = HashMapUnmanaged(K, V, hashFn, eqlFn, MaxLoadPercentage); 75 | pub const Entry = Unmanaged.Entry; 76 | pub const Hash = Unmanaged.Hash; 77 | pub const Iterator = Unmanaged.Iterator; 78 | pub const Size = Unmanaged.Size; 79 | pub const GetOrPutResult = Unmanaged.GetOrPutResult; 80 | 81 | const Self = @This(); 82 | 83 | pub fn init(allocator: *Allocator) Self { 84 | return .{ 85 | .unmanaged = .{}, 86 | .allocator = allocator, 87 | }; 88 | } 89 | 90 | pub fn deinit(self: *Self) void { 91 | self.unmanaged.deinit(self.allocator); 92 | self.* = undefined; 93 | } 94 | 95 | pub fn clearRetainingCapacity(self: *Self) void { 96 | return self.unmanaged.clearRetainingCapacity(); 97 | } 98 | 99 | pub fn clearAndFree(self: *Self) void { 100 | return self.unmanaged.clearAndFree(self.allocator); 101 | } 102 | 103 | pub fn count(self: Self) usize { 104 | return self.unmanaged.count(); 105 | } 106 | 107 | pub fn iterator(self: *const Self) Iterator { 108 | return self.unmanaged.iterator(); 109 | } 110 | 111 | /// If key exists this function cannot fail. 112 | /// If there is an existing item with `key`, then the result 113 | /// `Entry` pointer points to it, and found_existing is true. 114 | /// Otherwise, puts a new item with undefined value, and 115 | /// the `Entry` pointer points to it. Caller should then initialize 116 | /// the value (but not the key). 117 | pub fn getOrPut(self: *Self, key: K) !GetOrPutResult { 118 | return self.unmanaged.getOrPut(self.allocator, key); 119 | } 120 | 121 | /// If there is an existing item with `key`, then the result 122 | /// `Entry` pointer points to it, and found_existing is true. 123 | /// Otherwise, puts a new item with undefined value, and 124 | /// the `Entry` pointer points to it. Caller should then initialize 125 | /// the value (but not the key). 126 | /// If a new entry needs to be stored, this function asserts there 127 | /// is enough capacity to store it. 128 | pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { 129 | return self.unmanaged.getOrPutAssumeCapacity(key); 130 | } 131 | 132 | pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry { 133 | return self.unmanaged.getOrPutValue(self.allocator, key, value); 134 | } 135 | 136 | /// Increases capacity, guaranteeing that insertions up until the 137 | /// `expected_count` will not cause an allocation, and therefore cannot fail. 
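/// A hedged example of the intended pattern:
///     try map.ensureCapacity(100);
///     var i: u32 = 0;
///     while (i < 100) : (i += 1) map.putAssumeCapacity(i, i);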
138 | pub fn ensureCapacity(self: *Self, expected_count: Size) !void { 139 | return self.unmanaged.ensureCapacity(self.allocator, expected_count); 140 | } 141 | 142 | /// Returns the number of total elements which may be present before it is 143 | /// no longer guaranteed that no allocations will be performed. 144 | pub fn capacity(self: *Self) Size { 145 | return self.unmanaged.capacity(); 146 | } 147 | 148 | /// Clobbers any existing data. To detect if a put would clobber 149 | /// existing data, see `getOrPut`. 150 | pub fn put(self: *Self, key: K, value: V) !void { 151 | return self.unmanaged.put(self.allocator, key, value); 152 | } 153 | 154 | /// Inserts a key-value pair into the hash map, asserting that no previous 155 | /// entry with the same key is already present 156 | pub fn putNoClobber(self: *Self, key: K, value: V) !void { 157 | return self.unmanaged.putNoClobber(self.allocator, key, value); 158 | } 159 | 160 | /// Asserts there is enough capacity to store the new key-value pair. 161 | /// Clobbers any existing data. To detect if a put would clobber 162 | /// existing data, see `getOrPutAssumeCapacity`. 163 | pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { 164 | return self.unmanaged.putAssumeCapacity(key, value); 165 | } 166 | 167 | /// Asserts there is enough capacity to store the new key-value pair. 168 | /// Asserts that it does not clobber any existing data. 169 | /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. 170 | pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { 171 | return self.unmanaged.putAssumeCapacityNoClobber(key, value); 172 | } 173 | 174 | /// Inserts a new `Entry` into the hash map, returning the previous one, if any. 175 | pub fn fetchPut(self: *Self, key: K, value: V) !?Entry { 176 | return self.unmanaged.fetchPut(self.allocator, key, value); 177 | } 178 | 179 | /// Inserts a new `Entry` into the hash map, returning the previous one, if any. 180 | /// If insertion happuns, asserts there is enough capacity without allocating. 181 | pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { 182 | return self.unmanaged.fetchPutAssumeCapacity(key, value); 183 | } 184 | 185 | pub fn get(self: Self, key: K) ?V { 186 | return self.unmanaged.get(key); 187 | } 188 | 189 | pub fn getEntry(self: Self, key: K) ?*Entry { 190 | return self.unmanaged.getEntry(key); 191 | } 192 | 193 | pub fn contains(self: Self, key: K) bool { 194 | return self.unmanaged.contains(key); 195 | } 196 | 197 | /// If there is an `Entry` with a matching key, it is deleted from 198 | /// the hash map, and then returned from this function. 199 | pub fn remove(self: *Self, key: K) ?Entry { 200 | return self.unmanaged.remove(key); 201 | } 202 | 203 | /// Asserts there is an `Entry` with matching key, deletes it from the hash map, 204 | /// and discards it. 205 | pub fn removeAssertDiscard(self: *Self, key: K) void { 206 | return self.unmanaged.removeAssertDiscard(key); 207 | } 208 | 209 | pub fn clone(self: Self) !Self { 210 | var other = try self.unmanaged.clone(self.allocator); 211 | return other.promote(self.allocator); 212 | } 213 | }; 214 | } 215 | 216 | /// A HashMap based on open addressing and linear probing. 217 | /// A lookup or modification typically occurs only 2 cache misses. 218 | /// No order is guaranteed and any modification invalidates live iterators. 
219 | /// It achieves good performance with quite high load factors (by default, 220 | /// grow is triggered at 80% full) and only one byte of overhead per element. 221 | pub fn HashMapUnmanaged( 222 | comptime K: type, 223 | comptime V: type, 224 | hashFn: fn (key: K) u64, 225 | eqlFn: fn (a: K, b: K) bool, 226 | comptime MaxLoadPercentage: u64, 227 | ) type { 228 | comptime assert(MaxLoadPercentage > 0 and MaxLoadPercentage < 100); 229 | 230 | return struct { 231 | const Self = @This(); 232 | 233 | // This is actually a midway pointer to the single buffer containing 234 | // a `Header` field, the `Metadata`s and `Entry`s. 235 | // At `-@sizeOf(Header)` is the Header field. 236 | // At `sizeOf(Metadata) * capacity + offset`, which is pointed to by 237 | // self.header().entries, is the array of entries. 238 | // This means that the hashmap only holds one live allocation, to 239 | // reduce memory fragmentation and struct size. 240 | /// Pointer to the metadata. 241 | metadata: ?[*]Metadata = null, 242 | 243 | /// Current number of elements in the hashmap. 244 | size: Size = 0, 245 | 246 | // Having a countdown to grow reduces the number of instructions to 247 | // execute when determining if the hashmap has enough capacity already. 248 | /// Number of available slots before a grow is needed to satisfy the 249 | /// `MaxLoadPercentage`. 250 | available: Size = 0, 251 | 252 | /// Capacity of the first grow when bootstrapping the hashmap. 253 | const MinimalCapacity = 8; 254 | 255 | // This hashmap is specially designed for sizes that fit in a u32. 256 | const Size = u32; 257 | 258 | // u64 hashes guarantee us that the fingerprint bits will never be used 259 | // to compute the index of a slot, maximizing the use of entropy. 260 | const Hash = u64; 261 | 262 | const Entry = struct { 263 | key: K, 264 | value: V, 265 | }; 266 | 267 | const Header = packed struct { 268 | entries: [*]Entry, 269 | capacity: Size, 270 | }; 271 | 272 | /// Metadata for a slot. It can be in three states: empty, used or 273 | /// tombstone. Tombstones indicate that an entry was previously used, 274 | /// they are a simple way to handle removal. 275 | /// To this state, we add 6 bits from the slot's key hash. These are 276 | /// used as a fast way to disambiguate between entries without 277 | /// having to use the equality function. If two fingerprints are 278 | /// different, we know that we don't have to compare the keys at all. 279 | /// The 6 bits are the highest ones from a 64 bit hash. This way, not 280 | /// only we use the `log2(capacity)` lowest bits from the hash to determine 281 | /// a slot index, but we use 6 more bits to quickly resolve collisions 282 | /// when multiple elements with different hashes end up wanting to be in / the same slot. 283 | /// Not using the equality function means we don't have to read into 284 | /// the entries array, avoiding a likely cache miss. 
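/// Hedged layout sketch of one metadata byte (the packed struct below, least
/// significant bit first):
///     used:1 | tombstone:1 | fingerprint:6
/// An empty slot is all zeroes, a removed slot has tombstone == 1 with a
/// cleared fingerprint, and a used slot stores the top 6 bits of the 64-bit
/// hash as its fingerprint.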
285 | const Metadata = packed struct { 286 | const FingerPrint = u6; 287 | 288 | used: u1 = 0, 289 | tombstone: u1 = 0, 290 | fingerprint: FingerPrint = 0, 291 | 292 | pub fn isUsed(self: Metadata) bool { 293 | return self.used == 1; 294 | } 295 | 296 | pub fn isTombstone(self: Metadata) bool { 297 | return self.tombstone == 1; 298 | } 299 | 300 | pub fn takeFingerprint(hash: Hash) FingerPrint { 301 | const hash_bits = @typeInfo(Hash).Int.bits; 302 | const fp_bits = @typeInfo(FingerPrint).Int.bits; 303 | return @truncate(FingerPrint, hash >> (hash_bits - fp_bits)); 304 | } 305 | 306 | pub fn fill(self: *Metadata, fp: FingerPrint) void { 307 | self.used = 1; 308 | self.tombstone = 0; 309 | self.fingerprint = fp; 310 | } 311 | 312 | pub fn remove(self: *Metadata) void { 313 | self.used = 0; 314 | self.tombstone = 1; 315 | self.fingerprint = 0; 316 | } 317 | }; 318 | 319 | comptime { 320 | assert(@sizeOf(Metadata) == 1); 321 | assert(@alignOf(Metadata) == 1); 322 | } 323 | 324 | const Iterator = struct { 325 | hm: *const Self, 326 | index: Size = 0, 327 | 328 | pub fn next(it: *Iterator) ?*Entry { 329 | assert(it.index <= it.hm.capacity()); 330 | if (it.hm.size == 0) return null; 331 | 332 | const cap = it.hm.capacity(); 333 | const end = it.hm.metadata.? + cap; 334 | var metadata = it.hm.metadata.? + it.index; 335 | 336 | while (metadata != end) : ({ 337 | metadata += 1; 338 | it.index += 1; 339 | }) { 340 | if (metadata[0].isUsed()) { 341 | const entry = &it.hm.entries()[it.index]; 342 | it.index += 1; 343 | return entry; 344 | } 345 | } 346 | 347 | return null; 348 | } 349 | }; 350 | 351 | pub const GetOrPutResult = struct { 352 | entry: *Entry, 353 | found_existing: bool, 354 | }; 355 | 356 | pub const Managed = HashMap(K, V, hashFn, eqlFn, MaxLoadPercentage); 357 | 358 | pub fn promote(self: Self, allocator: *Allocator) Managed { 359 | return .{ 360 | .unmanaged = self, 361 | .allocator = allocator, 362 | }; 363 | } 364 | 365 | fn isUnderMaxLoadPercentage(size: Size, cap: Size) bool { 366 | return size * 100 < MaxLoadPercentage * cap; 367 | } 368 | 369 | pub fn init(allocator: *Allocator) Self { 370 | return .{}; 371 | } 372 | 373 | pub fn deinit(self: *Self, allocator: *Allocator) void { 374 | self.deallocate(allocator); 375 | self.* = undefined; 376 | } 377 | 378 | fn deallocate(self: *Self, allocator: *Allocator) void { 379 | if (self.metadata == null) return; 380 | 381 | const cap = self.capacity(); 382 | const meta_size = @sizeOf(Header) + cap * @sizeOf(Metadata); 383 | 384 | const alignment = @alignOf(Entry) - 1; 385 | const entries_size = @as(usize, cap) * @sizeOf(Entry) + alignment; 386 | 387 | const total_size = meta_size + entries_size; 388 | 389 | var slice: []u8 = undefined; 390 | slice.ptr = @intToPtr([*]u8, @ptrToInt(self.header())); 391 | slice.len = total_size; 392 | allocator.free(slice); 393 | 394 | self.metadata = null; 395 | self.available = 0; 396 | } 397 | 398 | fn capacityForSize(size: Size) Size { 399 | var new_cap = @truncate(u32, (@as(u64, size) * 100) / MaxLoadPercentage + 1); 400 | new_cap = math.ceilPowerOfTwo(u32, new_cap) catch unreachable; 401 | return new_cap; 402 | } 403 | 404 | pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_size: Size) !void { 405 | if (!isUnderMaxLoadPercentage(new_size, self.capacity())) 406 | try self.grow(allocator, capacityForSize(new_size)); 407 | } 408 | 409 | pub fn clearRetainingCapacity(self: *Self) void { 410 | if (self.metadata) |_| { 411 | self.initMetadatas(); 412 | self.size = 0; 413 | self.available = 0; 
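// (Hedged note: only the metadata bytes are re-zeroed by initMetadatas()
// above; the entries array keeps its old bytes, which is safe because a slot
// whose metadata is neither used nor a tombstone is never dereferenced by
// getEntry, get, or the iterator.)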
414 | } 415 | } 416 | 417 | pub fn clearAndFree(self: *Self, allocator: *Allocator) void { 418 | self.deallocate(allocator); 419 | self.size = 0; 420 | self.available = 0; 421 | } 422 | 423 | pub fn count(self: *const Self) Size { 424 | return self.size; 425 | } 426 | 427 | fn header(self: *const Self) *Header { 428 | return @ptrCast(*Header, @ptrCast([*]Header, self.metadata.?) - 1); 429 | } 430 | 431 | fn entries(self: *const Self) [*]Entry { 432 | return self.header().entries; 433 | } 434 | 435 | pub fn capacity(self: *const Self) Size { 436 | if (self.metadata == null) return 0; 437 | 438 | return self.header().capacity; 439 | } 440 | 441 | pub fn iterator(self: *const Self) Iterator { 442 | return .{ .hm = self }; 443 | } 444 | 445 | /// Insert an entry in the map. Assumes it is not already present. 446 | pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void { 447 | assert(!self.contains(key)); 448 | try self.growIfNeeded(allocator, 1); 449 | 450 | self.putAssumeCapacityNoClobber(key, value); 451 | } 452 | 453 | /// Insert an entry in the map. Assumes it is not already present, 454 | /// and that no allocation is needed. 455 | pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { 456 | assert(!self.contains(key)); 457 | 458 | const hash = hashFn(key); 459 | const mask = self.capacity() - 1; 460 | var idx = @truncate(usize, hash & mask); 461 | 462 | var metadata = self.metadata.? + idx; 463 | while (metadata[0].isUsed()) { 464 | idx = (idx + 1) & mask; 465 | metadata = self.metadata.? + idx; 466 | } 467 | 468 | if (!metadata[0].isTombstone()) { 469 | assert(self.available > 0); 470 | self.available -= 1; 471 | } 472 | 473 | const fingerprint = Metadata.takeFingerprint(hash); 474 | metadata[0].fill(fingerprint); 475 | self.entries()[idx] = Entry{ .key = key, .value = value }; 476 | 477 | self.size += 1; 478 | } 479 | 480 | /// Inserts a new `Entry` into the hash map, returning the previous one, if any. 481 | pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry { 482 | const gop = try self.getOrPut(allocator, key); 483 | var result: ?Entry = null; 484 | if (gop.found_existing) { 485 | result = gop.entry.*; 486 | } 487 | gop.entry.value = value; 488 | return result; 489 | } 490 | 491 | /// Inserts a new `Entry` into the hash map, returning the previous one, if any. 492 | /// If insertion happens, asserts there is enough capacity without allocating. 493 | pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { 494 | const gop = self.getOrPutAssumeCapacity(key); 495 | var result: ?Entry = null; 496 | if (gop.found_existing) { 497 | result = gop.entry.*; 498 | } 499 | gop.entry.value = value; 500 | return result; 501 | } 502 | 503 | pub fn getEntry(self: Self, key: K) ?*Entry { 504 | if (self.size == 0) { 505 | return null; 506 | } 507 | 508 | const hash = hashFn(key); 509 | const mask = self.capacity() - 1; 510 | const fingerprint = Metadata.takeFingerprint(hash); 511 | var idx = @truncate(usize, hash & mask); 512 | 513 | var metadata = self.metadata.? + idx; 514 | while (metadata[0].isUsed() or metadata[0].isTombstone()) { 515 | if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { 516 | const entry = &self.entries()[idx]; 517 | if (eqlFn(entry.key, key)) { 518 | return entry; 519 | } 520 | } 521 | idx = (idx + 1) & mask; 522 | metadata = self.metadata.? 
+ idx; 523 | } 524 | 525 | return null; 526 | } 527 | 528 | /// Insert an entry if the associated key is not already present, otherwise update preexisting value. 529 | /// Returns true if the key was already present. 530 | pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void { 531 | const result = try self.getOrPut(allocator, key); 532 | result.entry.value = value; 533 | } 534 | 535 | /// Get an optional pointer to the value associated with key, if present. 536 | pub fn get(self: Self, key: K) ?V { 537 | if (self.size == 0) { 538 | return null; 539 | } 540 | 541 | const hash = hashFn(key); 542 | const mask = self.capacity() - 1; 543 | const fingerprint = Metadata.takeFingerprint(hash); 544 | var idx = @truncate(usize, hash & mask); 545 | 546 | var metadata = self.metadata.? + idx; 547 | while (metadata[0].isUsed() or metadata[0].isTombstone()) { 548 | if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { 549 | const entry = &self.entries()[idx]; 550 | if (eqlFn(entry.key, key)) { 551 | return entry.value; 552 | } 553 | } 554 | idx = (idx + 1) & mask; 555 | metadata = self.metadata.? + idx; 556 | } 557 | 558 | return null; 559 | } 560 | 561 | pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { 562 | try self.growIfNeeded(allocator, 1); 563 | 564 | return self.getOrPutAssumeCapacity(key); 565 | } 566 | 567 | pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { 568 | const hash = hashFn(key); 569 | const mask = self.capacity() - 1; 570 | const fingerprint = Metadata.takeFingerprint(hash); 571 | var idx = @truncate(usize, hash & mask); 572 | 573 | var metadata = self.metadata.? + idx; 574 | while (metadata[0].isUsed() or metadata[0].isTombstone()) { 575 | if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { 576 | const entry = &self.entries()[idx]; 577 | if (eqlFn(entry.key, key)) { 578 | return GetOrPutResult{ .entry = entry, .found_existing = true }; 579 | } 580 | } 581 | idx = (idx + 1) & mask; 582 | metadata = self.metadata.? + idx; 583 | } 584 | 585 | metadata[0].fill(fingerprint); 586 | const entry = &self.entries()[idx]; 587 | entry.* = .{ .key = key, .value = undefined }; 588 | self.size += 1; 589 | self.available -= 1; 590 | 591 | return GetOrPutResult{ .entry = entry, .found_existing = false }; 592 | } 593 | 594 | pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { 595 | const res = try self.getOrPut(allocator, key); 596 | if (!res.found_existing) res.entry.value = value; 597 | return res.entry; 598 | } 599 | 600 | /// Return true if there is a value associated with key in the map. 601 | pub fn contains(self: *const Self, key: K) bool { 602 | return self.get(key) != null; 603 | } 604 | 605 | /// If there is an `Entry` with a matching key, it is deleted from 606 | /// the hash map, and then returned from this function. 607 | pub fn remove(self: *Self, key: K) ?Entry { 608 | const hash = hashFn(key); 609 | const mask = self.capacity() - 1; 610 | const fingerprint = Metadata.takeFingerprint(hash); 611 | var idx = @truncate(usize, hash & mask); 612 | 613 | var metadata = self.metadata.? 
+ idx; 614 | while (metadata[0].isUsed() or metadata[0].isTombstone()) { 615 | if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { 616 | const entry = &self.entries()[idx]; 617 | if (eqlFn(entry.key, key)) { 618 | const removed_entry = entry.*; 619 | metadata[0].remove(); 620 | entry.* = undefined; 621 | self.size -= 1; 622 | return removed_entry; 623 | } 624 | } 625 | idx = (idx + 1) & mask; 626 | metadata = self.metadata.? + idx; 627 | } 628 | 629 | return null; 630 | } 631 | 632 | /// Asserts there is an `Entry` with matching key, deletes it from the hash map, 633 | /// and discards it. 634 | pub fn removeAssertDiscard(self: *Self, key: K) void { 635 | assert(self.contains(key)); 636 | 637 | const hash = hashFn(key); 638 | const mask = self.capacity() - 1; 639 | const fingerprint = Metadata.takeFingerprint(hash); 640 | var idx = @truncate(usize, hash & mask); 641 | 642 | var metadata = self.metadata.? + idx; 643 | while (metadata[0].isUsed() or metadata[0].isTombstone()) { 644 | if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { 645 | const entry = &self.entries()[idx]; 646 | if (eqlFn(entry.key, key)) { 647 | metadata[0].remove(); 648 | entry.* = undefined; 649 | self.size -= 1; 650 | return; 651 | } 652 | } 653 | idx = (idx + 1) & mask; 654 | metadata = self.metadata.? + idx; 655 | } 656 | 657 | unreachable; 658 | } 659 | 660 | fn initMetadatas(self: *Self) void { 661 | @memset(@ptrCast([*]u8, self.metadata.?), 0, @sizeOf(Metadata) * self.capacity()); 662 | } 663 | 664 | // This counts the number of occupied slots, used + tombstones, which is 665 | // what has to stay under the MaxLoadPercentage of capacity. 666 | fn load(self: *const Self) Size { 667 | const max_load = (self.capacity() * MaxLoadPercentage) / 100; 668 | assert(max_load >= self.available); 669 | return @truncate(Size, max_load - self.available); 670 | } 671 | 672 | fn growIfNeeded(self: *Self, allocator: *Allocator, new_count: Size) !void { 673 | if (new_count > self.available) { 674 | const new_cap = if (self.capacity() == 0) MinimalCapacity else capacityForSize(self.load() + new_count); 675 | try self.grow(allocator, new_cap); 676 | } 677 | } 678 | 679 | pub fn clone(self: Self, allocator: *Allocator) !Self { 680 | var other = Self{}; 681 | if (self.size == 0) 682 | return other; 683 | 684 | const new_cap = capacityForSize(self.size); 685 | try other.allocate(allocator, new_cap); 686 | other.initMetadatas(); 687 | other.available = @truncate(u32, (new_cap * MaxLoadPercentage) / 100); 688 | 689 | var i: Size = 0; 690 | var metadata = self.metadata.?; 691 | var entr = self.entries(); 692 | while (i < self.capacity()) : (i += 1) { 693 | if (metadata[i].isUsed()) { 694 | const entry = &entr[i]; 695 | other.putAssumeCapacityNoClobber(entry.key, entry.value); 696 | if (other.size == self.size) 697 | break; 698 | } 699 | } 700 | 701 | return other; 702 | } 703 | 704 | fn grow(self: *Self, allocator: *Allocator, new_capacity: Size) !void { 705 | assert(new_capacity > self.capacity()); 706 | assert(std.math.isPowerOfTwo(new_capacity)); 707 | 708 | var map = Self{}; 709 | defer map.deinit(allocator); 710 | try map.allocate(allocator, new_capacity); 711 | map.initMetadatas(); 712 | map.available = @truncate(u32, (new_capacity * MaxLoadPercentage) / 100); 713 | 714 | if (self.size != 0) { 715 | const old_capacity = self.capacity(); 716 | var i: Size = 0; 717 | var metadata = self.metadata.?; 718 | var entr = self.entries(); 719 | while (i < old_capacity) : (i += 1) { 720 | if 
(metadata[i].isUsed()) { 721 | const entry = &entr[i]; 722 | map.putAssumeCapacityNoClobber(entry.key, entry.value); 723 | if (map.size == self.size) 724 | break; 725 | } 726 | } 727 | } 728 | 729 | self.size = 0; 730 | std.mem.swap(Self, self, &map); 731 | } 732 | 733 | fn allocate(self: *Self, allocator: *Allocator, new_capacity: Size) !void { 734 | const meta_size = @sizeOf(Header) + new_capacity * @sizeOf(Metadata); 735 | 736 | const alignment = @alignOf(Entry) - 1; 737 | const entries_size = @as(usize, new_capacity) * @sizeOf(Entry) + alignment; 738 | 739 | const total_size = meta_size + entries_size; 740 | 741 | const slice = try allocator.alignedAlloc(u8, @alignOf(Header), total_size); 742 | const ptr = @ptrToInt(slice.ptr); 743 | 744 | const metadata = ptr + @sizeOf(Header); 745 | var entry_ptr = ptr + meta_size; 746 | entry_ptr = (entry_ptr + alignment) & ~@as(usize, alignment); 747 | assert(entry_ptr + @as(usize, new_capacity) * @sizeOf(Entry) <= ptr + total_size); 748 | 749 | const hdr = @intToPtr(*Header, ptr); 750 | hdr.entries = @intToPtr([*]Entry, entry_ptr); 751 | hdr.capacity = new_capacity; 752 | self.metadata = @intToPtr([*]Metadata, metadata); 753 | } 754 | }; 755 | } 756 | 757 | const expect = std.testing.expect; 758 | const expectEqual = std.testing.expectEqual; 759 | 760 | test "basic usage" { 761 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 762 | comptime assert(@sizeOf(@TypeOf(map)) == 24); 763 | defer map.deinit(); 764 | 765 | const count = 5; 766 | var i: u32 = 0; 767 | var total: u32 = 0; 768 | while (i < count) : (i += 1) { 769 | try map.put(i, i); 770 | total += i; 771 | } 772 | 773 | var sum: u32 = 0; 774 | var it = map.iterator(); 775 | while (it.next()) |kv| { 776 | sum += kv.key; 777 | } 778 | expect(sum == total); 779 | 780 | i = 0; 781 | sum = 0; 782 | while (i < count) : (i += 1) { 783 | expectEqual(map.get(i).?, i); 784 | sum += map.get(i).?; 785 | } 786 | expectEqual(total, sum); 787 | } 788 | 789 | test "ensureCapacity" { 790 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 791 | defer map.deinit(); 792 | 793 | try map.ensureCapacity(9); 794 | expectEqual(map.capacity(), 16); 795 | try map.ensureCapacity(129); 796 | expectEqual(map.capacity(), 256); 797 | expectEqual(map.count(), 0); 798 | } 799 | 800 | test "clearRetainingCapacity" { 801 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 802 | defer map.deinit(); 803 | 804 | map.clearRetainingCapacity(); 805 | 806 | try map.put(1, 1); 807 | expectEqual(map.get(1).?, 1); 808 | expectEqual(map.count(), 1); 809 | 810 | const cap = map.capacity(); 811 | expect(cap > 0); 812 | 813 | map.clearRetainingCapacity(); 814 | map.clearRetainingCapacity(); 815 | expectEqual(map.count(), 0); 816 | expectEqual(map.capacity(), cap); 817 | expect(!map.contains(1)); 818 | } 819 | 820 | test "grow" { 821 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 822 | defer map.deinit(); 823 | 824 | const growTo = 12456; 825 | 826 | var i: u32 = 0; 827 | while (i < growTo) : (i += 1) { 828 | try map.put(i, i); 829 | } 830 | expectEqual(map.count(), growTo); 831 | 832 | i = 0; 833 | var it = map.iterator(); 834 | while (it.next()) |kv| { 835 | expectEqual(kv.key, kv.value); 836 | i += 1; 837 | } 838 | expectEqual(i, growTo); 839 | 840 | i = 0; 841 | while (i < growTo) : (i += 1) { 842 | expectEqual(map.get(i).?, i); 843 | } 844 | } 845 | 846 | test "clone" { 847 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 848 | defer map.deinit(); 849 | 850 | var a = try 
map.clone(); 851 | defer a.deinit(); 852 | 853 | expectEqual(a.count(), 0); 854 | 855 | try a.put(1, 1); 856 | try a.put(2, 2); 857 | try a.put(3, 3); 858 | 859 | var b = try a.clone(); 860 | defer b.deinit(); 861 | 862 | expectEqual(b.count(), 3); 863 | expectEqual(b.get(1), 1); 864 | expectEqual(b.get(2), 2); 865 | expectEqual(b.get(3), 3); 866 | } 867 | 868 | test "ensureCapacity with existing elements" { 869 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 870 | defer map.deinit(); 871 | 872 | try map.put(0, 0); 873 | expectEqual(map.count(), 1); 874 | expectEqual(map.capacity(), @TypeOf(map).Unmanaged.MinimalCapacity); 875 | 876 | try map.ensureCapacity(65); 877 | expectEqual(map.count(), 1); 878 | expectEqual(map.capacity(), 128); 879 | } 880 | 881 | test "ensureCapacity satisfies max load factor" { 882 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 883 | defer map.deinit(); 884 | 885 | try map.ensureCapacity(127); 886 | expectEqual(map.capacity(), 256); 887 | } 888 | 889 | test "remove" { 890 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 891 | defer map.deinit(); 892 | 893 | var i: u32 = 0; 894 | while (i < 16) : (i += 1) { 895 | try map.put(i, i); 896 | } 897 | 898 | i = 0; 899 | while (i < 16) : (i += 1) { 900 | if (i % 3 == 0) { 901 | _ = map.remove(i); 902 | } 903 | } 904 | expectEqual(map.count(), 10); 905 | var it = map.iterator(); 906 | while (it.next()) |kv| { 907 | expectEqual(kv.key, kv.value); 908 | expect(kv.key % 3 != 0); 909 | } 910 | 911 | i = 0; 912 | while (i < 16) : (i += 1) { 913 | if (i % 3 == 0) { 914 | expect(!map.contains(i)); 915 | } else { 916 | expectEqual(map.get(i).?, i); 917 | } 918 | } 919 | } 920 | 921 | test "reverse removes" { 922 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 923 | defer map.deinit(); 924 | 925 | var i: u32 = 0; 926 | while (i < 16) : (i += 1) { 927 | try map.putNoClobber(i, i); 928 | } 929 | 930 | i = 16; 931 | while (i > 0) : (i -= 1) { 932 | _ = map.remove(i - 1); 933 | expect(!map.contains(i - 1)); 934 | var j: u32 = 0; 935 | while (j < i - 1) : (j += 1) { 936 | expectEqual(map.get(j).?, j); 937 | } 938 | } 939 | 940 | expectEqual(map.count(), 0); 941 | } 942 | 943 | test "multiple removes on same metadata" { 944 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 945 | defer map.deinit(); 946 | 947 | var i: u32 = 0; 948 | while (i < 16) : (i += 1) { 949 | try map.put(i, i); 950 | } 951 | 952 | _ = map.remove(7); 953 | _ = map.remove(15); 954 | _ = map.remove(14); 955 | _ = map.remove(13); 956 | expect(!map.contains(7)); 957 | expect(!map.contains(15)); 958 | expect(!map.contains(14)); 959 | expect(!map.contains(13)); 960 | 961 | i = 0; 962 | while (i < 13) : (i += 1) { 963 | if (i == 7) { 964 | expect(!map.contains(i)); 965 | } else { 966 | expectEqual(map.get(i).?, i); 967 | } 968 | } 969 | 970 | try map.put(15, 15); 971 | try map.put(13, 13); 972 | try map.put(14, 14); 973 | try map.put(7, 7); 974 | i = 0; 975 | while (i < 16) : (i += 1) { 976 | expectEqual(map.get(i).?, i); 977 | } 978 | } 979 | 980 | test "put and remove loop in random order" { 981 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 982 | defer map.deinit(); 983 | 984 | var keys = std.ArrayList(u32).init(std.testing.allocator); 985 | defer keys.deinit(); 986 | 987 | const size = 32; 988 | const iterations = 100; 989 | 990 | var i: u32 = 0; 991 | while (i < size) : (i += 1) { 992 | try keys.append(i); 993 | } 994 | var rng = std.rand.DefaultPrng.init(0); 995 | 996 | while (i 
< iterations) : (i += 1) { 997 | std.rand.Random.shuffle(&rng.random, u32, keys.items); 998 | 999 | for (keys.items) |key| { 1000 | try map.put(key, key); 1001 | } 1002 | expectEqual(map.count(), size); 1003 | 1004 | for (keys.items) |key| { 1005 | _ = map.remove(key); 1006 | } 1007 | expectEqual(map.count(), 0); 1008 | } 1009 | } 1010 | 1011 | test "remove one million elements in random order" { 1012 | const Map = AutoHashMap(u32, u32); 1013 | const n = 1000 * 1000; 1014 | var map = Map.init(std.heap.page_allocator); 1015 | defer map.deinit(); 1016 | 1017 | var keys = std.ArrayList(u32).init(std.heap.page_allocator); 1018 | defer keys.deinit(); 1019 | 1020 | var i: u32 = 0; 1021 | while (i < n) : (i += 1) { 1022 | keys.append(i) catch unreachable; 1023 | } 1024 | 1025 | var rng = std.rand.DefaultPrng.init(0); 1026 | std.rand.Random.shuffle(&rng.random, u32, keys.items); 1027 | 1028 | for (keys.items) |key| { 1029 | map.put(key, key) catch unreachable; 1030 | } 1031 | 1032 | std.rand.Random.shuffle(&rng.random, u32, keys.items); 1033 | i = 0; 1034 | while (i < n) : (i += 1) { 1035 | const key = keys.items[i]; 1036 | _ = map.remove(key); 1037 | } 1038 | } 1039 | 1040 | test "put" { 1041 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 1042 | defer map.deinit(); 1043 | 1044 | var i: u32 = 0; 1045 | while (i < 16) : (i += 1) { 1046 | _ = try map.put(i, i); 1047 | } 1048 | 1049 | i = 0; 1050 | while (i < 16) : (i += 1) { 1051 | expectEqual(map.get(i).?, i); 1052 | } 1053 | 1054 | i = 0; 1055 | while (i < 16) : (i += 1) { 1056 | try map.put(i, i * 16 + 1); 1057 | } 1058 | 1059 | i = 0; 1060 | while (i < 16) : (i += 1) { 1061 | expectEqual(map.get(i).?, i * 16 + 1); 1062 | } 1063 | } 1064 | 1065 | test "getOrPut" { 1066 | var map = AutoHashMap(u32, u32).init(std.testing.allocator); 1067 | defer map.deinit(); 1068 | 1069 | var i: u32 = 0; 1070 | while (i < 10) : (i += 1) { 1071 | try map.put(i * 2, 2); 1072 | } 1073 | 1074 | i = 0; 1075 | while (i < 20) : (i += 1) { 1076 | var n = try map.getOrPutValue(i, 1); 1077 | } 1078 | 1079 | i = 0; 1080 | var sum = i; 1081 | while (i < 20) : (i += 1) { 1082 | sum += map.get(i).?; 1083 | } 1084 | 1085 | expectEqual(sum, 30); 1086 | } 1087 | -------------------------------------------------------------------------------- /monolithic_array.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const builtin = @import("builtin"); 3 | const assert = std.debug.assert; 4 | const mem = std.mem; 5 | const os = std.os; 6 | const w = os.windows; 7 | const warn = std.debug.warn; 8 | 9 | inline fn pageCountForSize(size: usize) usize { 10 | return (size + mem.page_size - 1) / mem.page_size; 11 | } 12 | 13 | /// Special array based on the plentiness of address space available to 64bit 14 | /// processes. 15 | /// Reserves enough memory pages from the OS to hold at most `max_count` 16 | /// items. Pages are committed as needed when the array grows in size. 
17 | /// Advantages: 18 | /// - item addresses are stable throughout the lifetime of the array 19 | /// - no need to realloc, thus: 20 | /// - no temporary 2n memory usage 21 | /// - no need to copy items to their new storage when growing 22 | /// - at most `page_size` memory overhead from unused items 23 | /// Drawbacks: 24 | /// - has to declare a maximum size upfront 25 | /// - not suited to small arrays because of the page granularity 26 | /// - no automatic geometric growth 27 | pub fn MonolithicArray(comptime T: type) type { 28 | // TODO support big sizes and alignments. 29 | comptime assert(@alignOf(T) <= mem.page_size); 30 | comptime assert(@sizeOf(T) <= mem.page_size); 31 | 32 | return struct { 33 | /// Always hold the actual pointer and length to existing items. 34 | items: []T, 35 | 36 | /// Total number of pages reserved. 37 | reserved_pages: usize, 38 | 39 | /// Number of pages currently used. 40 | committed_pages: usize, 41 | 42 | const Self = @This(); 43 | const ElementSize = @sizeOf(T); 44 | const ElementsPerPage = mem.page_size / ElementSize; 45 | 46 | inline fn pageCountForItems(count: usize) usize { 47 | return pageCountForSize(count * ElementSize); 48 | } 49 | 50 | pub fn init(max_count: usize) !Self { 51 | const page_count = pageCountForItems(max_count); 52 | const alloc_size = page_count * mem.page_size; 53 | const ptr = switch (builtin.os) { 54 | .windows => try w.VirtualAlloc( 55 | null, 56 | alloc_size, 57 | w.MEM_RESERVE, 58 | w.PAGE_READWRITE, 59 | ), 60 | else => @compileError("TODO"), 61 | }; 62 | 63 | return Self{ 64 | .items = @ptrCast([*]T, @alignCast(@alignOf(T), ptr))[0..0], 65 | .reserved_pages = page_count, 66 | .committed_pages = 0, 67 | }; 68 | } 69 | 70 | pub fn deinit(self: *Self) void { 71 | switch (builtin.os) { 72 | .windows => w.VirtualFree(self.items.ptr, 0, w.MEM_RELEASE), 73 | else => @compileError("TODO"), 74 | } 75 | } 76 | 77 | pub fn size(self: *Self) usize { 78 | return self.items.len; 79 | } 80 | 81 | pub fn capacity(self: *Self) usize { 82 | return self.committed_pages * ElementsPerPage; 83 | } 84 | 85 | pub fn toSlice(self: *Self) []T { 86 | return self.items; 87 | } 88 | 89 | pub fn toSliceConst(self: *Self) []const T { 90 | return self.items; 91 | } 92 | 93 | fn grow(self: *Self, page_count: usize) !void { 94 | // Start address of the first non-committed-yet page. 95 | const ptr = @ptrToInt(self.items.ptr) + self.committed_pages * mem.page_size; 96 | switch (builtin.os) { 97 | // Calling directly kernel32 to bypass unexpectedError that 98 | // prints a stack trace during tests. 
99 | .windows => _ = w.kernel32.VirtualAlloc( 100 | @intToPtr(*c_void, ptr), 101 | page_count * mem.page_size, 102 | w.MEM_COMMIT, 103 | w.PAGE_READWRITE, 104 | ) orelse return error.OutOfMemory, 105 | else => @compileError("TODO"), 106 | } 107 | self.committed_pages += page_count; 108 | } 109 | 110 | pub fn reserve(self: *Self, wanted_capacity: usize) !void { 111 | const wanted_page_count = pageCountForItems(wanted_capacity); 112 | if (wanted_page_count > self.committed_pages) { 113 | try self.grow(wanted_page_count - self.committed_pages); 114 | } 115 | } 116 | 117 | pub fn append(self: *Self, item: T) !void { 118 | const s = self.size(); 119 | if (self.capacity() == s) { 120 | try self.grow(1); 121 | } 122 | self.items.len += 1; 123 | self.items[s] = item; 124 | } 125 | 126 | pub fn appendAssumeCapacity(self: *Self, item: T) void { 127 | assert(self.size() < self.capacity()); 128 | 129 | const s = self.items.len; 130 | self.items.len += 1; 131 | self.items[s] = item; 132 | } 133 | 134 | pub fn appendSlice(self: *Self, items: []const T) !void { 135 | const s = self.size(); 136 | const wanted_size = s + items.len; 137 | const cap = self.capacity(); 138 | if (wanted_size > cap) { 139 | try self.grow(pageCountForItems(wanted_size - cap)); 140 | } 141 | self.items.len = wanted_size; 142 | mem.copy(T, self.items[s..], items); 143 | } 144 | 145 | pub fn remove(self: *Self, i: usize) T { 146 | assert(i < self.size()); 147 | 148 | const s = self.items.len - 1; 149 | const removed_elem = self.at(i); 150 | if (i != s) { 151 | self.items[i] = self.items[s]; 152 | } 153 | self.items[s] = undefined; 154 | self.items.len = s; 155 | 156 | return removed_elem; 157 | } 158 | 159 | pub fn orderedRemove(self: *Self, i: usize) T { 160 | assert(i < self.size()); 161 | 162 | const s = self.items.len - 1; 163 | const removed_elem = self.at(i); 164 | if (i != s) { 165 | var j: usize = i; 166 | while (j < s) : (j += 1) { 167 | self.items[j] = self.items[j + 1]; 168 | } 169 | } 170 | self.items.len = s; 171 | 172 | return removed_elem; 173 | } 174 | 175 | pub fn set(self: *Self, i: usize, item: T) void { 176 | assert(i < self.size()); 177 | 178 | self.items[i] = item; 179 | } 180 | 181 | pub fn setOrError(self: *Self, i: usize, item: T) !void { 182 | if (i >= self.size()) return error.OutOfBounds; 183 | self.items[i] = item; 184 | } 185 | 186 | pub fn insert(self: *Self, i: usize, item: T) !void { 187 | assert(i < self.size()); 188 | 189 | const s = self.size(); 190 | const new_size = s + 1; 191 | if (self.capacity() == s) { 192 | try self.grow(1); 193 | } 194 | 195 | self.items.len = new_size; 196 | mem.copyBackwards( 197 | T, 198 | self.items[i + 1 .. 
new_size], 199 | self.items[i..s], 200 | ); 201 | self.items[i] = item; 202 | } 203 | 204 | pub fn insertSlice(self: *Self, items: []const T) !void {} 205 | 206 | pub fn at(self: *const Self, i: usize) T { 207 | return self.items[i]; 208 | } 209 | }; 210 | } 211 | 212 | const expect = std.testing.expect; 213 | const expectEqual = std.testing.expectEqual; 214 | const expectError = std.testing.expectError; 215 | 216 | test "init" { 217 | var arr = try MonolithicArray(u32).init(1 << 32); 218 | defer arr.deinit(); 219 | 220 | expectEqual(arr.size(), 0); 221 | expectEqual(arr.capacity(), 0); 222 | expectEqual(arr.committed_pages, 0); 223 | } 224 | 225 | test "append" { 226 | var arr = try MonolithicArray(u32).init(1 << 32); 227 | defer arr.deinit(); 228 | 229 | try arr.append(123); 230 | try arr.append(456); 231 | try arr.append(789); 232 | expectEqual(arr.size(), 3); 233 | expectEqual(arr.committed_pages, 1); 234 | } 235 | 236 | test "appendAssumeCapacity" { 237 | var arr = try MonolithicArray(u32).init(1 << 32); 238 | defer arr.deinit(); 239 | 240 | try arr.reserve(1); // force reserve for at least one item 241 | var i: u32 = 0; 242 | while (i < arr.capacity()) : (i += 1) { 243 | arr.appendAssumeCapacity(i); 244 | } 245 | expectEqual(arr.size(), arr.capacity()); 246 | expectEqual(arr.committed_pages, 1); 247 | } 248 | 249 | test "appendSlice" { 250 | var arr = try MonolithicArray(u32).init(1 << 32); 251 | defer arr.deinit(); 252 | 253 | try arr.append(1); 254 | try arr.append(2); 255 | try arr.append(3); 256 | try arr.appendSlice([_]u32{ 4, 5, 6 }); 257 | try arr.append(7); 258 | 259 | expectEqual(arr.size(), 7); 260 | for (arr.toSliceConst()) |i, j| { 261 | expectEqual(i, @intCast(u32, j + 1)); 262 | } 263 | } 264 | 265 | test "at" { 266 | var arr = try MonolithicArray(u32).init(1 << 32); 267 | defer arr.deinit(); 268 | 269 | try arr.append(123); 270 | try arr.append(456); 271 | try arr.append(789); 272 | expectEqual(arr.at(0), 123); 273 | expectEqual(arr.at(1), 456); 274 | expectEqual(arr.at(2), 789); 275 | } 276 | 277 | test "out of memory" { 278 | const cap = mem.page_size / @sizeOf(u32); 279 | var arr = try MonolithicArray(u32).init(cap); 280 | defer arr.deinit(); 281 | 282 | var i: u32 = 0; 283 | while (i < cap) : (i += 1) { 284 | try arr.append(i); 285 | } 286 | expectError(error.OutOfMemory, arr.append(i)); 287 | } 288 | 289 | test "reserve" { 290 | var arr = try MonolithicArray(u32).init(1 << 32); 291 | defer arr.deinit(); 292 | 293 | try arr.reserve(145); 294 | expect(arr.capacity() >= 145); 295 | expectEqual(arr.committed_pages, 1); 296 | expectEqual(arr.committed_pages, 1); 297 | expectEqual(arr.size(), 0); 298 | } 299 | 300 | test "reserve multiple pages" { 301 | var arr = try MonolithicArray(u32).init(1 << 32); 302 | defer arr.deinit(); 303 | 304 | const page_count = 3; 305 | 306 | const capacity = (page_count * mem.page_size) / @sizeOf(u32); 307 | try arr.reserve(capacity); 308 | expectEqual(arr.capacity(), capacity); 309 | expectEqual(arr.committed_pages, page_count); 310 | expectEqual(arr.size(), 0); 311 | } 312 | 313 | test "reserve more pages than physical memory" { 314 | // We can ask for 16TB and the OS will just comply. 
315 | const memory_size = 16 * 1000 * 1000 * 1000 * 1000; 316 | const capacity = memory_size / @sizeOf(u32); 317 | const page_count = pageCountForSize(memory_size); 318 | 319 | var arr = try MonolithicArray(u32).init(capacity); 320 | defer arr.deinit(); 321 | 322 | expectEqual(arr.capacity(), 0); 323 | expectEqual(arr.size(), 0); 324 | expectEqual(arr.reserved_pages, page_count); 325 | } 326 | 327 | test "grow" { 328 | var arr = try MonolithicArray(u32).init(1 << 32); 329 | defer arr.deinit(); 330 | 331 | const size = 1000 * 1000; 332 | 333 | var i: u32 = 0; 334 | while (i < size) : (i += 1) { 335 | try arr.append(i); 336 | } 337 | 338 | expectEqual(arr.size(), size); 339 | } 340 | 341 | test "remove" { 342 | var arr = try MonolithicArray(u32).init(1 << 32); 343 | defer arr.deinit(); 344 | 345 | try arr.append(123); 346 | try arr.append(456); 347 | try arr.append(789); 348 | try arr.append(147); 349 | try arr.append(258); 350 | try arr.append(369); 351 | 352 | expectEqual(arr.remove(5), 369); 353 | expectEqual(arr.remove(0), 123); 354 | expectEqual(arr.remove(2), 789); 355 | 356 | expectEqual(arr.size(), 3); 357 | expectEqual(arr.at(0), 258); 358 | expectEqual(arr.at(1), 456); 359 | expectEqual(arr.at(2), 147); 360 | } 361 | 362 | test "orderedRemove" { 363 | var arr = try MonolithicArray(u32).init(1 << 32); 364 | defer arr.deinit(); 365 | 366 | try arr.append(123); 367 | try arr.append(456); 368 | try arr.append(789); 369 | try arr.append(147); 370 | try arr.append(258); 371 | try arr.append(369); 372 | 373 | expectEqual(arr.orderedRemove(5), 369); 374 | expectEqual(arr.orderedRemove(0), 123); 375 | expectEqual(arr.orderedRemove(0), 456); 376 | 377 | expectEqual(arr.size(), 3); 378 | expectEqual(arr.at(0), 789); 379 | expectEqual(arr.at(1), 147); 380 | expectEqual(arr.at(2), 258); 381 | } 382 | 383 | test "set" { 384 | var arr = try MonolithicArray(u32).init(1 << 32); 385 | defer arr.deinit(); 386 | 387 | try arr.append(0); 388 | try arr.append(0); 389 | try arr.append(0); 390 | 391 | arr.set(0, 123); 392 | arr.set(1, 456); 393 | arr.set(2, 789); 394 | 395 | expectEqual(arr.at(0), 123); 396 | expectEqual(arr.at(1), 456); 397 | expectEqual(arr.at(2), 789); 398 | } 399 | 400 | test "setOrError" { 401 | var arr = try MonolithicArray(u32).init(1 << 32); 402 | defer arr.deinit(); 403 | 404 | try arr.append(0); 405 | 406 | expectError(error.OutOfBounds, arr.setOrError(1, 0)); 407 | expectError(error.OutOfBounds, arr.setOrError(2, 0)); 408 | expectError(error.OutOfBounds, arr.setOrError(123456, 0)); 409 | } 410 | 411 | test "insert" { 412 | var arr = try MonolithicArray(u32).init(1 << 32); 413 | defer arr.deinit(); 414 | 415 | try arr.append(123); 416 | try arr.append(456); 417 | try arr.append(789); 418 | 419 | try arr.insert(1, 159); 420 | 421 | expectEqual(arr.size(), 4); 422 | expectEqual(arr.at(0), 123); 423 | expectEqual(arr.at(1), 159); 424 | expectEqual(arr.at(2), 456); 425 | expectEqual(arr.at(3), 789); 426 | } 427 | -------------------------------------------------------------------------------- /sliceable_hashmap.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const debug = std.debug; 3 | const assert = debug.assert; 4 | const warn = debug.warn; 5 | const math = std.math; 6 | const mem = std.mem; 7 | const Allocator = mem.Allocator; 8 | const builtin = @import("builtin"); 9 | const ceilPowerOfTwo = std.math.ceilPowerOfTwo; 10 | 11 | pub fn hashInt(comptime HashInt: type, i: anytype) HashInt { 12 | var x: HashInt 
= i; 13 | 14 | if (HashInt.bit_count <= 32) { 15 | // Improved MurmurHash3 finalizer taken from https://nullprogram.com/blog/2018/07/31/ 16 | x ^= x >> 16; 17 | x *%= 0x7feb352d; 18 | x ^= x >> 15; 19 | x *%= 0x846ca68b; 20 | x ^= x >> 16; 21 | } else if (HashInt.bit_count <= 64) { 22 | // Improved MurmurHash3 finalizer (Mix13) taken from http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html 23 | x ^= x >> 30; 24 | x *%= 0xbf58476d1ce4e5b9; 25 | x ^= x >> 27; 26 | x *%= 0x94d049bb133111eb; 27 | x ^= x >> 31; 28 | } else @compileError("TODO"); 29 | 30 | return x; 31 | } 32 | 33 | pub fn hashu32(x: u32) u32 { 34 | return @inlineCall(hashInt, u32, x); 35 | } 36 | 37 | pub fn eqlu32(x: u32, y: u32) bool { 38 | return x == y; 39 | } 40 | 41 | pub fn isPowerOfTwo(i: anytype) bool { 42 | return i & (i - 1) == 0; 43 | } 44 | 45 | // Design decisions: 46 | // 47 | // Open addressing is a good fit for modern CPU architectures, making efficient use of 48 | // caches. Once you've resolved the key's hash to the initial bucket, it is most 49 | // likely that the element you're looking for is within a cache line. If the 50 | // elements are too big to fit many in a cache line, you still benefit from 51 | // regular patterns in memory accesses, which are easy to predict for the CPU. 52 | // Linear probing is a way of resolving collisions when multiple elements belong 53 | // in the same bucket. It's nice on the memory cache and easily predictable. 54 | // 55 | // The HashMap holds two data arrays, one containing metadata, and one containing 56 | // elements. 57 | // * Metadata 58 | // An array of buckets, each holding the hash of the element within it and an index. 59 | // At the moment, this is a pair of u32, restricting the size of the HashMap to 60 | // about 2^32. 61 | // * Elements 62 | // An array of elements, stored contiguously. 63 | // 64 | // The capacity is based on power-of-two numbers, which allows using a bitmask 65 | // operation instead of modulo when probing. 66 | // 67 | // This strategy has several advantages, especially regarding memory usage and speed. 68 | // 69 | // 1. Probing makes very efficient use of the cache: when doing a lookup, it is 70 | // very likely that even if the bucket is already used by another element, we 71 | // can look into the following ones in the same cache line. Interleaving the 72 | // metadata with actual elements would incur more frequent cache misses. 73 | // 74 | // 2. By storing the hash of the element present in the bucket, we can have 75 | // high confidence that probing usually does not need to look at many elements. 76 | // While it is probable that two elements resolve to the same bucket, especially 77 | // in HashMaps of small capacity, it is unlikely that their _hashes_ collide. Thus 78 | // we can simply probe in the bucket array by comparing hashes without resorting 79 | // to comparing keys. When a bucket with the same hash is found, we do a key 80 | // comparison to be certain of the key's identity, and that's usually the only one needed. 81 | // 82 | // 3. Elements are inserted at the back of their array, and their bucket 83 | // updated. Removal is also inspired by dynamic arrays: the removed element X 84 | // is replaced by Y, the one at the end of the array (if applicable). X's bucket 85 | // is marked with a tombstone, and Y's bucket is updated to its new index in 86 | // the element array. This is amortized rehash for removal. 87 | // 88 | // 4. Elements are stored contiguously, which means they can be used as a slice. 
89 | // This results in cache-efficient iteration over the elements. 90 | // 91 | // 5. Separating bucket metadata from stored elements allows allocating fewer 92 | // element slots than the capacity, because we know that we will never have more 93 | // elements than the maximum load factor multiplied by capacity. When elements 94 | // contain big keys and/or values, this can be a substantial saving in memory. 95 | // The amount of "wasted" memory is then only two u32 for each empty bucket, 96 | // and can be calculated as: (1 - max_load_factor) * capacity * 8 bytes. 97 | // 98 | // 6. Using no SIMD operations or special instruction set means that it is 99 | // widely portable across platforms. The implementation is also quite simple. 100 | // 101 | // But it also has drawbacks, or areas where it could be improved. 102 | // 103 | // 1. Storing 8 bytes of metadata per element is a lot and adds significant 104 | // memory overhead compared to implementations focusing on small memory footprint. 105 | // 106 | // 2. A smarter approach such as Robin Hood Hashing would probably help attain 107 | // higher load factors with good performance. 108 | 109 | /// A HashMap based on open addressing and linear probing. 110 | pub fn HashMap(comptime K: type, comptime V: type, hashFn: fn (key: K) u32, eqlFn: fn (a: K, b: K) bool) type { 111 | return struct { 112 | const Self = @This(); 113 | 114 | // TODO at least one slice is unnecessary 115 | entries: []KV, 116 | buckets: []Bucket, 117 | size: Size, 118 | allocator: *Allocator, 119 | 120 | const Size = u32; 121 | 122 | const KV = struct { 123 | key: K, 124 | value: V, 125 | }; 126 | 127 | const Bucket = struct { 128 | hash: Size, 129 | index: Size, 130 | 131 | const Empty = 0xFFFFFFFF; 132 | const TombStone = Empty - 1; 133 | }; 134 | 135 | pub fn init(allocator: *Allocator) Self { 136 | return Self{ 137 | .allocator = allocator, 138 | .entries = [0]KV{}, 139 | .buckets = [0]Bucket{}, 140 | .size = 0, 141 | }; 142 | } 143 | 144 | pub fn deinit(self: *Self) void { 145 | self.allocator.free(self.buckets); 146 | self.allocator.free(self.entries); 147 | self.* = undefined; 148 | } 149 | 150 | pub fn reserve(self: *Self, cap: Size) !void { 151 | if (cap <= self.capacity()) { 152 | assert(isUnderMaxLoadFactor(self.size, self.capacity())); 153 | return; 154 | } 155 | 156 | // Get a new capacity that satisfies the constraint of the maximum load factor. 
157 | // TODO because of Empty & Tombstone, capacity can be 2^31 at most, handle this correctly 158 | const new_capacity = blk: { 159 | var new_cap = ceilPowerOfTwo(Size, cap) catch unreachable; 160 | if (!isUnderMaxLoadFactor(cap, new_cap)) { 161 | new_cap *= 2; 162 | } 163 | break :blk new_cap; 164 | }; 165 | 166 | if (self.capacity() == 0) { 167 | try self.setCapacity(new_capacity); 168 | } else { 169 | try self.grow(new_capacity); 170 | } 171 | } 172 | 173 | pub fn clear(self: *Self) void { 174 | self.size = 0; 175 | self.initBuckets(); 176 | } 177 | 178 | pub fn toSlice(self: *Self) []KV { 179 | return self.entries[0..self.size]; 180 | } 181 | 182 | pub fn toSliceConst(self: *const Self) []const KV { 183 | return self.entries[0..self.size]; 184 | } 185 | 186 | pub fn count(self: *const Self) Size { 187 | return self.size; 188 | } 189 | 190 | pub fn capacity(self: *const Self) Size { 191 | return @intCast(Size, self.buckets.len); 192 | } 193 | 194 | fn internalPut(self: *Self, key: K, value: V, hash: Size) void { 195 | const mask = self.buckets.len - 1; 196 | var bucket_index = hash & mask; 197 | var bucket = &self.buckets[bucket_index]; 198 | 199 | while (bucket.index != Bucket.Empty and bucket.index != Bucket.TombStone) { 200 | bucket_index = (bucket_index + 1) & mask; 201 | bucket = &self.buckets[bucket_index]; 202 | } 203 | 204 | const index = self.size; 205 | self.size += 1; 206 | 207 | bucket.hash = hash; 208 | bucket.index = index; 209 | self.entries[index] = KV{ .key = key, .value = value }; 210 | } 211 | 212 | /// Insert an entry in the map with precomputed hash. Assumes it is not already present. 213 | pub fn putHashed(self: *Self, key: K, value: V, hash: Size) !void { 214 | assert(hash == hashFn(key)); 215 | assert(!self.contains(key)); 216 | try self.ensureCapacity(); 217 | 218 | assert(self.buckets.len >= 0); 219 | assert(isPowerOfTwo(self.buckets.len)); 220 | 221 | self.internalPut(key, value, hash); 222 | } 223 | 224 | /// Insert an entry in the map. Assumes it is not already present. 225 | pub fn put(self: *Self, key: K, value: V) !void { 226 | assert(!self.contains(key)); 227 | try self.ensureCapacity(); 228 | 229 | assert(self.buckets.len >= 0); 230 | assert(isPowerOfTwo(self.buckets.len)); 231 | 232 | const hash = hashFn(key); 233 | self.internalPut(key, value, hash); 234 | } 235 | 236 | /// Insert an entry if the associated key is not already present, otherwise update preexisting value. 237 | /// Returns true if the key was already present. 238 | pub fn putOrUpdate(self: *Self, key: K, value: V) !bool { 239 | try self.ensureCapacity(); // Should this go after the 'get' part, at the cost of complicating the code ? Would it even be an actual optimization ? 240 | 241 | // Same code as internalGet except we update the value if found. 242 | const mask: Size = @intCast(Size, self.buckets.len) - 1; 243 | const hash = hashFn(key); 244 | var bucket_index = hash & mask; 245 | var bucket = &self.buckets[bucket_index]; 246 | while (bucket.index != Bucket.Empty and bucket.index != Bucket.TombStone) : ({ 247 | bucket_index = (bucket_index + 1) & mask; 248 | bucket = &self.buckets[bucket_index]; 249 | }) { 250 | if (bucket.hash == hash) { 251 | const entry_index = bucket.index; 252 | const entry = &self.entries[entry_index]; 253 | if (eqlFn(entry.key, key)) { 254 | entry.value = value; 255 | return true; 256 | } 257 | } 258 | } 259 | 260 | // No existing key found, put it there. 
261 | const index = self.size; 262 | self.size += 1; 263 | 264 | bucket.hash = hash; 265 | bucket.index = index; 266 | self.entries[index] = KV{ .key = key, .value = value }; 267 | 268 | return false; 269 | } 270 | 271 | fn internalGet(self: *const Self, key: K, hash: Size) ?*V { 272 | const mask = @intCast(Size, self.buckets.len) - 1; 273 | 274 | var bucket_index = hash & mask; 275 | var bucket = &self.buckets[bucket_index]; 276 | while (bucket.index != Bucket.Empty) : ({ 277 | bucket_index = (bucket_index + 1) & mask; 278 | bucket = &self.buckets[bucket_index]; 279 | }) { 280 | if (bucket.index != Bucket.TombStone and bucket.hash == hash) { 281 | const entry_index = bucket.index; 282 | const entry = &self.entries[entry_index]; 283 | if (eqlFn(entry.key, key)) { 284 | return &entry.value; 285 | } 286 | } 287 | } 288 | 289 | return null; 290 | } 291 | 292 | /// Get an optional pointer to the value associated with key and precomputed hash, if present. 293 | pub fn getHashed(self: *const Self, key: K, hash: Size) ?*V { 294 | assert(hash == hashFn(key)); 295 | if (self.size == 0) { 296 | return null; // TODO better without branch ? 297 | } 298 | 299 | return self.internalGet(key, hash); 300 | } 301 | 302 | /// Get an optional pointer to the value associated with key, if present. 303 | pub fn get(self: *const Self, key: K) ?*V { 304 | if (self.size == 0) { 305 | return null; // TODO better without branch ? 306 | } 307 | 308 | const hash = hashFn(key); 309 | return self.internalGet(key, hash); 310 | } 311 | 312 | pub fn getOrPut(self: *Self, key: K, value: V) !*V { 313 | try self.ensureCapacity(); // Should this go after the 'get' part, at the cost of complicating the code ? Would it even be an actual optimization ? 314 | 315 | // Same code as internalGet except we update the value if found. 316 | const mask: Size = @intCast(Size, self.buckets.len) - 1; 317 | const hash = hashFn(key); 318 | var bucket_index = hash & mask; 319 | var bucket = &self.buckets[bucket_index]; 320 | while (bucket.index != Bucket.Empty and bucket.index != Bucket.TombStone) : ({ 321 | bucket_index = (bucket_index + 1) & mask; 322 | bucket = &self.buckets[bucket_index]; 323 | }) { 324 | if (bucket.hash == hash) { 325 | const entry_index = bucket.index; 326 | const entry = &self.entries[entry_index]; 327 | if (eqlFn(entry.key, key)) { 328 | return &entry.value; 329 | } 330 | } 331 | } 332 | 333 | // No existing key found, put it there. 334 | 335 | const index = self.size; 336 | self.size += 1; 337 | 338 | bucket.hash = hash; 339 | bucket.index = index; 340 | self.entries[index] = KV{ .key = key, .value = value }; 341 | 342 | return &self.entries[index].value; 343 | } 344 | 345 | /// Return true if there is a value associated with key in the map. 346 | pub fn contains(self: *const Self, key: K) bool { 347 | return self.get(key) != null; 348 | } 349 | 350 | /// Remove the value associated with key, if present. Returns wether 351 | /// an element was removed. 
352 | pub fn remove(self: *Self, key: K) bool { 353 | assert(self.size > 0); 354 | // assert(self.contains(key)); TODO make two versions of remove 355 | 356 | const mask = @intCast(Size, self.buckets.len - 1); 357 | const hash = hashFn(key); 358 | var bucket_index = hash & mask; 359 | var bucket = &self.buckets[bucket_index]; 360 | 361 | var entry: *KV = undefined; 362 | const entry_index = while (bucket.index != Bucket.Empty) : ({ 363 | bucket_index = (bucket_index + 1) & mask; 364 | bucket = &self.buckets[bucket_index]; 365 | }) { 366 | if (bucket.index != Bucket.TombStone and bucket.hash == hash) { 367 | entry = &self.entries[bucket.index]; 368 | if (eqlFn(entry.key, key)) { 369 | break bucket.index; 370 | } 371 | } 372 | } else return false; // TODO make two versions of remove 373 | 374 | bucket.index = Bucket.TombStone; 375 | 376 | self.size -= 1; 377 | if (entry_index != self.size) { 378 | // Simply move the last element 379 | entry.* = self.entries[self.size]; 380 | self.entries[self.size] = undefined; 381 | 382 | // And update its bucket accordingly. 383 | const moved_index = self.size; 384 | const moved_hash = hashFn(entry.key); 385 | bucket_index = moved_hash & mask; 386 | bucket = &self.buckets[bucket_index]; 387 | while (bucket.index != moved_index) { 388 | bucket_index = (bucket_index + 1) & mask; 389 | bucket = &self.buckets[bucket_index]; 390 | } 391 | assert(bucket.hash == moved_hash); 392 | bucket.index = entry_index; 393 | } 394 | 395 | return true; 396 | } 397 | 398 | fn isUnderMaxLoadFactor(size: Size, cap: Size) bool { 399 | return size * 5 < cap * 3; 400 | } 401 | 402 | /// Return the maximum number of entries for a given capacity. 403 | fn entryCountForCapacity(cap: Size) Size { 404 | const res = (cap * 3) / 5; 405 | assert(isUnderMaxLoadFactor(res, cap)); 406 | return res; 407 | } 408 | 409 | fn ensureCapacity(self: *Self) !void { 410 | if (self.capacity() == 0) { 411 | try self.setCapacity(16); 412 | } 413 | 414 | if (self.size == self.entries.len) { // We know the entries are exactly the maximum size according to the load factor. 415 | assert(self.buckets.len < std.math.maxInt(Size) / 2); 416 | const new_capacity = @intCast(Size, self.buckets.len * 2); 417 | try self.grow(new_capacity); 418 | } 419 | } 420 | 421 | fn setCapacity(self: *Self, cap: Size) !void { 422 | assert(self.capacity() == 0); 423 | assert(self.size == 0); 424 | const entry_count = entryCountForCapacity(cap); 425 | self.entries = try self.allocator.alloc(KV, entry_count); 426 | self.buckets = try self.allocator.alloc(Bucket, cap); 427 | self.initBuckets(); 428 | self.size = 0; 429 | } 430 | 431 | fn initBuckets(self: *Self) void { 432 | // TODO use other default values so that the memset can be faster ? 
433 | std.mem.set(Bucket, self.buckets, Bucket{ .index = Bucket.Empty, .hash = Bucket.Empty }); 434 | } 435 | 436 | fn grow(self: *Self, new_capacity: Size) !void { 437 | assert(new_capacity > self.capacity()); 438 | assert(isPowerOfTwo(new_capacity)); 439 | 440 | const entry_count = entryCountForCapacity(new_capacity); 441 | assert(entry_count > self.entries.len); 442 | self.entries = if (self.entries.len != 0) try self.allocator.realloc(self.entries, entry_count) else try self.allocator.alloc(KV, entry_count); 443 | 444 | const new_buckets = try self.allocator.alloc(Bucket, new_capacity); 445 | 446 | self.rehash(new_buckets); 447 | self.allocator.free(self.buckets); 448 | self.buckets = new_buckets; 449 | } 450 | 451 | fn rehash(self: *Self, new_buckets: []Bucket) void { 452 | std.mem.set(Bucket, new_buckets, Bucket{ .index = Bucket.Empty, .hash = Bucket.Empty }); 453 | 454 | // We'll move the existing buckets into their new home. 455 | // This is faster than a real rehashing that would go through the 456 | // entries and hash them to create the new buckets. 457 | const mask = new_buckets.len - 1; 458 | for (self.buckets) |bucket| { 459 | if (bucket.index != Bucket.Empty) { 460 | var bucket_index = bucket.hash & mask; 461 | var new_bucket = &new_buckets[bucket_index]; 462 | while (new_bucket.index != Bucket.Empty) { 463 | bucket_index = (bucket_index + 1) & mask; 464 | new_bucket = &new_buckets[bucket_index]; 465 | } 466 | new_bucket.* = bucket; 467 | } 468 | } 469 | } 470 | }; 471 | } 472 | 473 | const expect = std.testing.expect; 474 | const expectEqual = std.testing.expectEqual; 475 | const direct_allocator = std.heap.direct_allocator; 476 | 477 | test "basic usage" { 478 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 479 | defer map.deinit(); 480 | 481 | const count = 5; 482 | var i: u32 = 0; 483 | var total: u32 = 0; 484 | while (i < count) : (i += 1) { 485 | try map.put(i, i); 486 | total += i; 487 | } 488 | 489 | var sum: u32 = 0; 490 | for (map.toSliceConst()) |kv| { 491 | sum += kv.key; 492 | } 493 | expect(sum == total); 494 | 495 | i = 0; 496 | sum = 0; 497 | while (i < count) : (i += 1) { 498 | expectEqual(map.get(i).?.*, i); 499 | sum += map.get(i).?.*; 500 | } 501 | expectEqual(total, sum); 502 | } 503 | 504 | test "reserve" { 505 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 506 | defer map.deinit(); 507 | 508 | try map.reserve(9); 509 | expectEqual(map.capacity(), 16); 510 | try map.reserve(129); 511 | expectEqual(map.capacity(), 256); 512 | expectEqual(map.size, 0); 513 | } 514 | 515 | test "clear" { 516 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 517 | defer map.deinit(); 518 | 519 | try map.put(1, 1); 520 | expectEqual(map.get(1).?.*, 1); 521 | expectEqual(map.size, 1); 522 | 523 | const cap = map.capacity(); 524 | expect(cap > 0); 525 | 526 | map.clear(); 527 | expectEqual(map.size, 0); 528 | expectEqual(map.capacity(), cap); 529 | expect(!map.contains(1)); 530 | } 531 | 532 | test "put and get with precomputed hash" { 533 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 534 | defer map.deinit(); 535 | 536 | var i: u32 = 0; 537 | while (i < 8) : (i += 1) { 538 | try map.putHashed(i, i * 3 + 1, hashu32(i)); 539 | } 540 | 541 | i = 0; 542 | while (i < 8) : (i += 1) { 543 | expectEqual(map.get(i).?.*, i * 3 + 1); 544 | } 545 | 546 | i = 0; 547 | while (i < 8) : (i += 1) { 548 | expectEqual(map.getHashed(i, hashu32(i)).?.*, i * 3 + 1); 549 | } 550 | } 551 | 552 | // This test 
can only be run by removing the asserts checking hash consistency 553 | // in putHashed and getHashed. 554 | // test "put and get with long collision chain" { 555 | // var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 556 | // defer map.deinit(); 557 | // try map.reserve(32); 558 | 559 | // // Using a fixed arbitrary hash for every value, we force collisions. 560 | // var i: u32 = 0; 561 | // while (i < 16) : (i += 1) { 562 | // try map.putHashed(i, i, 0x12345678); 563 | // } 564 | 565 | // i = 0; 566 | // while (i < 16) : (i += 1) { 567 | // expectEqual(map.getHashed(i, 0x12345678).?.*, i); 568 | // } 569 | // } 570 | 571 | test "grow" { 572 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 573 | defer map.deinit(); 574 | 575 | const growTo = 12456; 576 | 577 | var i: u32 = 0; 578 | while (i < growTo) : (i += 1) { 579 | try map.put(i, i); 580 | } 581 | // this depends on the maximum load factor 582 | // warn("\ncap {} next {}\n", map.capacity(), ceilPowerOfTwo(u32, growTo)); 583 | // expect(map.capacity() == ceilPowerOfTwo(u32, growTo)); 584 | expectEqual(map.size, growTo); 585 | 586 | i = 0; 587 | for (map.toSliceConst()) |kv| { 588 | expectEqual(kv.key, kv.value); 589 | i += 1; 590 | } 591 | expectEqual(i, growTo); 592 | 593 | i = 0; 594 | while (i < growTo) : (i += 1) { 595 | expectEqual(map.get(i).?.*, i); 596 | } 597 | } 598 | 599 | test "reserve with existing elements" { 600 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 601 | defer map.deinit(); 602 | 603 | try map.put(0, 0); 604 | expectEqual(map.size, 1); 605 | expectEqual(map.capacity(), 16); 606 | 607 | try map.reserve(65); 608 | expectEqual(map.size, 1); 609 | expectEqual(map.capacity(), 128); 610 | } 611 | 612 | test "reserve satisfies max load factor" { 613 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 614 | defer map.deinit(); 615 | 616 | try map.reserve(127); 617 | expectEqual(map.capacity(), 256); 618 | } 619 | 620 | test "remove" { 621 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 622 | defer map.deinit(); 623 | 624 | var i: u32 = 0; 625 | while (i < 16) : (i += 1) { 626 | try map.put(i, i); 627 | } 628 | 629 | i = 0; 630 | while (i < 16) : (i += 1) { 631 | if (i % 3 == 0) { 632 | _ = map.remove(i); 633 | } 634 | } 635 | expectEqual(map.size, 10); 636 | for (map.toSliceConst()) |kv, j| { 637 | expectEqual(kv.key, kv.value); 638 | expect(kv.key % 3 != 0); 639 | } 640 | 641 | i = 0; 642 | while (i < 16) : (i += 1) { 643 | if (i % 3 == 0) { 644 | expect(!map.contains(i)); 645 | } else { 646 | expectEqual(map.get(i).?.*, i); 647 | } 648 | } 649 | } 650 | 651 | test "reverse removes" { 652 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 653 | defer map.deinit(); 654 | 655 | var i: u32 = 0; 656 | while (i < 16) : (i += 1) { 657 | try map.put(i, i); 658 | } 659 | 660 | i = 16; 661 | while (i > 0) : (i -= 1) { 662 | _ = map.remove(i - 1); 663 | expect(!map.contains(i - 1)); 664 | var j: u32 = 0; 665 | while (j < i - 1) : (j += 1) { 666 | expectEqual(map.get(j).?.*, j); 667 | } 668 | } 669 | 670 | expectEqual(map.size, 0); 671 | } 672 | 673 | test "multiple removes on same buckets" { 674 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 675 | defer map.deinit(); 676 | 677 | var i: u32 = 0; 678 | while (i < 16) : (i += 1) { 679 | try map.put(i, i); 680 | } 681 | 682 | _ = map.remove(7); 683 | _ = map.remove(15); 684 | _ = map.remove(14); 685 | _ = map.remove(13); 686 | 
expect(!map.contains(7)); 687 | expect(!map.contains(15)); 688 | expect(!map.contains(14)); 689 | expect(!map.contains(13)); 690 | 691 | i = 0; 692 | while (i < 13) : (i += 1) { 693 | if (i == 7) { 694 | expect(!map.contains(i)); 695 | } else { 696 | expectEqual(map.get(i).?.*, i); 697 | } 698 | } 699 | 700 | try map.put(15, 15); 701 | try map.put(13, 13); 702 | try map.put(14, 14); 703 | try map.put(7, 7); 704 | i = 0; 705 | while (i < 16) : (i += 1) { 706 | expectEqual(map.get(i).?.*, i); 707 | } 708 | } 709 | 710 | test "put and remove loop in random order" { 711 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 712 | defer map.deinit(); 713 | 714 | var keys = std.ArrayList(u32).init(direct_allocator); 715 | const size = 32; 716 | const iterations = 100; 717 | 718 | var i: u32 = 0; 719 | while (i < size) : (i += 1) { 720 | try keys.append(i); 721 | } 722 | var rng = std.rand.DefaultPrng.init(0); 723 | 724 | while (i < iterations) : (i += 1) { 725 | std.rand.Random.shuffle(&rng.random, u32, keys.toSlice()); 726 | 727 | for (keys.toSlice()) |key| { 728 | try map.put(key, key); 729 | } 730 | expectEqual(map.size, size); 731 | 732 | for (keys.toSlice()) |key| { 733 | _ = map.remove(key); 734 | } 735 | expectEqual(map.size, 0); 736 | } 737 | } 738 | 739 | test "remove one million elements in random order" { 740 | const Map = HashMap(u32, u32, hashu32, eqlu32); 741 | const n = 1000 * 1000; 742 | var map = Map.init(direct_allocator); 743 | defer map.deinit(); 744 | 745 | var keys = std.ArrayList(u32).init(direct_allocator); 746 | var i: u32 = 0; 747 | while (i < n) : (i += 1) { 748 | keys.append(i) catch unreachable; 749 | } 750 | 751 | var rng = std.rand.DefaultPrng.init(0); 752 | std.rand.Random.shuffle(&rng.random, u32, keys.toSlice()); 753 | 754 | for (keys.toSlice()) |key| { 755 | map.put(key, key) catch unreachable; 756 | } 757 | 758 | i = 0; 759 | while (i < n) : (i += 1) { 760 | const key = keys.toSlice()[i]; 761 | _ = map.remove(key); 762 | } 763 | } 764 | 765 | test "putOrUpdate" { 766 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 767 | defer map.deinit(); 768 | 769 | var i: u32 = 0; 770 | while (i < 16) : (i += 1) { 771 | _ = try map.putOrUpdate(i, i); 772 | } 773 | 774 | i = 0; 775 | while (i < 16) : (i += 1) { 776 | expectEqual(map.get(i).?.*, i); 777 | } 778 | 779 | i = 0; 780 | while (i < 16) : (i += 1) { 781 | expect(try map.putOrUpdate(i, i * 16 + 1)); 782 | } 783 | 784 | i = 0; 785 | while (i < 16) : (i += 1) { 786 | expectEqual(map.get(i).?.*, i * 16 + 1); 787 | } 788 | } 789 | 790 | test "getOrPut" { 791 | var map = HashMap(u32, u32, hashu32, eqlu32).init(direct_allocator); 792 | defer map.deinit(); 793 | 794 | var i: u32 = 0; 795 | while (i < 10) : (i += 1) { 796 | try map.put(i * 2, 2); 797 | } 798 | 799 | i = 0; 800 | while (i < 20) : (i += 1) { 801 | var n = try map.getOrPut(i, 1); 802 | } 803 | 804 | i = 0; 805 | var sum = i; 806 | while (i < 20) : (i += 1) { 807 | sum += map.get(i).?.*; 808 | } 809 | 810 | expectEqual(sum, 30); 811 | } 812 | --------------------------------------------------------------------------------
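
A quick numeric sketch of how the `ensureCapacity` figures in the hashmap.zig tests above line up with the map's maximum load factor. The exact `MaxLoadPercentage` constant is defined earlier in hashmap.zig and not repeated here, so the 80% used below is an assumption that happens to match the tested capacities; `assumedCapacityForSize` is a hypothetical helper for illustration only, not part of the container's API.

const std = @import("std");

// Hypothetical re-derivation of the capacities asserted in the ensureCapacity tests,
// assuming a maximum load factor of 80%: grow to the next power of two whose 80%
// bound still covers the requested element count.
fn assumedCapacityForSize(size: u32) u32 {
    var cap: u32 = 8; // assumed minimal capacity
    while (cap * 80 / 100 < size) cap *= 2;
    return cap;
}

test "assumed 80% load factor matches the ensureCapacity tests" {
    std.debug.assert(assumedCapacityForSize(9) == 16);
    std.debug.assert(assumedCapacityForSize(65) == 128);
    std.debug.assert(assumedCapacityForSize(127) == 256);
    std.debug.assert(assumedCapacityForSize(129) == 256);
}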
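
For a sense of scale of MonolithicArray's reserve-then-commit scheme, here is a small back-of-the-envelope check of its page arithmetic. It assumes the common 4096-byte page size (the container itself uses `std.mem.page_size`, which can differ per target); the numbers mirror `pageCountForSize` and `ElementsPerPage`.

const std = @import("std");

test "MonolithicArray page arithmetic, assuming 4 KiB pages" {
    const page_size = 4096; // assumption; the container uses std.mem.page_size
    const elements_per_page = page_size / @sizeOf(u32);
    std.debug.assert(elements_per_page == 1024);

    // init(1 << 32) for u32 items only reserves address space; no page is committed yet.
    const reserved_pages = ((1 << 32) * @sizeOf(u32) + page_size - 1) / page_size;
    std.debug.assert(reserved_pages == (1 << 22)); // 4 Mi pages, i.e. 16 GiB of address space
}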
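
And a similar sketch for the memory overhead formula given in the design notes of sliceable_hashmap.zig. The 3/5 bound mirrors `entryCountForCapacity`; with integer arithmetic the result is slightly above the `(1 - max_load_factor) * capacity * 8` estimate (824 vs. about 819 bytes for a capacity of 256). This is an illustration only, not part of the container.

const std = @import("std");

test "wasted bucket memory at the 60% maximum load factor" {
    const capacity: u32 = 256;
    const max_entries = (capacity * 3) / 5; // same bound as entryCountForCapacity
    const empty_buckets = capacity - max_entries; // buckets that can never point at an entry
    const overhead_bytes = empty_buckets * 2 * @sizeOf(u32); // each Bucket is two u32
    std.debug.assert(max_entries == 153);
    std.debug.assert(overhead_bytes == 824);
}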