├── LICENSE ├── MerkleSet.py ├── README.md ├── ReferenceMerkleSet.py └── TestMerkleSet.py /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015-2017 BitTorrent Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /MerkleSet.py: -------------------------------------------------------------------------------- 1 | from hashlib import blake2s, sha256 2 | 3 | from ReferenceMerkleSet import * 4 | LAZY = TRUNCATED 5 | 6 | __all__ = ['confirm_included', 'confirm_included_already_hashed', 'confirm_not_included', 7 | 'confirm_not_included_already_hashed', 'MerkleSet'] 8 | 9 | """ 10 | The behavior of this implementation is semantically identical to the one in ReferenceMerkleSet 11 | 12 | Advantages of this merkle tree implementation: 13 | Lazy root calculation 14 | Few l1 and l2 cache misses 15 | Good memory efficiency 16 | Reasonable defense against malicious insertion attacks 17 | 18 | TODO: Port to C 19 | TODO: Add combining of proofs and looking up a whole multiproof at once 20 | 21 | Branch memory allocation data format: 22 | 23 | # The active child is the leaf where overflow is currently sent to 24 | # When the active child is filled, a new empty one is made 25 | # When a leaf overflows, the data is sent to the active child of the parent branch 26 | # all unused should be zeroed out 27 | branch: active_child 8 
patricia[size] 28 | patricia[n]: type 1 hash 32 type 1 hash 32 patricia[n-1] patricia[n-1] 29 | type: EMPTY or TERMINAL or MIDDLE or LAZY 30 | EMPTY: \x00 31 | TERMINAL: \x01 32 | MIDDLE: \x02 33 | LAZY: \x03 34 | # unused are zeroed out. If child is a branch pos is set to 0xFFFF 35 | patricia[0]: child 8 pos 2 36 | 37 | Leaf memory allocation data format: 38 | 39 | # first_unused is the start of linked list, 0xFFFF for terminal 40 | # num_inputs is the number of references from the parent branch into this leaf 41 | leaf: first_unused 2 num_inputs 2 [node or emptynode] 42 | # pos0 and pos1 are one-based indexes to make it easy to detect if they are accidentally cleared to zero 43 | node: type 1 hash 32 type 1 hash 32 pos0 2 pos1 2 44 | # next is a zero-based index 45 | emptynode: next 2 unused 68 46 | """ 47 | 48 | # Returned in branch updates when the terminal was unused 49 | NOTSTARTED = 4 50 | # Returned in removal when there's only one left 51 | ONELEFT = 5 52 | # Fragile is returned when there might be only two things below 53 | # Bubbles upwards as long as there's an empty sibling 54 | # When a non-empty sibling is hit, it calls catch on the layer below 55 | # On catch, collapse is called on everything below 56 | # Collapse returns None if it has more than two things, or both if both terminal 57 | # If there is an empty child, collapse passes through the return of its non-empty child 58 | # Collapse clears out if it's returning something other than None 59 | FRAGILE = 6 60 | INVALIDATING = 7 61 | DONE = 8 62 | FULL = 9 63 | 64 | def from_bytes(f): 65 | return int.from_bytes(f, 'big') 66 | 67 | def to_bytes(f, v): 68 | return int.to_bytes(f, v, 'big') 69 | 70 | # Sanity checking on top of the hash function 71 | def hashaudit(mystr): 72 | assert len(mystr) == 66 73 | t0, t1 = mystr[0:1], mystr[33:34] 74 | assert t0 != LAZY and t1 != LAZY 75 | if (t0 == EMPTY or t0 == TERMINAL) and (t1 == EMPTY or t1 == TERMINAL): 76 | assert t0 == TERMINAL and t1 == TERMINAL 77 | 
assert mystr[1:33] < mystr[34:] 78 | assert t0 != EMPTY or mystr[1:33] == BLANK 79 | assert t1 != EMPTY or mystr[34:] == BLANK 80 | return hashdown(mystr) 81 | 82 | # Bounds checking for the win 83 | class safearray(bytearray): 84 | def __setitem__(self, index, thing): 85 | if type(index) is slice: 86 | start = index.start 87 | if start is None: 88 | start = 0 89 | stop = index.stop 90 | if stop is None: 91 | stop = len(self) 92 | assert index.step is None 93 | assert start >= 0 94 | assert stop >= 0 95 | assert start < len(self) 96 | assert stop <= len(self) 97 | assert stop - start == len(thing) 98 | else: 99 | assert index >= 0 100 | assert index < len(self) 101 | bytearray.__setitem__(self, index, thing) 102 | 103 | class MerkleSet: 104 | # depth sets the size of branches, it's power of two scale with a smallest value of 0 105 | # leaf_units is the size of leaves, its smallest possible value is 1 106 | # Optimal values for both of those are heavily dependent on the memory architecture of 107 | # the particular machine 108 | def __init__(self, depth, leaf_units): 109 | self.subblock_lengths = [10] 110 | while len(self.subblock_lengths) <= depth: 111 | self.subblock_lengths.append(66 + 2 * self.subblock_lengths[-1]) 112 | self.leaf_units = leaf_units 113 | self.root = safearray(33) 114 | # should be dumped completely on a port to C in favor of real dereferencing. 
115 | self.pointers_to_arrays = {} 116 | self.rootblock = None 117 | 118 | # Only used by test code, makes sure internal state is consistent 119 | def _audit(self, hashes): 120 | newhashes = [] 121 | t = self.root[:1] 122 | if t == EMPTY: 123 | assert self.root[1:] == BLANK 124 | assert self.rootblock == None 125 | assert len(self.pointers_to_arrays) == 0 126 | elif t == TERMINAL: 127 | assert self.rootblock == None 128 | assert len(self.pointers_to_arrays) == 0 129 | newhashes.append(self.root[1:]) 130 | else: 131 | allblocks = set() 132 | self._audit_branch(self._deref(self.rootblock), 0, allblocks, self.root, newhashes, True) 133 | assert allblocks == set(self.pointers_to_arrays.keys()) 134 | assert newhashes == sorted(hashes) 135 | 136 | def _audit_branch(self, branch, depth, allblocks, expected, hashes, can_terminate): 137 | assert branch not in allblocks 138 | allblocks.add(branch) 139 | outputs = {} 140 | branch = self._ref(branch) 141 | assert len(branch) == 8 + self.subblock_lengths[-1] 142 | self._audit_branch_inner(branch, 8, depth, len(self.subblock_lengths) - 1, outputs, allblocks, expected, hashes, can_terminate) 143 | active = branch[:8] 144 | if active != bytes(8): 145 | assert bytes(active) in outputs 146 | for leaf, positions in outputs.items(): 147 | assert leaf not in allblocks 148 | allblocks.add(leaf) 149 | self._audit_whole_leaf(leaf, positions) 150 | 151 | def _audit_branch_inner(self, branch, pos, depth, moddepth, outputs, allblocks, expected, hashes, can_terminate): 152 | if moddepth == 0: 153 | newpos = from_bytes(branch[pos + 8:pos + 10]) 154 | output = bytes(branch[pos:pos + 8]) 155 | assert output in self.pointers_to_arrays 156 | if newpos == 0xFFFF: 157 | self._audit_branch(output, depth, allblocks, expected, hashes, can_terminate) 158 | else: 159 | outputs.setdefault(output, []).append((newpos, expected)) 160 | self._add_hashes_leaf(self._ref(output), newpos, hashes, can_terminate) 161 | return 162 | assert expected[:1] == LAZY or 
hashaudit(branch[pos:pos + 66]) == expected[1:] 163 | t0 = branch[pos:pos + 1] 164 | t1 = branch[pos + 33:pos + 34] 165 | if t0 == EMPTY: 166 | assert t1 != EMPTY and t1 != TERMINAL 167 | assert branch[pos + 1:pos + 33] == BLANK 168 | elif t0 == TERMINAL: 169 | assert can_terminate or t1 != TERMINAL 170 | assert t1 != EMPTY 171 | if t1 == EMPTY: 172 | assert branch[pos + 34:pos + 66] == BLANK 173 | if t0 == EMPTY or t0 == TERMINAL: 174 | self._audit_branch_inner_empty(branch, pos + 66, moddepth - 1) 175 | if t0 == TERMINAL: 176 | hashes.append(branch[pos + 1:pos + 33]) 177 | else: 178 | self._audit_branch_inner(branch, pos + 66, depth + 1, moddepth - 1, outputs, allblocks, 179 | branch[pos:pos + 33], hashes, t1 != EMPTY) 180 | if t1 == EMPTY or t1 == TERMINAL: 181 | self._audit_branch_inner_empty(branch, pos + 66 + self.subblock_lengths[moddepth - 1], moddepth - 1) 182 | if t1 == TERMINAL: 183 | hashes.append(branch[pos + 34:pos + 66]) 184 | else: 185 | self._audit_branch_inner(branch, pos + 66 + self.subblock_lengths[moddepth - 1], depth + 1, moddepth - 1, outputs, allblocks, 186 | branch[pos + 33:pos + 66], hashes, t0 != EMPTY) 187 | 188 | def _add_hashes_leaf(self, leaf, pos, hashes, can_terminate): 189 | assert pos >= 0 190 | rpos = 4 + pos * 70 191 | t0 = leaf[rpos:rpos + 1] 192 | t1 = leaf[rpos + 33:rpos + 34] 193 | if t0 == TERMINAL: 194 | hashes.append(leaf[rpos + 1:rpos + 33]) 195 | assert can_terminate or t1 != TERMINAL 196 | elif t0 != EMPTY: 197 | self._add_hashes_leaf(leaf, from_bytes(leaf[rpos + 66:rpos + 68]) - 1, hashes, t1 != EMPTY) 198 | if t1 == TERMINAL: 199 | hashes.append(leaf[rpos + 34:rpos + 66]) 200 | elif t1 != EMPTY: 201 | self._add_hashes_leaf(leaf, from_bytes(leaf[rpos + 68:rpos + 70]) - 1, hashes, t0 != EMPTY) 202 | 203 | def _audit_branch_inner_empty(self, branch, pos, moddepth): 204 | if moddepth == 0: 205 | assert branch[pos:pos + 10] == bytes(10) 206 | return 207 | assert branch[pos:pos + 66] == bytes(66) 208 | 
self._audit_branch_inner_empty(branch, pos + 66, moddepth - 1) 209 | self._audit_branch_inner_empty(branch, pos + 66 + self.subblock_lengths[moddepth - 1], moddepth - 1) 210 | 211 | def _audit_whole_leaf(self, leaf, inputs): 212 | leaf = self._ref(leaf) 213 | assert len(leaf) == 4 + self.leaf_units * 70 214 | assert len(inputs) == from_bytes(leaf[2:4]) 215 | mycopy = safearray([ord('X')] * (4 + self.leaf_units * 70)) 216 | for pos, expected in inputs: 217 | self._audit_whole_leaf_inner(leaf, mycopy, pos, expected) 218 | i = from_bytes(leaf[:2]) 219 | while i != 0xFFFF: 220 | nexti = from_bytes(leaf[4 + i * 70:4 + i * 70 + 2]) 221 | assert mycopy[4 + i * 70:4 + i * 70 + 70] == b'X' * 70 222 | mycopy[4 + i * 70:4 + i * 70 + 70] = bytes(70) 223 | mycopy[4 + i * 70:4 + i * 70 + 2] = to_bytes(nexti, 2) 224 | i = nexti 225 | assert mycopy[4:] == leaf[4:] 226 | 227 | def _audit_whole_leaf_inner(self, leaf, mycopy, pos, expected): 228 | assert pos >= 0 229 | rpos = 4 + pos * 70 230 | assert mycopy[rpos:rpos + 70] == b'X' * 70 231 | mycopy[rpos:rpos + 70] = leaf[rpos:rpos + 70] 232 | t0 = leaf[rpos:rpos + 1] 233 | t1 = leaf[rpos + 33:rpos + 34] 234 | assert expected[:1] == LAZY or hashaudit(leaf[rpos:rpos + 66]) == expected[1:] 235 | if t0 == EMPTY: 236 | assert t1 != EMPTY 237 | assert t1 != TERMINAL 238 | assert leaf[rpos + 1:rpos + 33] == BLANK 239 | assert leaf[rpos + 66:rpos + 68] == bytes(2) 240 | elif t0 == TERMINAL: 241 | assert t1 != EMPTY 242 | assert leaf[rpos + 66:rpos + 68] == bytes(2) 243 | else: 244 | assert t0 == MIDDLE or t0 == LAZY 245 | self._audit_whole_leaf_inner(leaf, mycopy, from_bytes(leaf[rpos + 66:rpos + 68]) - 1, 246 | leaf[rpos:rpos + 33]) 247 | if t1 == EMPTY: 248 | assert leaf[rpos + 34:rpos + 66] == BLANK 249 | assert leaf[rpos + 68:rpos + 70] == bytes(2) 250 | elif t1 == TERMINAL: 251 | assert leaf[rpos + 68:rpos + 70] == bytes(2) 252 | else: 253 | assert t1 == MIDDLE or t1 == LAZY 254 | self._audit_whole_leaf_inner(leaf, mycopy, 
from_bytes(leaf[rpos + 68:rpos + 70]) - 1, 255 | leaf[rpos + 33:rpos + 66]) 256 | 257 | # In C this should be malloc/new 258 | def _allocate_branch(self): 259 | b = safearray(8 + self.subblock_lengths[-1]) 260 | self.pointers_to_arrays[self._deref(b)] = b 261 | return b 262 | 263 | # In C this should be malloc/new 264 | def _allocate_leaf(self): 265 | leaf = safearray(4 + self.leaf_units * 70) 266 | for i in range(self.leaf_units): 267 | p = 4 + i * 70 268 | leaf[p:p + 2] = to_bytes((i + 1) if i != self.leaf_units - 1 else 0xFFFF, 2) 269 | self.pointers_to_arrays[self._deref(leaf)] = leaf 270 | return leaf 271 | 272 | # In C this should be calloc/free 273 | def _deallocate(self, thing): 274 | del self.pointers_to_arrays[self._deref(thing)] 275 | 276 | # In C this should be * 277 | def _ref(self, ref): 278 | assert len(ref) == 8 279 | if ref == bytes(8): 280 | return None 281 | return self.pointers_to_arrays[bytes(ref)] 282 | 283 | # In C this should be & 284 | def _deref(self, thing): 285 | assert thing is not None 286 | return to_bytes(id(thing), 8) 287 | 288 | def get_root(self): 289 | if self.root[:1] == LAZY: 290 | self.root[:] = self._force_calculation_branch(self.rootblock, 8, len(self.subblock_lengths) - 1) 291 | return compress_root(self.root) 292 | 293 | def _force_calculation_branch(self, block, pos, moddepth): 294 | if moddepth == 0: 295 | block2 = self._ref(block[pos:pos + 8]) 296 | pos = from_bytes(block[pos + 8:pos + 10]) 297 | if pos == 0xFFFF: 298 | return self._force_calculation_branch(block2, 8, len(self.subblock_lengths) - 1) 299 | else: 300 | return self._force_calculation_leaf(block2, pos) 301 | if block[pos:pos + 1] == LAZY: 302 | block[pos:pos + 33] = self._force_calculation_branch(block, pos + 66, moddepth - 1) 303 | if block[pos + 33:pos + 34] == LAZY: 304 | block[pos + 33:pos + 66] = self._force_calculation_branch(block, pos + 66 + self.subblock_lengths[moddepth - 1], moddepth - 1) 305 | return MIDDLE + hashaudit(block[pos:pos + 66]) 306 | 
307 | def _force_calculation_leaf(self, block, pos): 308 | pos = 4 + pos * 70 309 | if block[pos:pos + 1] == LAZY: 310 | block[pos:pos + 33] = self._force_calculation_leaf(block, from_bytes(block[pos + 66:pos + 68]) - 1) 311 | if block[pos + 33:pos + 34] == LAZY: 312 | block[pos + 33:pos + 66] = self._force_calculation_leaf(block, from_bytes(block[pos + 68:pos + 70]) - 1) 313 | return MIDDLE + hashaudit(block[pos:pos + 66]) 314 | 315 | # Convenience function 316 | def add(self, toadd): 317 | return self.add_already_hashed(sha256(toadd).digest()) 318 | 319 | def add_already_hashed(self, toadd): 320 | t = self.root[:1] 321 | if t == EMPTY: 322 | self.root[:] = TERMINAL + toadd 323 | elif t == TERMINAL: 324 | if toadd == self.root[1:]: 325 | return 326 | self.rootblock = self._allocate_branch() 327 | self._insert_branch([self.root[1:], toadd], self.rootblock, 8, 0, len(self.subblock_lengths) - 1) 328 | self.root[:1] = LAZY 329 | else: 330 | if self._add_to_branch(toadd, self.rootblock, 0) == INVALIDATING: 331 | self.root[:1] = LAZY 332 | 333 | # returns INVALIDATING, DONE 334 | def _add_to_branch(self, toadd, block, depth): 335 | return self._add_to_branch_inner(toadd, block, 8, depth, len(self.subblock_lengths) - 1) 336 | 337 | # returns NOTSTARTED, INVALIDATING, DONE 338 | def _add_to_branch_inner(self, toadd, block, pos, depth, moddepth): 339 | if moddepth == 0: 340 | nextblock = self._ref(block[pos:pos + 8]) 341 | if nextblock is None: 342 | return NOTSTARTED 343 | nextpos = from_bytes(block[pos + 8:pos + 10]) 344 | if nextpos == 0xFFFF: 345 | return self._add_to_branch(toadd, nextblock, depth) 346 | else: 347 | return self._add_to_leaf(toadd, block, pos, nextblock, nextpos, depth) 348 | if get_bit(toadd, depth) == 0: 349 | r = self._add_to_branch_inner(toadd, block, pos + 66, depth + 1, moddepth - 1) 350 | if r == INVALIDATING: 351 | if block[pos:pos + 1] != LAZY: 352 | block[pos:pos + 1] = LAZY 353 | if block[pos + 33:pos + 34] != LAZY: 354 | return INVALIDATING 
355 | return DONE 356 | if r == DONE: 357 | return DONE 358 | t0 = block[pos:pos + 1] 359 | t1 = block[pos + 33:pos + 34] 360 | if t0 == EMPTY: 361 | if t1 == EMPTY: 362 | return NOTSTARTED 363 | block[pos:pos + 1] = TERMINAL 364 | block[pos + 1:pos + 33] = toadd 365 | if t1 != LAZY: 366 | return INVALIDATING 367 | else: 368 | return DONE 369 | assert t0 == TERMINAL 370 | v0 = block[pos + 1:pos + 33] 371 | if v0 == toadd: 372 | return DONE 373 | if t1 == TERMINAL: 374 | v1 = block[pos + 34:pos + 66] 375 | if v1 == toadd: 376 | return DONE 377 | block[pos + 33:pos + 66] = bytes(33) 378 | self._insert_branch([toadd, v0, v1], block, pos, depth, moddepth) 379 | else: 380 | self._insert_branch([toadd, v0], block, pos + 66, depth + 1, moddepth - 1) 381 | block[pos:pos + 1] = LAZY 382 | if t1 != LAZY: 383 | return INVALIDATING 384 | else: 385 | return DONE 386 | else: 387 | r = self._add_to_branch_inner(toadd, block, pos + 66 + self.subblock_lengths[moddepth - 1], depth + 1, moddepth - 1) 388 | if r == INVALIDATING: 389 | if block[pos + 33:pos + 34] != LAZY: 390 | block[pos + 33:pos + 34] = LAZY 391 | if block[pos:pos + 1] != LAZY: 392 | return INVALIDATING 393 | return DONE 394 | if r == DONE: 395 | return DONE 396 | t0 = block[pos:pos + 1] 397 | t1 = block[pos + 33:pos + 34] 398 | if t1 == EMPTY: 399 | if t0 == EMPTY: 400 | return NOTSTARTED 401 | block[pos + 33:pos + 34] = TERMINAL 402 | block[pos + 34:pos + 66] = toadd 403 | if t0 != LAZY: 404 | return INVALIDATING 405 | else: 406 | return DONE 407 | assert t1 == TERMINAL 408 | v1 = block[pos + 34:pos + 66] 409 | if v1 == toadd: 410 | return DONE 411 | if t0 == TERMINAL: 412 | v0 = block[pos + 1:pos + 33] 413 | if v0 == toadd: 414 | return DONE 415 | block[pos:pos + 33] = bytes(33) 416 | self._insert_branch([toadd, v0, v1], block, pos, depth, moddepth) 417 | else: 418 | self._insert_branch([toadd, v1], block, pos + 66 + self.subblock_lengths[moddepth - 1], depth + 1, moddepth - 1) 419 | block[pos + 33:pos + 34] = LAZY 
420 | if t0 != LAZY: 421 | return INVALIDATING 422 | else: 423 | return DONE 424 | 425 | def _insert_branch(self, things, block, pos, depth, moddepth): 426 | assert 2 <= len(things) <= 3 427 | if moddepth == 0: 428 | child = self._ref(block[:8]) 429 | r = FULL 430 | if child is not None: 431 | r, leafpos = self._insert_leaf(things, child, depth) 432 | if r == FULL: 433 | child = self._allocate_leaf() 434 | r, leafpos = self._insert_leaf(things, child, depth) 435 | if r == FULL: 436 | self._deallocate(child) 437 | newb = self._allocate_branch() 438 | block[pos:pos + 8] = self._deref(newb) 439 | block[pos + 8:pos + 10] = to_bytes(0xFFFF, 2) 440 | self._insert_branch(things, newb, 8, depth, len(self.subblock_lengths) - 1) 441 | return 442 | block[:8] = self._deref(child) 443 | # increment the number of inputs in the active child 444 | child[2:4] = to_bytes(from_bytes(child[2:4]) + 1, 2) 445 | block[pos:pos + 8] = self._deref(child) 446 | block[pos + 8:pos + 10] = to_bytes(leafpos, 2) 447 | return 448 | things.sort() 449 | if len(things) == 2: 450 | block[pos:pos + 1] = TERMINAL 451 | block[pos + 1:pos + 33] = things[0] 452 | block[pos + 33:pos + 34] = TERMINAL 453 | block[pos + 34:pos + 66] = things[1] 454 | return 455 | bits = [get_bit(thing, depth) for thing in things] 456 | if bits[0] == bits[1] == bits[2]: 457 | if bits[0] == 0: 458 | self._insert_branch(things, block, pos + 66, depth + 1, moddepth - 1) 459 | block[pos:pos + 1] = LAZY 460 | else: 461 | self._insert_branch(things, block, pos + 66 + self.subblock_lengths[moddepth - 1], depth + 1, moddepth - 1) 462 | block[pos + 33:pos + 34] = LAZY 463 | else: 464 | if bits[0] == bits[1]: 465 | block[pos + 33:pos + 34] = TERMINAL 466 | block[pos + 34:pos + 66] = things[2] 467 | self._insert_branch(things[:2], block, pos + 66, depth + 1, moddepth - 1) 468 | block[pos:pos + 1] = LAZY 469 | else: 470 | block[pos:pos + 1] = TERMINAL 471 | block[pos + 1:pos + 33] = things[0] 472 | self._insert_branch(things[1:], block, 
pos + 66 + self.subblock_lengths[moddepth - 1], depth + 1, moddepth - 1) 473 | block[pos + 33:pos + 34] = LAZY 474 | 475 | # returns INVALIDATING, DONE 476 | def _add_to_leaf(self, toadd, branch, branchpos, leaf, leafpos, depth): 477 | r = self._add_to_leaf_inner(toadd, leaf, leafpos, depth) 478 | if r != FULL: 479 | return r 480 | if from_bytes(leaf[2:4]) == 1: 481 | # leaf is full and only has one input 482 | # it cannot be split so it must be replaced with a branch 483 | newb = self._allocate_branch() 484 | self._copy_leaf_to_branch(newb, 8, len(self.subblock_lengths) - 1, leaf, leafpos) 485 | self._add_to_branch(toadd, newb, depth) 486 | branch[branchpos:branchpos + 8] = self._deref(newb) 487 | branch[branchpos + 8:branchpos + 10] = to_bytes(0xFFFF, 2) 488 | if branch[:8] == self._deref(leaf): 489 | branch[:8] = bytes(8) 490 | self._deallocate(leaf) 491 | return INVALIDATING 492 | active = self._ref(branch[:8]) 493 | if active is None or active is leaf: 494 | active = self._allocate_leaf() 495 | r, newpos = self._copy_between_leafs(leaf, active, leafpos) 496 | if r != DONE: 497 | active = self._allocate_leaf() 498 | r, newpos = self._copy_between_leafs(leaf, active, leafpos) 499 | assert r == DONE 500 | branch[branchpos:branchpos + 8] = self._deref(active) 501 | if branch[:8] != self._deref(active): 502 | branch[:8] = self._deref(active) 503 | branch[branchpos + 8:branchpos + 10] = to_bytes(newpos, 2) 504 | self._delete_from_leaf(leaf, leafpos) 505 | return self._add_to_leaf(toadd, branch, branchpos, active, newpos, depth) 506 | 507 | # returns INVALIDATING, DONE, FULL 508 | def _add_to_leaf_inner(self, toadd, leaf, pos, depth): 509 | assert pos >= 0 510 | rpos = pos * 70 + 4 511 | if get_bit(toadd, depth) == 0: 512 | t = leaf[rpos:rpos + 1] 513 | if t == EMPTY: 514 | leaf[rpos:rpos + 1] = TERMINAL 515 | leaf[rpos + 1:rpos + 33] = toadd 516 | return INVALIDATING 517 | elif t == TERMINAL: 518 | oldval0 = leaf[rpos + 1:rpos + 33] 519 | if oldval0 == toadd: 520 | 
return DONE 521 | t1 = leaf[rpos + 33:rpos + 34] 522 | if t1 == TERMINAL: 523 | oldval1 = leaf[rpos + 34:rpos + 66] 524 | if toadd == oldval1: 525 | return DONE 526 | nextpos = from_bytes(leaf[:2]) 527 | leaf[:2] = to_bytes(pos, 2) 528 | leaf[rpos + 2:rpos + 66] = bytes(64) 529 | leaf[rpos:rpos + 2] = to_bytes(nextpos, 2) 530 | r, nextnextpos = self._insert_leaf([toadd, oldval0, oldval1], leaf, depth) 531 | if r == FULL: 532 | leaf[:2] = to_bytes(nextpos, 2) 533 | leaf[rpos:rpos + 1] = TERMINAL 534 | leaf[rpos + 1:rpos + 33] = oldval0 535 | leaf[rpos + 33:rpos + 34] = TERMINAL 536 | leaf[rpos + 34:rpos + 66] = oldval1 537 | return FULL 538 | assert nextnextpos == pos 539 | return INVALIDATING 540 | r, newpos = self._insert_leaf([toadd, oldval0], leaf, depth + 1) 541 | if r == FULL: 542 | return FULL 543 | leaf[rpos + 66:rpos + 68] = to_bytes(newpos + 1, 2) 544 | leaf[rpos:rpos + 1] = LAZY 545 | if t1 == LAZY: 546 | return DONE 547 | return INVALIDATING 548 | else: 549 | r = self._add_to_leaf_inner(toadd, leaf, from_bytes(leaf[rpos + 66:rpos + 68]) - 1, depth + 1) 550 | if r == INVALIDATING: 551 | if t == MIDDLE: 552 | leaf[rpos:rpos + 1] = LAZY 553 | return INVALIDATING 554 | return DONE 555 | return r 556 | else: 557 | t = leaf[rpos + 33:rpos + 34] 558 | if t == EMPTY: 559 | leaf[rpos + 33:rpos + 34] = TERMINAL 560 | leaf[rpos + 34:rpos + 66] = toadd 561 | return INVALIDATING 562 | elif t == TERMINAL: 563 | oldval1 = leaf[rpos + 34:rpos + 66] 564 | if oldval1 == toadd: 565 | return DONE 566 | t0 = leaf[rpos:rpos + 1] 567 | if t0 == TERMINAL: 568 | oldval0 = leaf[rpos + 1:rpos + 33] 569 | if toadd == oldval0: 570 | return DONE 571 | nextpos = from_bytes(leaf[:2]) 572 | leaf[:2] = to_bytes(pos, 2) 573 | leaf[rpos + 2:rpos + 66] = bytes(64) 574 | leaf[rpos:rpos + 2] = to_bytes(nextpos, 2) 575 | r, nextnextpos = self._insert_leaf([toadd, oldval0, oldval1], leaf, depth) 576 | if r == FULL: 577 | leaf[:2] = to_bytes(nextpos, 2) 578 | leaf[rpos:rpos + 1] = TERMINAL 579 | 
leaf[rpos + 1:rpos + 33] = oldval0 580 | leaf[rpos + 33:rpos + 34] = TERMINAL 581 | leaf[rpos + 34:rpos + 66] = oldval1 582 | return FULL 583 | assert nextnextpos == pos 584 | return INVALIDATING 585 | r, newpos = self._insert_leaf([toadd, oldval1], leaf, depth + 1) 586 | if r == FULL: 587 | return FULL 588 | leaf[rpos + 68:rpos + 70] = to_bytes(newpos + 1, 2) 589 | leaf[rpos + 33:rpos + 34] = LAZY 590 | if t0 == LAZY: 591 | return DONE 592 | return INVALIDATING 593 | else: 594 | r = self._add_to_leaf_inner(toadd, leaf, from_bytes(leaf[rpos + 68:rpos + 70]) - 1, depth + 1) 595 | if r == INVALIDATING: 596 | if t == MIDDLE: 597 | leaf[rpos + 33:rpos + 34] = LAZY 598 | return INVALIDATING 599 | return DONE 600 | return r 601 | 602 | # returns state, newpos 603 | # state can be FULL, DONE 604 | def _copy_between_leafs(self, fromleaf, toleaf, frompos): 605 | r, pos = self._copy_between_leafs_inner(fromleaf, toleaf, frompos) 606 | if r == DONE: 607 | toleaf[2:4] = to_bytes(from_bytes(toleaf[2:4]) + 1, 2) 608 | fromleaf[2:4] = to_bytes(from_bytes(fromleaf[2:4]) - 1, 2) 609 | return r, pos 610 | 611 | # returns state, newpos 612 | # state can be FULL, DONE 613 | def _copy_between_leafs_inner(self, fromleaf, toleaf, frompos): 614 | topos = from_bytes(toleaf[:2]) 615 | if topos == 0xFFFF: 616 | return FULL, None 617 | rfrompos = 4 + frompos * 70 618 | rtopos = 4 + topos * 70 619 | toleaf[0:2] = toleaf[rtopos:rtopos + 2] 620 | t0 = fromleaf[rfrompos:rfrompos + 1] 621 | lowpos = None 622 | highpos = None 623 | if t0 == MIDDLE or t0 == LAZY: 624 | r, lowpos = self._copy_between_leafs_inner(fromleaf, toleaf, from_bytes(fromleaf[rfrompos + 66:rfrompos + 68]) - 1) 625 | if r == FULL: 626 | assert toleaf[:2] == toleaf[rtopos:rtopos + 2] 627 | toleaf[:2] = to_bytes(topos, 2) 628 | return FULL, None 629 | t1 = fromleaf[rfrompos + 33:rfrompos + 34] 630 | if t1 == MIDDLE or t1 == LAZY: 631 | r, highpos = self._copy_between_leafs_inner(fromleaf, toleaf, from_bytes(fromleaf[rfrompos + 
68:rfrompos + 70]) - 1) 632 | if r == FULL: 633 | if t0 == MIDDLE or t0 == LAZY: 634 | self._delete_from_leaf(toleaf, lowpos) 635 | assert toleaf[:2] == toleaf[rtopos:rtopos + 2] 636 | toleaf[:2] = to_bytes(topos, 2) 637 | return FULL, None 638 | toleaf[rtopos:rtopos + 66] = fromleaf[rfrompos:rfrompos + 66] 639 | if lowpos is not None: 640 | toleaf[rtopos + 66:rtopos + 68] = to_bytes(lowpos + 1, 2) 641 | if highpos is not None: 642 | toleaf[rtopos + 68:rtopos + 70] = to_bytes(highpos + 1, 2) 643 | return DONE, topos 644 | 645 | def _delete_from_leaf(self, leaf, pos): 646 | assert pos >= 0 647 | rpos = 4 + pos * 70 648 | t = leaf[rpos:rpos + 1] 649 | if t == MIDDLE or t == LAZY: 650 | self._delete_from_leaf(leaf, from_bytes(leaf[rpos + 66:rpos + 68]) - 1) 651 | t = leaf[rpos + 33:rpos + 34] 652 | if t == MIDDLE or t == LAZY: 653 | self._delete_from_leaf(leaf, from_bytes(leaf[rpos + 68:rpos + 70]) - 1) 654 | leaf[rpos + 2:rpos + 70] = bytes(68) 655 | leaf[rpos:rpos + 2] = leaf[:2] 656 | leaf[:2] = to_bytes(pos, 2) 657 | 658 | def _copy_leaf_to_branch(self, branch, branchpos, moddepth, leaf, leafpos): 659 | assert leafpos >= 0 660 | rleafpos = 4 + leafpos * 70 661 | if moddepth == 0: 662 | active = self._ref(branch[:8]) 663 | if active is None: 664 | active = self._allocate_leaf() 665 | branch[0:8] = self._deref(active) 666 | r, newpos = self._copy_between_leafs_inner(leaf, active, leafpos) 667 | assert r == DONE 668 | active[2:4] = to_bytes(from_bytes(active[2:4]) + 1, 2) 669 | branch[branchpos:branchpos + 8] = self._deref(active) 670 | branch[branchpos + 8:branchpos + 10] = to_bytes(newpos, 2) 671 | return 672 | branch[branchpos:branchpos + 66] = leaf[rleafpos:rleafpos + 66] 673 | t = leaf[rleafpos:rleafpos + 1] 674 | if t == MIDDLE or t == LAZY: 675 | self._copy_leaf_to_branch(branch, branchpos + 66, moddepth - 1, leaf, from_bytes(leaf[rleafpos + 66:rleafpos + 68]) - 1) 676 | t = leaf[rleafpos + 33:rleafpos + 34] 677 | if t == MIDDLE or t == LAZY: 678 | 
self._copy_leaf_to_branch(branch, branchpos + 66 + self.subblock_lengths[moddepth - 1], moddepth - 1, leaf, from_bytes(leaf[rleafpos + 68:rleafpos + 70]) - 1) 679 | 680 | # returns (status, pos) 681 | # status can be INVALIDATING, FULL 682 | def _insert_leaf(self, things, leaf, depth): 683 | assert 2 <= len(things) <= 3 684 | pos = from_bytes(leaf[:2]) 685 | if pos == 0xFFFF: 686 | return FULL, None 687 | lpos = pos * 70 + 4 688 | leaf[:2] = leaf[lpos:lpos + 2] 689 | things.sort() 690 | if len(things) == 2: 691 | leaf[lpos:lpos + 1] = TERMINAL 692 | leaf[lpos + 1:lpos + 33] = things[0] 693 | leaf[lpos + 33:lpos + 34] = TERMINAL 694 | leaf[lpos + 34:lpos + 66] = things[1] 695 | return INVALIDATING, pos 696 | bits = [get_bit(thing, depth) for thing in things] 697 | if bits[0] == bits[1] == bits[2]: 698 | r, laterpos = self._insert_leaf(things, leaf, depth + 1) 699 | if r == FULL: 700 | leaf[:2] = to_bytes(pos, 2) 701 | return FULL, None 702 | if bits[0] == 0: 703 | leaf[lpos + 66:lpos + 68] = to_bytes(laterpos + 1, 2) 704 | leaf[lpos:lpos + 1] = LAZY 705 | else: 706 | leaf[lpos + 68:lpos + 70] = to_bytes(laterpos + 1, 2) 707 | leaf[lpos + 33:lpos + 34] = LAZY 708 | leaf[lpos:lpos + 2] = bytes(2) 709 | return INVALIDATING, pos 710 | elif bits[0] == bits[1]: 711 | r, laterpos = self._insert_leaf([things[0], things[1]], leaf, depth + 1) 712 | if r == FULL: 713 | leaf[:2] = to_bytes(pos, 2) 714 | return FULL, None 715 | leaf[lpos + 34:lpos + 66] = things[2] 716 | leaf[lpos + 33:lpos + 34] = TERMINAL 717 | leaf[lpos + 66:lpos + 68] = to_bytes(laterpos + 1, 2) 718 | leaf[lpos:lpos + 1] = LAZY 719 | else: 720 | r, laterpos = self._insert_leaf([things[1], things[2]], leaf, depth + 1) 721 | if r == FULL: 722 | leaf[:2] = to_bytes(pos, 2) 723 | return FULL, None 724 | leaf[lpos + 1:lpos + 33] = things[0] 725 | leaf[lpos:lpos + 1] = TERMINAL 726 | leaf[lpos + 68:lpos + 70] = to_bytes(laterpos + 1, 2) 727 | leaf[lpos + 33:lpos + 34] = LAZY 728 | return INVALIDATING, pos 729 | 
730 | # Convenience function 731 | def remove(self, toremove): 732 | return self.remove_already_hashed(sha256(toremove).digest()) 733 | 734 | def remove_already_hashed(self, toremove): 735 | t = self.root[:1] 736 | if t == EMPTY: 737 | return 738 | elif t == TERMINAL: 739 | if toremove == self.root[1:]: 740 | self.root[:] = bytes(33) 741 | return 742 | else: 743 | status, oneval = self._remove_branch(toremove, self.rootblock, 0) 744 | if status == INVALIDATING: 745 | self.root[:1] = LAZY 746 | elif status == ONELEFT: 747 | self.root[1:] = oneval 748 | self.root[:1] = TERMINAL 749 | self.rootblock = None 750 | elif status == FRAGILE: 751 | self._catch_branch(self.rootblock, 8, len(self.subblock_lengths) - 1) 752 | self.root[:1] = LAZY 753 | 754 | # returns (status, oneval) 755 | # status can be ONELEFT, FRAGILE, INVALIDATING, DONE 756 | def _remove_branch(self, toremove, block, depth): 757 | result, val = self._remove_branch_inner(toremove, block, 8, depth, len(self.subblock_lengths) - 1) 758 | assert result != NOTSTARTED 759 | if result == ONELEFT: 760 | self._deallocate(block) 761 | return result, val 762 | 763 | # returns (status, oneval) 764 | # status can be NOTSTARTED, ONELEFT, FRAGILE, INVALIDATING, DONE 765 | def _remove_branch_inner(self, toremove, block, pos, depth, moddepth): 766 | if moddepth == 0: 767 | if block[pos:pos + 8] == bytes(8): 768 | return NOTSTARTED, None 769 | p = from_bytes(block[pos + 8:pos + 10]) 770 | if p == 0xFFFF: 771 | r, val = self._remove_branch(toremove, self._ref(block[pos:pos + 8]), depth) 772 | else: 773 | r, val = self._remove_leaf(toremove, self._ref(block[pos:pos + 8]), p, depth, block) 774 | if r == ONELEFT: 775 | block[pos:pos + 10] = bytes(10) 776 | return r, val 777 | if get_bit(toremove, depth) == 0: 778 | r, val = self._remove_branch_inner(toremove, block, pos + 66, depth + 1, moddepth - 1) 779 | if r == NOTSTARTED: 780 | t = block[pos:pos + 1] 781 | if t == EMPTY: 782 | if block[pos + 33:pos + 34] == EMPTY: 783 | 
return NOTSTARTED, None 784 | return DONE, None 785 | assert t == TERMINAL 786 | if block[pos + 1:pos + 33] == toremove: 787 | t1 = block[pos + 33:pos + 34] 788 | if t1 == TERMINAL: 789 | left = block[pos + 34:pos + 66] 790 | block[pos:pos + 66] = bytes(66) 791 | return ONELEFT, left 792 | else: 793 | assert t1 != EMPTY 794 | block[pos:pos + 33] = bytes(33) 795 | return FRAGILE, None 796 | elif block[pos + 34:pos + 66] == toremove: 797 | left = block[pos + 1:pos + 33] 798 | block[pos:pos + 66] = bytes(66) 799 | return ONELEFT, left 800 | return DONE, None 801 | elif r == ONELEFT: 802 | was_invalid = block[pos:pos + 1] == LAZY 803 | block[pos + 1:pos + 33] = val 804 | block[pos:pos + 1] = TERMINAL 805 | if block[pos + 33:pos + 34] == TERMINAL: 806 | return FRAGILE, None 807 | if not was_invalid: 808 | return INVALIDATING, None 809 | else: 810 | return DONE, None 811 | elif r == FRAGILE: 812 | t1 = block[pos + 33:pos + 34] 813 | # scan up the tree until the other child is non-empty 814 | if t1 == EMPTY: 815 | block[pos:pos + 1] = LAZY 816 | return FRAGILE, None 817 | # the other child is non-empty, if the tree can be collapsed 818 | # it will be up to the level below this one, so try that 819 | self._catch_branch(block, pos + 66, moddepth - 1) 820 | # done collapsing, continue invalidating if necessary 821 | if block[pos:pos + 1] == LAZY: 822 | return DONE, None 823 | block[pos:pos + 1] = LAZY 824 | if t1 == LAZY: 825 | return DONE, None 826 | return INVALIDATING, None 827 | elif r == INVALIDATING: 828 | t = block[pos:pos + 1] 829 | if t == LAZY: 830 | return DONE, None 831 | else: 832 | assert t == MIDDLE 833 | block[pos:pos + 1] = LAZY 834 | if block[pos + 33:pos + 34] == LAZY: 835 | return DONE, None 836 | return INVALIDATING, None 837 | assert r == DONE 838 | return r, val 839 | else: 840 | r, val = self._remove_branch_inner(toremove, block, pos + 66 + self.subblock_lengths[moddepth - 1], depth + 1, moddepth - 1) 841 | if r == NOTSTARTED: 842 | t = block[pos + 
33:pos + 34] 843 | if t == EMPTY: 844 | if block[pos:pos + 1] == EMPTY: 845 | return NOTSTARTED, None 846 | return DONE, None 847 | assert t == TERMINAL 848 | if block[pos + 34:pos + 66] == toremove: 849 | if block[pos:pos + 1] == TERMINAL: 850 | left = block[pos + 1:pos + 33] 851 | block[pos:pos + 66] = bytes(66) 852 | return ONELEFT, left 853 | else: 854 | block[pos + 33:pos + 66] = bytes(33) 855 | return FRAGILE, None 856 | elif block[pos + 1:pos + 33] == toremove: 857 | left = block[pos + 34:pos + 66] 858 | block[pos:pos + 66] = bytes(66) 859 | return ONELEFT, left 860 | return DONE, None 861 | elif r == ONELEFT: 862 | was_invalid = block[pos + 33:pos + 34] == LAZY 863 | block[pos + 34:pos + 66] = val 864 | block[pos + 33:pos + 34] = TERMINAL 865 | if block[pos:pos + 1] == TERMINAL: 866 | return FRAGILE, None 867 | if not was_invalid: 868 | return INVALIDATING, None 869 | return DONE, None 870 | elif r == FRAGILE: 871 | t0 = block[pos:pos + 1] 872 | if t0 == EMPTY: 873 | block[pos + 33:pos + 34] = LAZY 874 | return FRAGILE, None 875 | self._catch_branch(block, pos + 66 + self.subblock_lengths[moddepth - 1], moddepth - 1) 876 | if block[pos + 33:pos + 34] == LAZY: 877 | return DONE, None 878 | block[pos + 33:pos + 34] = LAZY 879 | if t0 == LAZY: 880 | return DONE, None 881 | return INVALIDATING, None 882 | elif r == INVALIDATING: 883 | t = block[pos + 33:pos + 34] 884 | if t == LAZY: 885 | return DONE, None 886 | else: 887 | assert t == MIDDLE 888 | block[pos + 33:pos + 34] = LAZY 889 | if block[pos:pos + 1] == LAZY: 890 | return DONE, None 891 | return INVALIDATING, None 892 | assert r == DONE 893 | return r, val 894 | 895 | # returns (status, oneval) 896 | # status can be ONELEFT, FRAGILE, INVALIDATING, DONE 897 | def _remove_leaf(self, toremove, block, pos, depth, branch): 898 | result, val = self._remove_leaf_inner(toremove, block, pos, depth) 899 | if result == ONELEFT: 900 | numin = from_bytes(block[2:4]) 901 | if numin == 1: 902 | self._deallocate(block) 
903 | if branch[:8] == self._deref(block): 904 | branch[:8] = bytes(8) 905 | else: 906 | block[2:4] = to_bytes(numin - 1, 2) 907 | return result, val 908 | 909 | def _deallocate_leaf_node(self, leaf, pos): 910 | assert pos >= 0 911 | rpos = 4 + pos * 70 912 | next = leaf[:2] 913 | leaf[rpos:rpos + 2] = leaf[:2] 914 | leaf[rpos + 2:rpos + 70] = bytes(68) 915 | leaf[:2] = to_bytes(pos, 2) 916 | 917 | # returns (status, oneval) 918 | # status can be ONELEFT, FRAGILE, INVALIDATING, DONE 919 | def _remove_leaf_inner(self, toremove, block, pos, depth): 920 | assert pos >= 0 921 | rpos = 4 + pos * 70 922 | if get_bit(toremove, depth) == 0: 923 | t = block[rpos:rpos + 1] 924 | if t == EMPTY: 925 | return DONE, None 926 | if t == TERMINAL: 927 | t1 = block[rpos + 33:rpos + 34] 928 | if block[rpos + 1:rpos + 33] == toremove: 929 | if t1 == TERMINAL: 930 | left = block[rpos + 34:rpos + 66] 931 | self._deallocate_leaf_node(block, pos) 932 | return ONELEFT, left 933 | block[rpos:rpos + 33] = bytes(33) 934 | return FRAGILE, None 935 | if block[rpos + 34:rpos + 66] == toremove: 936 | left = block[rpos + 1:rpos + 33] 937 | self._deallocate_leaf_node(block, pos) 938 | return ONELEFT, left 939 | return DONE, None 940 | else: 941 | r, val = self._remove_leaf_inner(toremove, block, from_bytes(block[rpos + 66:rpos + 68]) - 1, depth + 1) 942 | if r == DONE: 943 | return DONE, None 944 | if r == INVALIDATING: 945 | if t == MIDDLE: 946 | block[rpos:rpos + 1] = LAZY 947 | if block[rpos + 33:rpos + 34] != LAZY: 948 | return INVALIDATING, None 949 | return DONE, None 950 | if r == ONELEFT: 951 | t1 = block[rpos + 33:rpos + 34] 952 | assert t1 != EMPTY 953 | block[rpos + 1:rpos + 33] = val 954 | block[rpos:rpos + 1] = TERMINAL 955 | block[rpos + 66:rpos + 68] = bytes(2) 956 | if t1 == TERMINAL: 957 | return FRAGILE, None 958 | if t != LAZY and t1 != LAZY: 959 | return INVALIDATING, None 960 | return DONE, None 961 | assert r == FRAGILE 962 | t1 = block[rpos + 33:rpos + 34] 963 | if t1 == 
EMPTY: 964 | if t != LAZY: 965 | block[rpos:rpos + 1] = LAZY 966 | return FRAGILE, None 967 | self._catch_leaf(block, from_bytes(block[rpos + 66:rpos + 68]) - 1) 968 | if t == LAZY: 969 | return DONE, None 970 | block[rpos:rpos + 1] = LAZY 971 | if t1 == LAZY: 972 | return DONE, None 973 | return INVALIDATING, None 974 | else: 975 | t = block[rpos + 33:rpos + 34] 976 | if t == EMPTY: 977 | return DONE, None 978 | elif t == TERMINAL: 979 | t0 = block[rpos:rpos + 1] 980 | if block[rpos + 34:rpos + 66] == toremove: 981 | if t0 == TERMINAL: 982 | left = block[rpos + 1:rpos + 33] 983 | self._deallocate_leaf_node(block, pos) 984 | return ONELEFT, left 985 | block[rpos + 33:rpos + 66] = bytes(33) 986 | return FRAGILE, None 987 | if block[rpos + 1:rpos + 33] == toremove: 988 | left = block[rpos + 34:rpos + 66] 989 | self._deallocate_leaf_node(block, pos) 990 | return ONELEFT, left 991 | return DONE, None 992 | else: 993 | r, val = self._remove_leaf_inner(toremove, block, from_bytes(block[rpos + 68:rpos + 70]) - 1, depth + 1) 994 | if r == DONE: 995 | return DONE, None 996 | if r == INVALIDATING: 997 | if t == MIDDLE: 998 | block[rpos + 33:rpos + 34] = LAZY 999 | if block[rpos:rpos + 1] != LAZY: 1000 | return INVALIDATING, None 1001 | return DONE, None 1002 | if r == ONELEFT: 1003 | t0 = block[rpos:rpos + 1] 1004 | assert t0 != EMPTY 1005 | block[rpos + 34:rpos + 66] = val 1006 | block[rpos + 33:rpos + 34] = TERMINAL 1007 | block[rpos + 68:rpos + 70] = bytes(2) 1008 | if t0 == TERMINAL: 1009 | return FRAGILE, None 1010 | if t != LAZY and t0 != LAZY: 1011 | return INVALIDATING, None 1012 | return DONE, None 1013 | assert r == FRAGILE 1014 | t0 = block[rpos:rpos + 1] 1015 | if t0 == EMPTY: 1016 | if t != LAZY: 1017 | block[rpos + 33:rpos + 34] = LAZY 1018 | return FRAGILE, None 1019 | self._catch_leaf(block, from_bytes(block[rpos + 68:rpos + 70]) - 1) 1020 | if t == LAZY: 1021 | return DONE, None 1022 | block[rpos + 33:rpos + 34] = LAZY 1023 | if t0 == LAZY: 1024 | return 
DONE, None 1025 | return INVALIDATING, None 1026 | 1027 | def _catch_branch(self, block, pos, moddepth): 1028 | if moddepth == 0: 1029 | leafpos = from_bytes(block[pos + 8:pos + 10]) 1030 | if leafpos == 0xFFFF: 1031 | self._catch_branch(self._ref(block[pos:pos + 8]), 8, len(self.subblock_lengths) - 1) 1032 | else: 1033 | self._catch_leaf(self._ref(block[pos:pos + 8]), leafpos) 1034 | return 1035 | if block[pos:pos + 1] == EMPTY: 1036 | assert block[pos + 33:pos + 34] != TERMINAL 1037 | r = self._collapse_branch_inner(block, pos + 66 + self.subblock_lengths[moddepth - 1], moddepth - 1) 1038 | if r != None: 1039 | block[pos:pos + 66] = r 1040 | return 1041 | if block[pos + 33:pos + 34] == EMPTY: 1042 | assert block[pos:pos + 1] != TERMINAL 1043 | r = self._collapse_branch_inner(block, pos + 66, moddepth - 1) 1044 | if r != None: 1045 | block[pos:pos + 66] = r 1046 | 1047 | # returns two hashes string or None 1048 | def _collapse_branch(self, block): 1049 | r = self._collapse_branch_inner(block, 8, len(self.subblock_lengths) - 1) 1050 | if r != None: 1051 | self._deallocate(block) 1052 | return r 1053 | 1054 | # returns two hashes string or None 1055 | def _collapse_branch_inner(self, block, pos, moddepth): 1056 | if moddepth == 0: 1057 | leafpos = from_bytes(block[pos + 8:pos + 10]) 1058 | if leafpos == 0xFFFF: 1059 | r = self._collapse_branch(self._ref(block[pos:pos + 8])) 1060 | else: 1061 | r = self._collapse_leaf(self._ref(block[pos:pos + 8]), from_bytes(block[pos + 8:pos + 10]), block) 1062 | if r != None: 1063 | block[pos:pos + 10] = bytes(10) 1064 | return r 1065 | t0 = block[pos:pos + 1] 1066 | t1 = block[pos + 33:pos + 34] 1067 | if t0 == TERMINAL and t1 == TERMINAL: 1068 | r = block[pos:pos + 66] 1069 | block[pos:pos + 66] = bytes(66) 1070 | return r 1071 | if t0 == EMPTY: 1072 | r = self._collapse_branch_inner(block, pos + 66 + self.subblock_lengths[moddepth - 1], moddepth - 1) 1073 | if r != None: 1074 | block[pos + 33:pos + 66] = bytes(33) 1075 | return 
r 1076 | if t1 == EMPTY: 1077 | r = self._collapse_branch_inner(block, pos + 66, moddepth - 1) 1078 | if r != None: 1079 | block[pos:pos + 33] = bytes(33) 1080 | return r 1081 | return None 1082 | 1083 | def _catch_leaf(self, leaf, pos): 1084 | assert pos >= 0 1085 | rpos = 4 + pos * 70 1086 | t0 = leaf[rpos:rpos + 1] 1087 | t1 = leaf[rpos + 33:rpos + 34] 1088 | if t0 == EMPTY: 1089 | r = self._collapse_leaf_inner(leaf, from_bytes(leaf[rpos + 68:rpos + 70]) - 1) 1090 | if r != None: 1091 | leaf[rpos + 68:rpos + 70] = bytes(2) 1092 | leaf[rpos:rpos + 66] = r 1093 | elif t1 == EMPTY: 1094 | r = self._collapse_leaf_inner(leaf, from_bytes(leaf[rpos + 66:rpos + 68]) - 1) 1095 | if r != None: 1096 | leaf[rpos + 66:rpos + 68] = bytes(2) 1097 | leaf[rpos:rpos + 66] = r 1098 | 1099 | # returns two hashes string or None 1100 | def _collapse_leaf(self, leaf, pos, branch): 1101 | assert pos >= 0 1102 | r = self._collapse_leaf_inner(leaf, pos) 1103 | if r != None: 1104 | inputs = from_bytes(leaf[2:4]) 1105 | if inputs == 1: 1106 | self._deallocate(leaf) 1107 | if branch[:8] == self._deref(leaf): 1108 | branch[:8] = bytes(8) 1109 | return r 1110 | leaf[2:4] = to_bytes(inputs - 1, 2) 1111 | return r 1112 | 1113 | # returns two hashes string or None 1114 | def _collapse_leaf_inner(self, leaf, pos): 1115 | assert pos >= 0 1116 | rpos = 4 + pos * 70 1117 | t0 = leaf[rpos:rpos + 1] 1118 | t1 = leaf[rpos + 33:rpos + 34] 1119 | r = None 1120 | if t0 == TERMINAL and t1 == TERMINAL: 1121 | r = leaf[rpos:rpos + 66] 1122 | elif t0 == EMPTY: 1123 | r = self._collapse_leaf_inner(leaf, from_bytes(leaf[rpos + 68:rpos + 70]) - 1) 1124 | elif t1 == EMPTY: 1125 | r = self._collapse_leaf_inner(leaf, from_bytes(leaf[rpos + 66:rpos + 68]) - 1) 1126 | if r is not None: 1127 | # this leaf node is being collapsed, deallocate it 1128 | leaf[rpos + 2:rpos + 70] = bytes(68) 1129 | leaf[rpos:rpos + 2] = leaf[:2] 1130 | leaf[:2] = to_bytes(pos, 2) 1131 | return r 1132 | 1133 | # Convenience function 1134 | 
def is_included(self, tocheck):
    """Hash `tocheck` with sha256 and look the digest up; see
    is_included_already_hashed for the return value."""
    return self.is_included_already_hashed(sha256(tocheck).digest())

# returns (boolean, proof string)
def is_included_already_hashed(self, tocheck):
    """Return (included, proof) for the 32-byte hash `tocheck`.

    The proof is a serialized subtree; it is always returned as immutable
    bytes-like data, never as a reference to internal state.
    """
    buf = []
    # NOTE(review): get_root() is defined outside this chunk; the asserts
    # below rely on no LAZY markers remaining once it has run.
    self.get_root()
    t = self.root[:1]
    if t == EMPTY:
        return False, EMPTY
    if t == TERMINAL:
        # self.root is mutated in place elsewhere (slice assignment), so hand
        # out a snapshot instead of the live buffer
        return tocheck == self.root[1:], bytes(self.root)
    assert t == MIDDLE
    r = self._is_included_branch(tocheck, self.rootblock, 8, 0, len(self.subblock_lengths) - 1, buf)
    return r, b''.join([bytes(x) for x in buf])

# returns boolean, appends to buf
def _is_included_branch(self, tocheck, block, pos, depth, moddepth, buf):
    """Walk the branch node at `block[pos:]` along the bits of `tocheck`,
    appending proof fragments to `buf`; returns whether it was found."""
    if moddepth == 0:
        # bottom of this branch block: follow the child reference
        if block[pos + 8:pos + 10] == bytes([0xFF, 0xFF]):
            return self._is_included_branch(tocheck, self._ref(block[pos:pos + 8]), 8, depth, len(self.subblock_lengths) - 1, buf)
        else:
            return self._is_included_leaf(tocheck, self._ref(block[pos:pos + 8]), from_bytes(block[pos + 8:pos + 10]), depth, buf)
    buf.append(MIDDLE)
    # Both slots are checked because two terminals sharing a prefix sit in
    # one node regardless of the bit at this depth. The slot must really be
    # a TERMINAL: an EMPTY slot holds an all-zero hash and a MIDDLE slot an
    # internal hash, neither of which is a set member.
    found0 = block[pos:pos + 1] == TERMINAL and block[pos + 1:pos + 33] == tocheck
    found1 = block[pos + 33:pos + 34] == TERMINAL and block[pos + 34:pos + 66] == tocheck
    if found0 or found1:
        _finish_proof(block[pos:pos + 66], depth, buf)
        return True
    if get_bit(tocheck, depth) == 0:
        t = block[pos:pos + 1]
        if t == EMPTY or t == TERMINAL:
            _finish_proof(block[pos:pos + 66], depth, buf)
            return False
        assert t == MIDDLE
        r = self._is_included_branch(tocheck, block, pos + 66, depth + 1, moddepth - 1, buf)
        # the sibling is summarized after the subtree we descended into
        buf.append(_quick_summary(block[pos + 33:pos + 66]))
        return r
    else:
        t = block[pos + 33:pos + 34]
        if t == EMPTY or t == TERMINAL:
            _finish_proof(block[pos:pos + 66], depth, buf)
            return False
        assert t == MIDDLE
        buf.append(_quick_summary(block[pos:pos + 33]))
        return self._is_included_branch(tocheck, block, pos + 66 + self.subblock_lengths[moddepth - 1], depth + 1, moddepth - 1, buf)

1179 | # returns boolean, appends to buf 1180 | def _is_included_leaf(self, tocheck, block, pos, depth, buf): 1181 | assert pos >= 0 1182 | pos = 4 + pos * 70 1183 | buf.append(MIDDLE) 1184 | if block[pos + 1:pos + 33] == tocheck or block[pos + 34:pos + 66] == tocheck: 1185 | _finish_proof(block[pos:pos + 66], depth, buf) 1186 | return True 1187 | if get_bit(tocheck, depth) == 0: 1188 | t = block[pos:pos + 1] 1189 | if t == EMPTY or t == TERMINAL: 1190 | _finish_proof(block[pos:pos + 66], depth, buf) 1191 | return False 1192 | assert t == MIDDLE 1193 | r = self._is_included_leaf(tocheck, block, from_bytes(block[pos + 66:pos + 68]) - 1, depth + 1, buf) 1194 | buf.append(_quick_summary(block[pos + 33:pos + 66])) 1195 | return r 1196 | else: 1197 | t = block[pos + 33:pos + 34] 1198 | if t == EMPTY or t == TERMINAL: 1199 | _finish_proof(block[pos:pos + 66], depth, buf) 1200 | return False 1201 | assert t == MIDDLE 1202 | buf.append(_quick_summary(block[pos:pos + 33])) 1203 | return self._is_included_leaf(tocheck, block, from_bytes(block[pos + 68:pos + 70]) - 1, depth + 1, buf) 1204 | 1205 | def _finish_proof(val, depth, buf): 1206 | assert len(val) == 66 1207 | v0 = val[1:33] 1208 | v1 = val[34:] 1209 | if val[:1] == TERMINAL and val[33:34] == TERMINAL: 1210 | b0 = get_bit(v0, depth) 1211 | b1 = get_bit(v1, depth) 1212 | if b0 == b1: 1213 | if b0 == 0: 1214 | buf.append(MIDDLE) 1215 | _finish_proof(val, depth + 1, buf) 1216 | buf.append(EMPTY) 1217 | else: 1218 | buf.append(EMPTY) 1219 | buf.append(MIDDLE) 1220 | _finish_proof(val, depth + 1, buf) 1221 | return 1222 | buf.append(_quick_summary(val[:33])) 1223 | buf.append(_quick_summary(val[33:])) 1224 | 1225 | def _quick_summary(val): 1226 | assert len(val) == 33 1227 | t = val[:1] 1228 | if t == EMPTY: 1229 | return EMPTY 1230 | if t == TERMINAL: 1231 | return val 1232 | assert t == MIDDLE 1233 | return LAZY + val[1:] 1234 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # MerkleSet 2 | A highly performant Merkle set data structure 3 | 4 | Merkle set implementations tend to have very poor cache coherence because nodes are often stored nowhere near each other in memory. The non-reference implementation in here does a good job of keeping parent and sibling nodes nearby, thus dramatically reducing the number of cache misses and improving performance. 5 | 6 | ReferenceMerkleSet.py contains a simple reference implementation. 7 | 8 | MerkleSet.py contains an implementation which will be very performant after porting to C. A number of aspects of it don't make much sense in Python, most notably the _ref and _deref methods, which should be replaced with simple referencing and dereferencing on a port to C. This was written in a slightly odd style specifically for the purposes of making porting to C a direct transliteration. 9 | 10 | TestMerkleSet.py does extensive testing of both implementations. It gets 98% code coverage and handles many semantic edge cases as well. 11 | -------------------------------------------------------------------------------- /ReferenceMerkleSet.py: -------------------------------------------------------------------------------- 1 | from hashlib import blake2b 2 | 3 | """ 4 | A simple, confidence-inspiring Merkle Set standard 5 | 6 | Advantages of this standard: 7 | Low CPU requirements 8 | Small proofs of inclusion/exclusion 9 | Reasonably simple implementation 10 | 11 | The main tricks in this standard are: 12 | 13 | Uses blake2b because that has the best performance on 512 bit inputs 14 | Skips repeated hashing of exactly two things even when they share prefix bits 15 | 16 | 17 | Proofs support proving inclusion/exclusion for a large number of values in 18 | a single string. They're a serialization of a subset of the tree. 
19 | 20 | Proof format: 21 | 22 | multiproof: subtree 23 | subtree: middle or terminal or truncated or empty 24 | middle: MIDDLE 1 subtree subtree 25 | terminal: TERMINAL 1 hash 32 26 | # If the sibling is empty truncated implies more than two children. 27 | truncated: TRUNCATED 1 hash 32 28 | empty: EMPTY 1 29 | EMPTY: \x00 30 | TERMINAL: \x01 31 | MIDDLE: \x02 32 | TRUNCATED: \x03 33 | """ 34 | 35 | EMPTY = bytes([0]) 36 | TERMINAL = bytes([1]) 37 | MIDDLE = bytes([2]) 38 | TRUNCATED = bytes([3]) 39 | 40 | BLANK = bytes([0] * 32) 41 | 42 | prehashed = {} 43 | 44 | def init_prehashed(): 45 | for x in [EMPTY, TERMINAL, MIDDLE]: 46 | for y in [EMPTY, TERMINAL, MIDDLE]: 47 | prehashed[x + y] = blake2b(bytes([0] * 30) + x + y) 48 | 49 | init_prehashed() 50 | 51 | def hashdown(mystr): 52 | assert len(mystr) == 66 53 | h = prehashed[bytes(mystr[0:1] + mystr[33:34])].copy() 54 | h.update(mystr[1:33] + mystr[34:]) 55 | return h.digest()[:32] 56 | 57 | def compress_root(mystr): 58 | assert len(mystr) == 33 59 | if mystr[0:1] == MIDDLE: 60 | return mystr[1:] 61 | if mystr[0:1] == EMPTY: 62 | assert mystr[1:] == BLANK 63 | return BLANK 64 | return blake2b(mystr).digest()[:32] 65 | 66 | def get_bit(mybytes, pos): 67 | assert len(mybytes) == 32 68 | return (mybytes[pos // 8] >> (7 - (pos % 8))) & 1 69 | 70 | class ReferenceMerkleSet: 71 | def __init__(self, root = None): 72 | self.root = root 73 | if root is None: 74 | self.root = _empty 75 | 76 | def get_root(self): 77 | return compress_root(self.root.get_hash()) 78 | 79 | def add_already_hashed(self, toadd): 80 | self.root = self.root.add(toadd, 0) 81 | 82 | def remove_already_hashed(self, toremove): 83 | self.root = self.root.remove(toremove, 0) 84 | 85 | def is_included_already_hashed(self, tocheck): 86 | proof = [] 87 | r = self.root.is_included(tocheck, 0, proof) 88 | return r, b''.join(proof) 89 | 90 | def _audit(self, hashes): 91 | newhashes = [] 92 | self.root._audit(newhashes, []) 93 | assert newhashes == 
sorted(newhashes) 94 | 95 | class EmptyNode: 96 | def __init__(self): 97 | self.hash = BLANK 98 | 99 | def get_hash(self): 100 | return EMPTY + BLANK 101 | 102 | def is_empty(self): 103 | return True 104 | 105 | def is_terminal(self): 106 | return False 107 | 108 | def is_double(self): 109 | raise SetError() 110 | 111 | def add(self, toadd, depth): 112 | return TerminalNode(toadd) 113 | 114 | def remove(self, toremove, depth): 115 | return self 116 | 117 | def is_included(self, tocheck, depth, p): 118 | p.append(EMPTY) 119 | return False 120 | 121 | def other_included(self, tocheck, depth, p, collapse): 122 | p.append(EMPTY) 123 | 124 | def _audit(self, hashes, bits): 125 | pass 126 | 127 | _empty = EmptyNode() 128 | 129 | class TerminalNode: 130 | def __init__(self, hash, bits = None): 131 | assert len(hash) == 32 132 | self.hash = hash 133 | if bits is not None: 134 | self._audit([], bits) 135 | 136 | def get_hash(self): 137 | return TERMINAL + self.hash 138 | 139 | def is_empty(self): 140 | return False 141 | 142 | def is_terminal(self): 143 | return True 144 | 145 | def is_double(self): 146 | raise SetError() 147 | 148 | def add(self, toadd, depth): 149 | if toadd == self.hash: 150 | return self 151 | if toadd > self.hash: 152 | return self._make_middle([self, TerminalNode(toadd)], depth) 153 | else: 154 | return self._make_middle([TerminalNode(toadd), self], depth) 155 | 156 | def _make_middle(self, children, depth): 157 | cbits = [get_bit(child.hash, depth) for child in children] 158 | if cbits[0] != cbits[1]: 159 | return MiddleNode(children) 160 | nextvals = [None, None] 161 | nextvals[cbits[0] ^ 1] = _empty 162 | nextvals[cbits[0]] = self._make_middle(children, depth + 1) 163 | return MiddleNode(nextvals) 164 | 165 | def remove(self, toremove, depth): 166 | if toremove == self.hash: 167 | return _empty 168 | return self 169 | 170 | def is_included(self, tocheck, depth, proof): 171 | proof.append(TERMINAL + self.hash) 172 | return tocheck == self.hash 173 | 
174 | def other_included(self, tocheck, depth, p, collapse): 175 | p.append(TERMINAL + self.hash) 176 | 177 | def _audit(self, hashes, bits): 178 | hashes.append(self.hash) 179 | for pos, v in enumerate(bits): 180 | assert get_bit(self.hash, pos) == v 181 | 182 | class MiddleNode: 183 | def __init__(self, children): 184 | self.children = children 185 | if children[0].is_empty() and children[1].is_double(): 186 | self.hash = children[1].hash 187 | elif children[1].is_empty() and children[0].is_double(): 188 | self.hash = children[0].hash 189 | else: 190 | if children[0].is_empty() and (children[1].is_empty() or children[1].is_terminal()): 191 | raise SetError() 192 | if children[1].is_empty() and children[0].is_terminal(): 193 | raise SetError 194 | if children[0].is_terminal() and children[1].is_terminal() and children[0].hash >= children[1].hash: 195 | raise SetError 196 | self.hash = hashdown(children[0].get_hash() + children[1].get_hash()) 197 | 198 | def get_hash(self): 199 | return MIDDLE + self.hash 200 | 201 | def is_empty(self): 202 | return False 203 | 204 | def is_terminal(self): 205 | return False 206 | 207 | def is_double(self): 208 | if self.children[0].is_empty(): 209 | return self.children[1].is_double() 210 | if self.children[1].is_empty(): 211 | return self.children[0].is_double() 212 | return self.children[0].is_terminal() and self.children[1].is_terminal() 213 | 214 | def add(self, toadd, depth): 215 | bit = get_bit(toadd, depth) 216 | child = self.children[bit] 217 | newchild = child.add(toadd, depth + 1) 218 | if newchild is child: 219 | return self 220 | newvals = [x for x in self.children] 221 | newvals[bit] = newchild 222 | return MiddleNode(newvals) 223 | 224 | def remove(self, toremove, depth): 225 | bit = get_bit(toremove, depth) 226 | child = self.children[bit] 227 | newchild = child.remove(toremove, depth + 1) 228 | if newchild is child: 229 | return self 230 | otherchild = self.children[bit ^ 1] 231 | if newchild.is_empty() and 
otherchild.is_terminal(): 232 | return otherchild 233 | if newchild.is_terminal() and otherchild.is_empty(): 234 | return newchild 235 | newvals = [x for x in self.children] 236 | newvals[bit] = newchild 237 | return MiddleNode(newvals) 238 | 239 | def is_included(self, tocheck, depth, p): 240 | p.append(MIDDLE) 241 | if get_bit(tocheck, depth) == 0: 242 | r = self.children[0].is_included(tocheck, depth + 1, p) 243 | self.children[1].other_included(tocheck, depth + 1, p, not self.children[0].is_empty()) 244 | return r 245 | else: 246 | self.children[0].other_included(tocheck, depth + 1, p, not self.children[1].is_empty()) 247 | return self.children[1].is_included(tocheck, depth + 1, p) 248 | 249 | def other_included(self, tocheck, depth, p, collapse): 250 | if collapse or not self.is_double(): 251 | p.append(TRUNCATED + self.hash) 252 | else: 253 | self.is_included(tocheck, depth, p) 254 | 255 | def _audit(self, hashes, bits): 256 | self.children[0]._audit(hashes, bits + [0]) 257 | self.children[1]._audit(hashes, bits + [1]) 258 | 259 | class TruncatedNode: 260 | def __init__(self, hash): 261 | self.hash = hash 262 | 263 | def get_hash(self): 264 | return MIDDLE + self.hash 265 | 266 | def is_empty(self): 267 | return False 268 | 269 | def is_terminal(self): 270 | return False 271 | 272 | def is_double(self): 273 | return False 274 | 275 | def is_included(self, tocheck, depth, p): 276 | raise SetError() 277 | 278 | def other_included(self, tocheck, depth, p, collapse): 279 | p.append(TRUNCATED + self.hash) 280 | 281 | class SetError(BaseException): 282 | pass 283 | 284 | def confirm_included(root, val, proof): 285 | return confirm_not_included_already_hashed(root, sha256(val).digest(), proof) 286 | 287 | def confirm_included_already_hashed(root, val, proof): 288 | return _confirm(root, val, proof, True) 289 | 290 | def confirm_not_included(root, val, proof): 291 | return confirm_not_included_already_hashed(root, sha256(val).digest(), proof) 292 | 293 | def 
confirm_not_included_already_hashed(root, val, proof): 294 | return _confirm(root, val, proof, False) 295 | 296 | def _confirm(root, val, proof, expected): 297 | try: 298 | p = deserialize_proof(proof) 299 | if p.get_root() != root: 300 | return False 301 | r, junk = p.is_included_already_hashed(val) 302 | return r == expected 303 | except SetError: 304 | return False 305 | 306 | def deserialize_proof(proof): 307 | try: 308 | r, pos = _deserialize(proof, 0, []) 309 | if pos != len(proof): 310 | raise SetError() 311 | return ReferenceMerkleSet(r) 312 | except IndexError: 313 | raise SetError() 314 | 315 | def _deserialize(proof, pos, bits): 316 | t = proof[pos:pos + 1] 317 | if t == EMPTY: 318 | return _empty, pos + 1 319 | if t == TERMINAL: 320 | return TerminalNode(proof[pos + 1:pos + 33], bits), pos + 33 321 | if t == TRUNCATED: 322 | return TruncatedNode(proof[pos + 1:pos + 33]), pos + 33 323 | if t != MIDDLE: 324 | raise SetError() 325 | v0, pos = _deserialize(proof, pos + 1, bits + [0]) 326 | v1, pos = _deserialize(proof, pos, bits + [1]) 327 | return MiddleNode([v0, v1]), pos 328 | 329 | -------------------------------------------------------------------------------- /TestMerkleSet.py: -------------------------------------------------------------------------------- 1 | from ReferenceMerkleSet import * 2 | from MerkleSet import * 3 | 4 | def from_bytes(f): 5 | return int.from_bytes(f, 'big') 6 | 7 | def to_bytes(f, v): 8 | return int.to_bytes(f, v, 'big') 9 | 10 | # Add numhashes things, only checking the hash halfway through to test lazy evaluation 11 | def _testlazy(numhashes, mset, roots, proofss): 12 | hashes = [blake2b(to_bytes(i, 10)).digest()[:32] for i in range(numhashes)] 13 | checkpoint = numhashes // 2 14 | for i in range(numhashes - 1): 15 | if i == checkpoint: 16 | r, proof = mset.is_included_already_hashed(hashes[checkpoint // 2]) 17 | assert r 18 | assert proof == proofss[i][checkpoint // 2] 19 | mset.add_already_hashed(hashes[i]) 20 | 
mset._audit(hashes[:i + 1]) 21 | r, proof = mset.is_included_already_hashed(hashes[checkpoint]) 22 | assert r 23 | assert proof == proofss[-1][checkpoint] 24 | for i in range(numhashes - 1, -1, -1): 25 | mset.remove_already_hashed(hashes[i]) 26 | mset._audit(hashes[:i]) 27 | if i == checkpoint or i == 0: 28 | assert roots[i] == mset.get_root() 29 | for j in range(numhashes): 30 | r, proof = mset.is_included_already_hashed(hashes[j]) 31 | assert r == (j < i) 32 | assert proof == proofss[i][j] 33 | 34 | def _testmset(numhashes, mset, oldroots = None, oldproofss = None): 35 | hashes = [blake2b(to_bytes(i, 10)).digest()[:32] for i in range(numhashes)] 36 | if oldroots is None: 37 | making_new = True 38 | roots = [] 39 | proofss = [] 40 | else: 41 | making_new = False 42 | roots = oldroots 43 | proofss = oldproofss 44 | assert mset.get_root() == BLANK 45 | mset._audit([]) 46 | # Add numhashes things one at a time, comparing to previously generated roots and proofs 47 | for i in range(numhashes): 48 | if not making_new: 49 | assert roots[i] == mset.get_root() 50 | proofs = proofss[i] 51 | else: 52 | roots.append(mset.get_root()) 53 | proofs = [] 54 | # After each addition check inclusion of everything which has been added or will be added 55 | for j in range(numhashes): 56 | r, proof = mset.is_included_already_hashed(hashes[j]) 57 | assert r == (j < i) 58 | if not making_new: 59 | assert proofss[i][j] == proof 60 | else: 61 | proofs.append(proof) 62 | if r: 63 | assert confirm_included_already_hashed(roots[i], hashes[j], proof) 64 | else: 65 | assert confirm_not_included_already_hashed(roots[i], hashes[j], proof) 66 | if i > 0: 67 | # Add a second time to check idempotence 68 | mset.add_already_hashed(hashes[i-1]) 69 | mset._audit(hashes[:i]) 70 | assert mset.get_root() == roots[i] 71 | for j in range(numhashes): 72 | r, proof = mset.is_included_already_hashed(hashes[j]) 73 | assert proof == proofs[j] 74 | assert r == (j < i) 75 | mset.add_already_hashed(hashes[i]) 76 | 
mset._audit(hashes[:i+1]) 77 | proofss.append(proofs) 78 | mset.get_root() 79 | mset._audit(hashes) 80 | # Remove everything one at a time checking in that direction as well 81 | for i in range(numhashes - 1, -1, -1): 82 | for k in range(2): 83 | mset.remove_already_hashed(hashes[i]) 84 | mset._audit(hashes[:i]) 85 | assert roots[i] == mset.get_root() 86 | for j in range(numhashes): 87 | r, proof = mset.is_included_already_hashed(hashes[j]) 88 | assert r == (j < i) 89 | assert proof == proofss[i][j] 90 | return roots, proofss 91 | 92 | def testall(): 93 | num = 200 94 | roots, proofss = _testmset(num, ReferenceMerkleSet()) 95 | # Test with a range of values of both parameters 96 | for i in range(1, 5): 97 | for j in range(6): 98 | _testmset(num, MerkleSet(i, 2 ** j), roots, proofss) 99 | _testlazy(num, MerkleSet(i, 2 ** j), roots, proofss) 100 | 101 | testall() 102 | --------------------------------------------------------------------------------