├── README └── pyavltree.py /README: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Pavel Grafov 2 | 3 | Implementation of AVL trees (http://en.wikipedia.org/wiki/AVL_tree) in Python. 4 | The AVLTree class supports the following functionality: 5 | - insertion of a new entry in the tree; 6 | - removal of any entry in the tree; 7 | - search for any entry in the tree; 8 | - "sanity check" for the tree (described later); 9 | - 4 different tree traversals: 10 | - preorder, 11 | - inorder, 12 | - postorder, 13 | - inorder non-recursive. 14 | 15 | I would like to mention some sources that helped me a lot while working on this code: 16 | 1) Wikipedia 17 | 1a) http://en.wikipedia.org/wiki/AVL_tree 18 | Description of AVL trees. 19 | 1b) http://en.wikipedia.org/wiki/Tree_traversal 20 | Description of tree traversals in binary search trees and 21 | sample implementations of traversal algorithms in pseudocode. 22 | 2) http://www.cse.ohio-state.edu/~sgomori/570/avlrotations.html 23 | Rotation algorithms for putting an out-of-balance AVL tree back in balance. 24 | 3) http://sourceforge.net/projects/standardavl/ 25 | Implementation of AVL trees in C++. I borrowed the idea of a "sanity check" - 26 | a method that traverses the tree and checks that the tree is in balance, contains 27 | no circular references, has a correctly calculated height for each node, and so on. 28 | 4) http://oopweb.com/Algorithms/Documents/AvlTrees/Volume/AvlTrees.htm 29 | From this page I borrowed the idea of how to correctly delete an entry 30 | from an AVL tree. 31 | 32 | This code is available under the MIT License. 
33 | 34 | Permission is hereby granted, free of charge, to any person obtaining a copy 35 | of this software and associated documentation files (the "Software"), to deal 36 | in the Software without restriction, including without limitation the rights 37 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 38 | copies of the Software, and to permit persons to whom the Software is 39 | furnished to do so, subject to the following conditions: 40 | 41 | The above copyright notice and this permission notice shall be included in 42 | all copies or substantial portions of the Software. 43 | 44 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 47 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 49 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 50 | THE SOFTWARE. 
"""pyavltree.py -- an AVL tree (self-balancing binary search tree).

The module provides:

* ``random_data_generator`` -- a helper producing random integer keys;
* ``Node`` -- a tree node carrying a key, parent/child links and a height;
* ``AVLTree`` -- the tree itself, with insertion, removal, search, four
  traversals and a structural ``sanity_check``.

Heights are counted in edges: a node without children has height 0 and a
missing child counts as height -1.
"""

import math
import random


def random_data_generator(max_r):
    """Yield ``max_r`` random integers, each drawn from ``[0, max_r]``."""
    # ``range`` (not ``xrange``) so the module runs on Python 2 and 3 alike.
    for _ in range(max_r):
        yield random.randint(0, max_r)


class Node(object):
    """A single tree node: a key plus parent/child links and a cached height."""

    def __init__(self, key):
        self.key = key
        self.parent = None
        self.leftChild = None
        self.rightChild = None
        # Height in edges of the subtree rooted here; 0 for a childless node.
        self.height = 0

    def __str__(self):
        return str(self.key) + "(" + str(self.height) + ")"

    def is_leaf(self):
        """Return True when the cached height is 0 (i.e. no children)."""
        return self.height == 0

    def max_children_height(self):
        """Return the larger child height, or -1 when there are no children."""
        left_height = self.leftChild.height if self.leftChild else -1
        right_height = self.rightChild.height if self.rightChild else -1
        return max(left_height, right_height)

    def balance(self):
        """Return left height minus right height (missing child counts as -1)."""
        left_height = self.leftChild.height if self.leftChild else -1
        right_height = self.rightChild.height if self.rightChild else -1
        return left_height - right_height


class AVLTree(object):
    """AVL tree storing unique keys.

    Attributes:
        rootNode: the root ``Node`` or ``None`` for an empty tree.
        elements_count: number of keys currently stored.
        rebalance_count: number of rebalancing operations performed so far.
    """

    def __init__(self, *args):
        """Create a tree; a single positional iterable of keys is inserted.

        Any other number of positional arguments yields an empty tree.
        """
        self.rootNode = None
        self.elements_count = 0
        self.rebalance_count = 0
        if len(args) == 1:
            for key in args[0]:
                self.insert(key)

    def height(self):
        """Return the root's height; 0 for an empty or single-node tree."""
        if self.rootNode:
            return self.rootNode.height
        return 0

    # ------------------------------------------------------------------
    # Rebalancing
    # ------------------------------------------------------------------

    def _replace_child(self, parent, old_child, new_child):
        """Hang ``new_child`` where ``old_child`` was under ``parent``.

        ``parent`` may be ``None``, in which case ``new_child`` becomes the
        root of the tree.
        """
        if parent is None:
            self.rootNode = new_child
        elif parent.rightChild is old_child:
            parent.rightChild = new_child
        else:
            parent.leftChild = new_child
        new_child.parent = parent

    def rebalance(self, node_to_rebalance):
        """Restore the AVL property at a node whose balance is -2 or +2.

        Performs the matching single or double rotation, re-links the
        rotated subtree to the former parent of ``node_to_rebalance`` and
        recomputes the affected heights.
        """
        self.rebalance_count += 1
        A = node_to_rebalance
        F = A.parent  # allowed to be None (A is the root)
        if A.balance() == -2:
            B = A.rightChild
            assert B is not None
            if B.balance() <= 0:
                # Right-right case: single left rotation, B becomes the top.
                A.rightChild = B.leftChild
                if A.rightChild:
                    A.rightChild.parent = A
                B.leftChild = A
                A.parent = B
                self._replace_child(F, A, B)
                self.recompute_heights(A)
                self.recompute_heights(B.parent)
            else:
                # Right-left case: double rotation, C becomes the top.
                C = B.leftChild
                assert C is not None
                B.leftChild = C.rightChild
                if B.leftChild:
                    B.leftChild.parent = B
                A.rightChild = C.leftChild
                if A.rightChild:
                    A.rightChild.parent = A
                C.rightChild = B
                B.parent = C
                C.leftChild = A
                A.parent = C
                self._replace_child(F, A, C)
                self.recompute_heights(A)
                self.recompute_heights(B)
        else:
            assert A.balance() == +2
            B = A.leftChild
            assert B is not None
            if B.balance() >= 0:
                # Left-left case: single right rotation, B becomes the top.
                A.leftChild = B.rightChild
                if A.leftChild:
                    A.leftChild.parent = A
                B.rightChild = A
                A.parent = B
                self._replace_child(F, A, B)
                self.recompute_heights(A)
                self.recompute_heights(B.parent)
            else:
                # Left-right case: double rotation, C becomes the top.
                C = B.rightChild
                assert C is not None
                A.leftChild = C.rightChild
                if A.leftChild:
                    A.leftChild.parent = A
                B.rightChild = C.leftChild
                if B.rightChild:
                    B.rightChild.parent = B
                C.leftChild = B
                B.parent = C
                C.rightChild = A
                A.parent = C
                self._replace_child(F, A, C)
                self.recompute_heights(A)
                self.recompute_heights(B)

    def _rebalance_upwards(self, node):
        """Walk from ``node`` to the root, rebalancing every unbalanced node."""
        while node:
            if node.balance() not in (-1, 0, 1):
                self.rebalance(node)
            node = node.parent

    def recompute_heights(self, start_from_node):
        """Refresh cached heights from ``start_from_node`` towards the root.

        Stops at the first node whose height did not change.
        """
        node = start_from_node
        changed = True
        while node and changed:
            old_height = node.height
            # max_children_height() is -1 for a childless node, giving 0.
            node.height = node.max_children_height() + 1
            changed = node.height != old_height
            node = node.parent

    # ------------------------------------------------------------------
    # Consistency checking
    # ------------------------------------------------------------------

    def sanity_check(self, *args):
        """Verify the subtree rooted at ``args[0]`` (default: the whole tree).

        Checks cached heights, balance factors, self-references, parent
        links and the key order between each node and its direct children.

        Raises:
            Exception: describing the first inconsistency found.
        """
        node = args[0] if args else self.rootNode
        if node is None or (node.is_leaf() and node.parent is None):
            # Trivial: the tree is empty or consists of a single node.
            return

        expected_height = node.max_children_height() + 1
        if node.height != expected_height:
            raise Exception("Invalid height for node %s: %s instead of %s!"
                            % (node, node.height, expected_height))

        bal_factor = node.balance()
        if not -1 <= bal_factor <= 1:
            raise Exception("Balance factor for node %s is %s!"
                            % (node, bal_factor))

        # Make sure we have no circular references.
        if node.leftChild is node:
            raise Exception("Circular reference for node %s: "
                            "node.leftChild is node!" % (node,))
        if node.rightChild is node:
            raise Exception("Circular reference for node %s: "
                            "node.rightChild is node!" % (node,))

        if node.leftChild:
            if node.leftChild.parent is not node:
                raise Exception("Left child of node %s doesn't know who "
                                "his father is!" % (node,))
            if not node.leftChild.key <= node.key:
                raise Exception("Key of left child of node %s is greater "
                                "than key of his parent!" % (node,))
            self.sanity_check(node.leftChild)

        if node.rightChild:
            if node.rightChild.parent is not node:
                raise Exception("Right child of node %s doesn't know who "
                                "his father is!" % (node,))
            if not node.rightChild.key >= node.key:
                raise Exception("Key of right child of node %s is less "
                                "than key of his parent!" % (node,))
            self.sanity_check(node.rightChild)

    # ------------------------------------------------------------------
    # Insertion
    # ------------------------------------------------------------------

    def add_as_child(self, parent_node, child_node):
        """Attach ``child_node`` below ``parent_node`` and rebalance.

        Descends left for smaller keys and right otherwise until a free
        slot is found, updates heights on the way back up and rebalances
        the deepest node that became unbalanced.
        """
        current = parent_node
        while True:
            if child_node.key < current.key:
                if current.leftChild:
                    current = current.leftChild
                    continue
                current.leftChild = child_node
            else:
                if current.rightChild:
                    current = current.rightChild
                    continue
                current.rightChild = child_node
            break
        child_node.parent = current

        node_to_rebalance = None
        # Heights can only change when the new node hangs off a former leaf.
        if current.height == 0:
            node = current
            while node:
                node.height = node.max_children_height() + 1
                if node.balance() not in (-1, 0, 1):
                    # We need the unbalanced node furthest from the root.
                    node_to_rebalance = node
                    break
                node = node.parent

        if node_to_rebalance:
            self.rebalance(node_to_rebalance)

    def insert(self, key):
        """Insert ``key``; keys already present are ignored."""
        if not self.rootNode:
            self.rootNode = Node(key)
            # The first key counts as an element too.
            self.elements_count += 1
        elif self.find(key) is None:
            self.elements_count += 1
            self.add_as_child(self.rootNode, Node(key))

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------

    def find_biggest(self, start_node):
        """Return the rightmost node of the subtree rooted at ``start_node``."""
        node = start_node
        while node.rightChild:
            node = node.rightChild
        return node

    def find_smallest(self, start_node):
        """Return the leftmost node of the subtree rooted at ``start_node``."""
        node = start_node
        while node.leftChild:
            node = node.leftChild
        return node

    def find(self, key):
        """Return the node holding ``key`` or ``None`` when it is absent."""
        return self.find_in_subtree(self.rootNode, key)

    def find_in_subtree(self, node, key):
        """Search for ``key`` below ``node``; return the node or ``None``."""
        while node is not None:
            if key < node.key:
                node = node.leftChild
            elif key > node.key:
                node = node.rightChild
            else:
                return node
        return None  # key not found

    # ------------------------------------------------------------------
    # Traversals
    # ------------------------------------------------------------------

    def inorder_non_recursive(self):
        """Return all keys in ascending order using parent links only."""
        keys = []
        node = self.rootNode
        if node is None:
            return keys
        node = self.find_smallest(node)
        while node:
            keys.append(node.key)
            if node.rightChild:
                node = self.find_smallest(node.rightChild)
            else:
                # Climb until we leave a left subtree (or pass the root).
                while node.parent and node is node.parent.rightChild:
                    node = node.parent
                node = node.parent
        return keys

    def preorder(self, node, retlst=None):
        """Return keys below ``node`` in pre-order, appended to ``retlst``."""
        if retlst is None:
            retlst = []
        retlst.append(node.key)
        if node.leftChild:
            self.preorder(node.leftChild, retlst)
        if node.rightChild:
            self.preorder(node.rightChild, retlst)
        return retlst

    def inorder(self, node, retlst=None):
        """Return keys below ``node`` in in-order, appended to ``retlst``."""
        if retlst is None:
            retlst = []
        if node.leftChild:
            self.inorder(node.leftChild, retlst)
        retlst.append(node.key)
        if node.rightChild:
            self.inorder(node.rightChild, retlst)
        return retlst

    def postorder(self, node, retlst=None):
        """Return keys below ``node`` in post-order, appended to ``retlst``."""
        if retlst is None:
            retlst = []
        if node.leftChild:
            self.postorder(node.leftChild, retlst)
        if node.rightChild:
            self.postorder(node.rightChild, retlst)
        retlst.append(node.key)
        return retlst

    def as_list(self, pre_in_post):
        """Return the keys as a list in the requested order.

        ``pre_in_post`` selects the traversal: 0 pre-order, 1 in-order,
        2 post-order, 3 non-recursive in-order. An empty tree always gives
        ``[]``; any other selector on a non-empty tree gives ``None``.
        """
        if not self.rootNode:
            return []
        if pre_in_post == 0:
            return self.preorder(self.rootNode)
        if pre_in_post == 1:
            return self.inorder(self.rootNode)
        if pre_in_post == 2:
            return self.postorder(self.rootNode)
        if pre_in_post == 3:
            return self.inorder_non_recursive()
        return None

    # ------------------------------------------------------------------
    # Removal
    # ------------------------------------------------------------------

    def remove(self, key):
        """Remove ``key`` from the tree; absent keys are ignored."""
        node = self.find(key)
        if node is None:
            return
        self.elements_count -= 1

        # There are three cases:
        #
        # 1) The node is a leaf. Remove it and return.
        #
        # 2) The node is a branch (has only 1 child). Make the pointer to
        #    this node point to the child of this node.
        #
        # 3) The node has two children. Swap places with the successor of
        #    the node (the smallest item in its right subtree) and delete
        #    the node from its new position.
        if node.is_leaf():
            self.remove_leaf(node)
        elif bool(node.leftChild) ^ bool(node.rightChild):
            self.remove_branch(node)
        else:
            assert node.leftChild and node.rightChild
            self.swap_with_successor_and_remove(node)

    def remove_leaf(self, node):
        """Unlink a childless ``node`` and rebalance its ancestors."""
        parent = node.parent
        if parent:
            if parent.leftChild is node:
                parent.leftChild = None
            else:
                assert parent.rightChild is node
                parent.rightChild = None
            self.recompute_heights(parent)
        else:
            self.rootNode = None
        self._rebalance_upwards(parent)

    def remove_branch(self, node):
        """Unlink a ``node`` with exactly one child and rebalance ancestors."""
        parent = node.parent
        child = node.rightChild or node.leftChild
        assert child is not None
        if parent:
            if parent.leftChild is node:
                parent.leftChild = child
            else:
                assert parent.rightChild is node
                parent.rightChild = child
            child.parent = parent
            self.recompute_heights(parent)
        else:
            # The node is the root: its only child becomes the new root.
            self.rootNode = child
            child.parent = None
        self._rebalance_upwards(parent)

    def swap_with_successor_and_remove(self, node):
        """Remove a two-child ``node`` by first swapping it with its successor."""
        successor = self.find_smallest(node.rightChild)
        self.swap_nodes(node, successor)
        assert node.leftChild is None
        if node.height == 0:
            self.remove_leaf(node)
        else:
            self.remove_branch(node)

    def swap_nodes(self, node1, node2):
        """Exchange the tree positions (links and heights) of two nodes.

        ``node1`` must have two children and ``node2`` must be its in-order
        successor, i.e. the leftmost node of ``node1``'s right subtree.
        """
        assert node1.height > node2.height
        parent1 = node1.parent
        left_child1 = node1.leftChild
        right_child1 = node1.rightChild
        parent2 = node2.parent
        assert parent2 is not None
        assert parent2.leftChild is node2 or parent2 is node1
        assert node2.leftChild is None
        right_child2 = node2.rightChild

        node1.height, node2.height = node2.height, node1.height

        # node2 takes node1's place under node1's former parent.
        if parent1:
            if parent1.leftChild is node1:
                parent1.leftChild = node2
            else:
                assert parent1.rightChild is node1
                parent1.rightChild = node2
            node2.parent = parent1
        else:
            self.rootNode = node2
            node2.parent = None

        node2.leftChild = left_child1
        left_child1.parent = node2

        # node1 inherits node2's (at most one, right) child.
        node1.leftChild = None
        node1.rightChild = right_child2
        if right_child2:
            right_child2.parent = node1

        if parent2 is not node1:
            node2.rightChild = right_child1
            right_child1.parent = node2
            parent2.leftChild = node1
            node1.parent = parent2
        else:
            # The successor was node1's direct right child.
            node2.rightChild = node1
            node1.parent = node2

    # ------------------------------------------------------------------
    # Debug output
    # ------------------------------------------------------------------

    def out(self, start_node=None):
        """Return a level-by-level text rendering of the tree.

        Use for debugging only, and only with small trees.
        """
        if start_node is None:
            start_node = self.rootNode
        if not start_node:
            return "AVLTree is empty"

        space_symbol = "*"
        spaces_count = 80
        blank_line = space_symbol * spaces_count + "\n"
        rendered_levels = []
        level = [start_node]
        while any(item is not None for item in level):
            level_string = blank_line
            for i, item in enumerate(level):
                # Floor division keeps the slice index an int on Python 3.
                j = (i + 1) * spaces_count // (len(level) + 1)
                label = str(item) if item else space_symbol
                level_string = level_string[:j] + label + level_string[j + 1:]
            rendered_levels.append(level_string)
            level_next = []
            for item in level:
                if item:
                    level_next += [item.leftChild, item.rightChild]
                else:
                    level_next += [None, None]
            level = level_next
        return "".join(rendered_levels)


def _self_test():
    """Run the module's built-in smoke test (uses random data)."""
    # Check empty tree creation.
    a = AVLTree()
    a.sanity_check()

    # Check non-empty tree creation.
    seq = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    seq_copy = list(seq)
    b = AVLTree(seq)
    b.sanity_check()

    # In-order traversal of an AVL tree (as of any binary search tree)
    # must return the underlying set of keys in ascending order.
    assert b.as_list(3) == b.as_list(1) == seq_copy

    # Check that node deletion works.
    c = AVLTree(random_data_generator(10000))
    before_deletion = c.elements_count
    for key in random_data_generator(1000):
        c.remove(key)
    after_deletion = c.elements_count
    c.sanity_check()
    assert before_deletion >= after_deletion

    # The height of an AVL tree is strictly less than
    # 1.44*log2(N+2)-1, where N is the number of elements.
    assert c.height() < 1.44 * math.log(after_deletion + 2, 2) - 1


if __name__ == "__main__":
    _self_test()