├── .gitignore ├── 000_Radix_sort.py ├── 001_Expression_Trees.py ├── 002_Binary_Search_Tree.py ├── 003_AVL_Tree.py ├── 004_Leetcode_Binary_Tree_Serialization.py ├── 005_Binary_Heap.py ├── 006_Sort.py ├── 007_Disjoint_Sets.py ├── 008_Trie_Tree.py ├── 009_Binary_Indexed_Tree.py ├── 010_KMP.py ├── 011_Manacher_Algorithm.py ├── 012_Morris_Traversal_Binary_Tree.py ├── 013_Longest_Common_Subsequence.py ├── 014_Floyd_Warshall_Algorithm.py ├── 015_Bellman_Ford_Algorithm.py ├── 016_Topological_Sort.py ├── 017_Dijkstra_Algorithm.py ├── 018_HashHeap.py ├── Fibonacci.java └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | ############# 2 | ## Windows detritus 3 | ############# 4 | 5 | # Windows image file caches 6 | Thumbs.db 7 | ehthumbs.db 8 | 9 | # Folder config file 10 | Desktop.ini 11 | 12 | # Recycle Bin used on file shares 13 | $RECYCLE.BIN/ 14 | 15 | # Mac crap 16 | .DS_Store 17 | 18 | 19 | ############# 20 | ## Python 21 | ############# 22 | 23 | *.py[cod] 24 | 25 | # Packages 26 | *.egg 27 | *.egg-info 28 | dist/ 29 | build/ 30 | eggs/ 31 | parts/ 32 | var/ 33 | sdist/ 34 | develop-eggs/ 35 | .installed.cfg 36 | 37 | # Installer logs 38 | pip-log.txt 39 | 40 | # Unit test / coverage reports 41 | .coverage 42 | .tox 43 | 44 | #Translations 45 | *.mo 46 | 47 | #Mr Developer 48 | .mr.developer.cfg 49 | *.class 50 | -------------------------------------------------------------------------------- /000_Radix_sort.py: -------------------------------------------------------------------------------- 1 | # Radix Sort in 'Data Structure and Algorithm Analysis' P.54 2 | # 3 | # Condition: All number needed to be sorted should be within 0 to M^P - 1 inclusively 4 | # 5 | # Radix Sort: There are N numbers needs to be sorted. In general, 6 | # these numbers are in range 0 to M^P - 1 for Constant P. 7 | # We could sort them by Bucket Sort with M buckets for p 8 | # times. 
9 | # Actually, if we set M = 16, P = 8, we could sort all numbers 10 | # within the range of the INT type. 11 | # Time complexity is O(NP) = O(N) 12 | # 13 | # Comment: Constraining the range of the numbers to be sorted helps decrease 14 | # the time complexity. In this circumstance the algorithm has O(n) 15 | # time complexity, but it is probably still not as efficient as some of the 16 | # normal sorting algorithms in Chapter 7, because of the high constant 17 | # cost involved. 18 | # 19 | # RadixSort_acc shows the sorted result in ascending order. This algorithm uses 20 | # the insert_last function while inserting an element into a linked list, whose 21 | # time complexity is O(n), which is kind of expensive. 22 | # 23 | # RadixSort_dec shows the sorted result in descending order. This algorithm uses 24 | # the insert_first function, which is much cheaper than insert_last. 25 | # However, it turns out that this function won't work, because the insert_first 26 | # function reverses the order in the unfinished list. For example, (m=10) 27 | # original: 8 -> 0 -> 2 28 | # 1-st round: 8 -> 2 -> 0 29 | # 2-nd round: 0 -> 2 -> 8 30 | # 3-rd round: 8 -> 2 -> 0 31 | # if numbers are mixed, we would not get the right result.
def RadixSort_acc(unsorted, m=16, p=8):
    """
    Sort a header-led linked list into ascending order (LSD radix sort).

    Every value must be an integer in the range 0 .. m**p - 1. The nodes are
    relinked in place over p stable bucket passes, least significant digit
    first. Each node is appended to its bucket through a tail pointer, so a
    pass costs O(n + m) instead of walking the bucket for every insertion.

    `unsorted.next` is NOT updated; use the returned node as the new start
    of the data.

    : type unsorted: ListNode, header node (its own value is ignored)
    : type m: int, radix, i.e. the number of buckets
    : type p: int, number of digits / passes
    : rtype: ListNode, first node of the sorted chain (None if list is empty)
    """
    first = unsorted.next
    base = 1  # m ** i for the current pass
    for _ in range(p):
        heads = [None] * m
        tails = [None] * m

        # distribute: append every node to the bucket of its current digit
        node = first
        while node is not None:
            following = node.next
            node.next = None
            slot = (node.val // base) % m
            if tails[slot] is None:
                heads[slot] = node
            else:
                tails[slot].next = node
            tails[slot] = node
            node = following

        # collect: chain the non-empty buckets from digit 0 up to m - 1
        first = last = None
        for slot in range(m):
            if heads[slot] is None:
                continue
            if last is None:
                first = heads[slot]
            else:
                last.next = heads[slot]
            last = tails[slot]

        base *= m
    return first


# this method is cracked!
def RadixSort_dec(unsorted, m=16, p=8):
    """
    Sort a header-led linked list into descending order (LSD radix sort).

    Earlier revisions pushed nodes with insert_first, which reversed the
    relative order inside every bucket on every pass and therefore did not
    sort correctly (the author flagged the function as "cracked"). This
    version appends to each bucket in O(1) through a tail pointer, which
    keeps every pass stable, and chains the buckets from the largest digit
    down to the smallest. It no longer prints the list after each pass.

    Every value must be an integer in the range 0 .. m**p - 1.
    `unsorted.next` is NOT updated; use the returned node.

    : type unsorted: ListNode, header node (its own value is ignored)
    : type m: int, radix, i.e. the number of buckets
    : type p: int, number of digits / passes
    : rtype: ListNode, first node of the sorted chain (None if list is empty)
    """
    first = unsorted.next
    base = 1  # m ** i for the current pass
    for _ in range(p):
        heads = [None] * m
        tails = [None] * m

        # distribute: append every node to the bucket of its current digit
        node = first
        while node is not None:
            following = node.next
            node.next = None
            slot = (node.val // base) % m
            if tails[slot] is None:
                heads[slot] = node
            else:
                tails[slot].next = node
            tails[slot] = node
            node = following

        # collect: chain the non-empty buckets from digit m - 1 down to 0
        first = last = None
        for slot in range(m - 1, -1, -1):
            if heads[slot] is None:
                continue
            if last is None:
                first = heads[slot]
            else:
                last.next = heads[slot]
            last = tails[slot]

        base *= m
    return first


class ListNode(object):
    """
    all linked list implemented should have header!!

    A node of a singly linked list. The list-level helpers below are meant
    to be called on the header node, whose own value is a placeholder.
    """
    def __init__(self, x):
        self.val = x
        self.next = None

    @classmethod
    def create_from_array(cls, array):
        """Build a list holding the items of `array`; return the header node."""
        header = cls(-1)
        tail = header
        for num in array:
            tail.next = cls(num)
            tail = tail.next
        return header

    def insert_first(self, node):
        """Link `node` directly after this node, in front of the old rest."""
        node.next = self.next
        self.next = node

    def insert_last(self, node):
        """Walk to the end of the chain (O(n)) and append `node` there."""
        tail = self
        while tail.next:
            tail = tail.next
        tail.next = node
        node.next = None

    def empty_list(self):
        """
        Detach every node that follows this one.

        The previous implementation only unbound a local name with `del`,
        which left the chain untouched.
        """
        node = self.next
        self.next = None
        while node is not None:
            following = node.next
            node.next = None
            node = following

    def display_list(self):
        """Print this node's value and all following values on one line."""
        # this function would make the head node visible
        values = []
        node = self
        while node is not None:
            values.append(str(node.val))
            node = node.next
        # a single-argument print call behaves the same on Python 2 and 3
        print(" ".join(values))


if __name__ == "__main__":
    head = ListNode.create_from_array([64,8,216,512,27,729,0,1,343,125,143,643,25,634,12,535,474])
    final = RadixSort_acc(head,16,8)
    final.display_list()

# --------------------------------------------------------------------------------
# /001_Expression_Trees.py:
# --------------------------------------------------------------------------------
# Expression Trees in 'Data Structure and Algorithm Analysis' P.97
#
# Function: Expression Tree construction with postfix input
#           Expression Tree calculation
#           Support +, -, *, /, ^, % operations only.
#           Support INT and FLOAT.
#
# Need stack to implement this.
# In the code below, we simulate a stack Data Structure by Linked List
#
# Construct: Store ans in stack
#
# Calculate: Use post-order traversal to calculate the result.
#

class BinaryTree(object):
    """Plain binary tree node: a value plus left and right children."""
    def __init__(self, content=-1):
        self.val = content
        self.left = None
        self.right = None

    def display(self):
        """Print the tree in pre-order, indenting each node by its depth."""
        def print_tree(tree, depth):
            print(" " * depth + str(tree.val))
            if tree.left:
                print_tree(tree.left, depth+1)
            if tree.right:
                print_tree(tree.right, depth+1)
        # call the recursive function
        print_tree(self, 0)


class ExpressionTree(object):
    """Build and evaluate binary expression trees from postfix input."""
    _operators = ['+', '-', '*', '/', '^', '%']

    # one binary function per supported operator symbol
    _operations = {
        '+': lambda left, right: left + right,
        '-': lambda left, right: left - right,
        '*': lambda left, right: left * right,
        '/': lambda left, right: left / right,
        '^': lambda left, right: left ** right,
        '%': lambda left, right: left % right,
    }

    @classmethod
    def construct(cls, in_list):
        """
        given in_list with operators and operands (postfix order),
        automatically construct an Expression Tree and return its root.

        Operands are int/float and are stored as float. Returns None and
        prints a message when the input is empty or malformed (last symbol
        is not an operator, an operator lacks operands, operands are left
        over, or a symbol is neither a number nor a supported operator).
        """
        if not in_list:
            return None
        if in_list[-1] not in cls._operators:
            print("Error Expression input! Input is None")
            return None

        operand_stack = Stack()
        for symbol in in_list:
            if isinstance(symbol, (int, float)):
                # if it is an operand, push it into stack
                operand_stack.push(BinaryTree(float(symbol)))
            elif symbol in cls._operators:
                # if it is an operator, pop 2 operands from stack
                # make 2 operands 2 children of the operator
                new_node = BinaryTree(symbol)
                first_operand = operand_stack.pop()
                second_operand = operand_stack.pop()
                if first_operand and second_operand:
                    # first operand is the one popped later
                    new_node.left, new_node.right = second_operand, first_operand
                    operand_stack.push(new_node)
                else:
                    operand_stack.pop_all()
                    print("Error Expression input! Stack already empty")
                    return None
            else:
                # previously such symbols were skipped silently, which hid
                # typos in the expression
                operand_stack.pop_all()
                print("Error Expression input! Unknown symbol: %r" % (symbol,))
                return None
        res = operand_stack.pop()
        # check whether stack is empty
        if operand_stack.is_empty():
            return res
        operand_stack.pop_all()
        print("Error Expression input! Stack is not empty in the end!")
        return None

    @classmethod
    def calculate(cls, in_tree):
        """
        Evaluate an expression tree with a post-order traversal.

        Returns None when in_tree is None (e.g. construct() rejected the
        input). Raises ValueError when an operator node lacks a child.
        """
        def cal_node(tree):
            if isinstance(tree.val, (int, float)):
                return tree.val
            if tree.left is None or tree.right is None:
                raise ValueError(
                    "Operator node %r is missing an operand" % (tree.val,))
            left = cal_node(tree.left)
            right = cal_node(tree.right)
            apply_op = cls._operations.get(tree.val)
            # an unsupported operator symbol evaluates to None
            return apply_op(left, right) if apply_op else None

        if in_tree is None:
            return None
        return cal_node(in_tree)


class Stack(object):
    """
    Simple stack implemented by linked list.
    """
    def __init__(self):
        # header node; the top of the stack is self._storage.next
        self._storage = ListNode(-1)

    def pop(self):
        """
        Remove and return the top value; print a notice and return None
        when the stack is empty.

        No need to care about freeing memory
        Once there is no reference to that resource,
        that resource would be freed.
        """
        top = self._storage.next
        if top:
            self._storage.next = top.next
            return top.val
        print('Empty Stack. Nothing to be pop!')
        return None

    def push(self, content):
        """Push content; a ListNode is linked in as-is, anything else is wrapped."""
        if isinstance(content, ListNode):
            node = content
        else:
            node = ListNode(content)
        node.next = self._storage.next
        self._storage.next = node

    def is_empty(self):
        return self._storage.next is None

    def pop_all(self):
        """
        Python would handle the memory for you :)
        """
        self._storage.next = None


class ListNode(object):
    """Singly linked list node."""
    def __init__(self, x):
        self.val = x
        self.next = None

    @classmethod
    def create_from_array(cls, array):
        """Build a chain from array; return its first data node (None if empty)."""
        header = cls(-1)
        tail = header
        for num in array:
            tail.next = cls(num)
            tail = tail.next
        return header.next

    def display(self):
        """Print this node's value and all following values on one line."""
        values = []
        node = self
        while node is not None:
            values.append(str(node.val))
            node = node.next
        # a single-argument print call behaves the same on Python 2 and 3
        print(" ".join(values))


if __name__ == "__main__":
    express = ExpressionTree.construct([1,2,'-',3,4,5,'/','^','%'])
    print(ExpressionTree.calculate(express))

# --------------------------------------------------------------------------------
# /002_Binary_Search_Tree.py:
# --------------------------------------------------------------------------------
# Binary Search Tree in 'Data Structure and Algorithm Analysis' P.100
#
# Keypoint: The book implements all the BST method in recursive way. However,
#           in my implementation, I make it all presented in LOOP-way.
#
# Deletion: The general strategy is to replace the data of this node with the
#           smallest data of the right subtree and then delete that node, because
#           the smallest node in the right sub tree cannot have a left child, which
#           can be easily deleted.
#           It works the same way if we replace it with the largest node in the left
#           subtree.
#           Since the LOOP-way implementation needs to keep track on the parent node
#           of the node to be deleted, which would add complexity to the code and make
#           the code look less elegant. Therefore, delete method is implemented in
#           recursive way.
#
# Empty Tree:
#           In this implementation, we don't add the root node into this Binary Tree.
#           Therefore, we use one node with value of "EmptyNode" to represent an empty
#           tree. This representation would exist only when it is an empty tree. This would
#           not represent the deleted node in the tree.
#           This implementation would help when we want to delete the tree to empty and
#           then add new items into the tree.
#
# Deletion Special Case:
#           1) delete the only root node, return a fresh "EmptyNode" tree
#           2) delete the tree into empty and then insert
#

class BinaryTree(object):
    """Plain binary tree node: a value plus left and right children."""
    def __init__(self, content=-1):
        self.val = content
        self.left = None
        self.right = None

    def display(self):
        """
        Print the tree in pre-order, indented by depth. A node with exactly
        one child shows the missing side as 'None'.
        """
        def print_tree(tree, depth):
            if tree:
                print(" " * depth + str(tree.val))
                if tree.left or tree.right:
                    print_tree(tree.left, depth+1)
                    print_tree(tree.right, depth+1)
            else:
                print(" " * depth + 'None')
        # call the recursive function
        print_tree(self, 0)

class BST(BinaryTree):
    """
    Binary search tree. An empty tree is a single node whose value is the
    string "EmptyNode"; equal keys are stored in the right subtree.
    """
    def __init__(self, content="EmptyNode"):
        super(BST,self).__init__(content)

    @classmethod
    def createFromList(cls, in_list):
        """Return a tree holding every item of in_list (empty tree if none)."""
        res = cls()
        if not in_list:
            return res
        for item in in_list:
            res.insert(item)
        return res

    def makeEmpty(self):
        """Turn this node back into the empty-tree marker."""
        # the marker lives in `val`; this used to write an unrelated
        # `value` attribute and left the old key in place
        self.val = 'EmptyNode'
        self.left = None
        self.right = None

    def find(self, match):
        """
        find item in the BST; return its node, or None when absent

        >>> import random
        >>> test = list(range(20))
        >>> random.shuffle(test)
        >>> bst = BST.createFromList(test)
        >>> for x in range(20):
        ...     res = bst.find(x)
        ...     if res is None or res.val != x:
        ...         print(x)
        ...
        >>> bst.find(20) is None
        True
        >>> bst.find(21) is None
        True
        >>> BST().find(3) is None
        True
        """
        if self.val == "EmptyNode":
            # never compare a key against the marker string
            return None
        this_tree = self
        while this_tree and match != this_tree.val:
            if match < this_tree.val:
                this_tree = this_tree.left
            else:
                this_tree = this_tree.right
        return this_tree

    def findMin(self):
        """
        return the node with the smallest value
        """
        this_tree = self
        while this_tree.left:
            this_tree = this_tree.left
        return this_tree

    def findMax(self):
        """
        return the node with the largest value
        """
        this_tree = self
        while this_tree.right:
            this_tree = this_tree.right
        return this_tree

    def insert(self, content):
        """Insert content in place (iteratively); equal keys go to the right."""
        this_tree = self
        while True:
            if this_tree.val == "EmptyNode":
                this_tree.val = content
                return
            if content < this_tree.val:
                if not this_tree.left:
                    # If left subtree is None
                    this_tree.left = BST(content)
                    return
                # Keep searching through left subtree
                this_tree = this_tree.left
            else:
                if not this_tree.right:
                    # If right subtree is None
                    this_tree.right = BST(content)
                    return
                # Keep searching through right subtree
                this_tree = this_tree.right

    def _deleteMin(self, p_node):
        """
        this method is to support delete method, makes it efficient

        Unlink the smallest node of the subtree rooted at self and return
        its value. p_node must be the parent of self.
        """
        this_tree = self
        while this_tree.left:
            p_node = this_tree
            this_tree = this_tree.left
        # this_tree is the node with the Min val
        if p_node.left is this_tree:
            # p_node.left -> this_tree
            p_node.left = this_tree.right
        else:
            p_node.right = this_tree.right
        return this_tree.val

    def delete(self, match):
        """
        should be called as bst = bst.delete(XX)

        Returns the root of the tree after deletion (a fresh empty tree when
        the last node was removed). Raises ValueError when match is absent.

        >>> import random
        >>> my_bst = BST()
        >>> item_set = list(range(20))
        >>> for _ in range(4):
        ...     random.shuffle(item_set)
        ...     for item in item_set:
        ...         my_bst.insert(item)
        ...     for i in range(20):
        ...         my_bst = my_bst.delete(i)
        ...     my_bst.display()
        ...
        EmptyNode
        EmptyNode
        EmptyNode
        EmptyNode
        """
        def _delete(node, match):
            if match < node.val:
                if node.left is None:
                    raise ValueError("deletion node not found!!")
                node.left = _delete(node.left, match)
            elif match > node.val:
                if node.right is None:
                    raise ValueError("deletion node not found!!")
                node.right = _delete(node.right, match)
            elif node.right and node.left:
                # this node has 2 children: take over the successor's value
                node.val = node.right._deleteMin(node)
            else:
                # this node has at most 1 child, which replaces it
                node = node.right if node.right else node.left
            return node

        if self.val == "EmptyNode":
            # nothing stored; never compare a key against the marker string
            raise ValueError("deletion node not found!!")
        # if all nodes of the tree have been deleted, return EmptyNode
        # There exists EmptyNode in the tree only at ROOT of the Tree
        after_deletion = _delete(self, match)
        if after_deletion is None:
            return BST()
        return after_deletion

if __name__ == "__main__":
    import doctest
    import random
    doctest.testmod()

# --------------------------------------------------------------------------------
# /003_AVL_Tree.py:
# --------------------------------------------------------------------------------
# AVL Tree in 'Data Structure and Algorithm Analysis' P.110
#
# Keypoint: After insertion, rotate the tree if AVL-condition is not reached.
#           Every node keep updated with the height information.
#

class BinaryTree(object):
    """Plain binary tree node: a value plus left and right children."""
    def __init__(self, content=-1):
        self.val = content
        self.left = None
        self.right = None

    def display(self):
        """
        Print the tree in pre-order with '|' / '+--+' guides. A node with
        exactly one child shows the missing side as 'None'.
        """
        def print_tree(tree, depth):
            if tree:
                if depth:
                    print("| " * (depth-1) + '+--+' + str(tree.val))
                else:
                    print('+' + str(tree.val))

                if tree.left or tree.right:
                    print_tree(tree.left, depth+1)
                    print_tree(tree.right, depth+1)
            else:
                print(" " * depth + 'None')
        # call the recursive function
        print_tree(self, 0)

class AVL(BinaryTree):
    """
    AVL tree. An empty tree is a single node whose value is the string
    "EmptyNode"; equal keys are stored in the right subtree. insert() and
    delete() return the (possibly new) root, so always rebind the result.
    """
    def __init__(self, content="EmptyNode"):
        # height of the subtree rooted here; a single node has height 0
        self._height = 0
        super(AVL,self).__init__(content)

    @classmethod
    def createFromList(cls, in_list):
        """Return a tree holding every item of in_list (empty tree if none)."""
        res = cls()
        if not in_list:
            return res
        for item in in_list:
            res = res.insert(item)
        return res

    @classmethod
    def getHeight(cls, node):
        """Height of node's subtree; -1 for anything that is not a tree node."""
        if isinstance(node, cls):
            return node._height
        return -1

    def makeEmpty(self):
        """Turn this node back into the empty-tree marker."""
        # the marker lives in `val`; this used to write an unrelated
        # `value` attribute and left the old key in place
        self.val = 'EmptyNode'
        self._height = 0
        self.left = None
        self.right = None

    def find(self, match):
        """
        find item in the AVL; return its node, or None when absent

        >>> import random
        >>> test = list(range(20))
        >>> random.shuffle(test)
        >>> bst = AVL.createFromList(test)
        >>> for x in range(20):
        ...     res = bst.find(x)
        ...     if res is None or res.val != x:
        ...         print(x)
        ...
        >>> bst.find(20) is None
        True
        >>> bst.find(21) is None
        True
        >>> AVL().find(3) is None
        True
        """
        if self.val == "EmptyNode":
            # never compare a key against the marker string
            return None
        this_tree = self
        while this_tree and match != this_tree.val:
            if match < this_tree.val:
                this_tree = this_tree.left
            else:
                this_tree = this_tree.right
        return this_tree

    def findMin(self):
        """
        return the node with the smallest value
        """
        this_tree = self
        while this_tree.left:
            this_tree = this_tree.left
        return this_tree

    def findMax(self):
        """
        return the node with the largest value
        """
        this_tree = self
        while this_tree.right:
            this_tree = this_tree.right
        return this_tree

    def singleRotate(self, flag):
        """
        Single rotate to handle outer unbalance.
        Paras: flag = 0 -> left node unbalanced
               flag = 1 -> right node unbalanced
        Returns the new root of this subtree.
        """
        # Right node unbalanced
        if flag:
            k1, k2 = self, self.right
            k1.right, k2.left = k2.left, k1
        # Left node unbalanced
        else:
            k1, k2 = self, self.left
            k1.left, k2.right = k2.right, k1

        # k1 is now a child of k2, so its height must be refreshed first
        k1._height = max(AVL.getHeight(k1.left), AVL.getHeight(k1.right)) + 1
        k2._height = max(AVL.getHeight(k2.left), AVL.getHeight(k2.right)) + 1
        return k2

    def doubleRotate(self, flag):
        """
        Double rotate to handle inner unbalance.
        Paras: flag = 0 -> left node unbalanced
               flag = 1 -> right node unbalanced
        Returns the new root of this subtree.
        """
        # Right node unbalanced
        if flag:
            k1, k2, k3 = self, self.right, self.right.left
            k1.right, k2.left, k3.left, k3.right = \
                k3.left, k3.right, k1, k2
        else:
            k1, k2, k3 = self, self.left, self.left.right
            k1.left, k2.right, k3.left, k3.right = \
                k3.right, k3.left, k2, k1
        # k1 and k2 are now children of k3: refresh them before k3
        k1._height = max(AVL.getHeight(k1.left), AVL.getHeight(k1.right)) + 1
        k2._height = max(AVL.getHeight(k2.left), AVL.getHeight(k2.right)) + 1
        k3._height = max(AVL.getHeight(k3.left), AVL.getHeight(k3.right)) + 1
        return k3

    def _rebalance(self):
        """
        Restore the AVL condition at this node, assuming both subtrees are
        already valid AVL trees with correct heights. Returns the root of
        the subtree afterwards.
        """
        height = AVL.getHeight
        diff = height(self.left) - height(self.right)
        if diff >= 2:
            # left side too tall: outer case unless the inner grandchild
            # is strictly taller
            if height(self.left.left) >= height(self.left.right):
                return self.singleRotate(0)
            return self.doubleRotate(0)
        if diff <= -2:
            if height(self.right.right) >= height(self.right.left):
                return self.singleRotate(1)
            return self.doubleRotate(1)
        self._height = max(height(self.left), height(self.right)) + 1
        return self

    def insert(self, content):
        """
        Should be called as avl = avl.insert()

        >>> avl = AVL.createFromList(range(1,8))
        >>> print(AVL.getHeight(avl))
        2
        >>> avl = avl.insert(15)
        >>> avl = avl.insert(16)
        >>> avl = avl.insert(14)
        >>> print(AVL.getHeight(avl))
        3
        """
        # When input_tree is an empty tree
        if self.val == "EmptyNode":
            self.val = content
            return self
        if content < self.val:
            # Left subtree operation
            self.left = self.left.insert(content) if self.left else AVL(content)
        else:
            # Right subtree operation (equal keys go right)
            self.right = self.right.insert(content) if self.right else AVL(content)
        # Operation on height, plus rotation when the AVL condition broke
        return self._rebalance()

    def _deleteMin(self, p_node):
        """
        Unlink the smallest node of the subtree rooted at self and return
        its value. p_node must be the parent of self.

        NOTE: this helper neither updates heights nor rebalances; delete()
        does not use it and removes the successor itself.
        """
        this_tree = self
        while this_tree.left:
            p_node = this_tree
            this_tree = this_tree.left
        # this_tree is the node with the Min val
        if p_node.left is this_tree:
            # p_node.left -> this_tree
            p_node.left = this_tree.right
        else:
            p_node.right = this_tree.right
        return this_tree.val

    def delete(self, match):
        """
        should be called as avl = avl.delete(XX)

        Returns the root of the tree after deletion (a fresh empty tree when
        the last node was removed). Raises ValueError when match is absent.
        Heights are refreshed and the tree is rebalanced on the way back up,
        so the AVL condition still holds afterwards.

        >>> import random
        >>> my_avl = AVL()
        >>> item_set = list(range(20))
        >>> for _ in range(4):
        ...     random.shuffle(item_set)
        ...     for item in item_set:
        ...         my_avl = my_avl.insert(item)
        ...     for i in range(20):
        ...         my_avl = my_avl.delete(i)
        ...     my_avl.display()
        ...
        +EmptyNode
        +EmptyNode
        +EmptyNode
        +EmptyNode
        """
        def _pop_min(node):
            # remove the smallest node below `node`;
            # returns (rebalanced subtree, removed value)
            if node.left is None:
                return node.right, node.val
            node.left, smallest = _pop_min(node.left)
            return node._rebalance(), smallest

        def _delete(node, match):
            if match < node.val:
                if node.left is None:
                    raise ValueError("deletion node not found!!")
                node.left = _delete(node.left, match)
            elif match > node.val:
                if node.right is None:
                    raise ValueError("deletion node not found!!")
                node.right = _delete(node.right, match)
            elif node.left is not None and node.right is not None:
                # this node has 2 children: take over the successor's value
                node.right, node.val = _pop_min(node.right)
            else:
                # this node has at most 1 child, which replaces it
                return node.right if node.right is not None else node.left
            return node._rebalance()

        if self.val == "EmptyNode":
            # nothing stored; never compare a key against the marker string
            raise ValueError("deletion node not found!!")
        # if all nodes of the tree have been deleted, return EmptyNode
        # There exists EmptyNode in the tree only at ROOT of the Tree
        after_deletion = _delete(self, match)
        if after_deletion is None:
            return AVL()
        return after_deletion

if __name__ == "__main__":
    import doctest
    import random
    doctest.testmod()
    my_avl = AVL.createFromList(range(1,100))
    my_avl.display()

# --------------------------------------------------------------------------------
# /004_Leetcode_Binary_Tree_Serialization.py:
# --------------------------------------------------------------------------------
try:
    from Queue import Queue  # Python 2
except ImportError:
    from queue import Queue  # Python 3

class BinaryTree(object):
    """
    Binary tree with level-order (de)serialization, '#' marking an absent
    child. An empty tree is a single node whose value is "EmptyNode".
    """
    def __init__(self, content="EmptyNode"):
        self.val = content
        self.left = None
        self.right = None

    def display(self):
        """
        Print the tree in pre-order with '|' / '+--+' guides. A node with
        exactly one child shows the missing side as 'None'.
        """
        def print_tree(tree, depth):
            if tree:
                if depth:
                    print("| " * (depth-1) + '+--+' + str(tree.val))
                else:
                    print('+' + str(tree.val))

                if tree.left or tree.right:
                    print_tree(tree.left, depth+1)
                    print_tree(tree.right, depth+1)
            else:
                print("| " * (depth-1) + '+--+' + 'None')
        # call the recursive function
        print_tree(self, 0)

    @classmethod
    def deserializeFromList(cls, in_list):
        """
        Build a tree from a level-order list in which '#' marks a missing
        child. Raises ValueError when the list has more items than open
        child slots.
        """
        res = BinaryTree()
        # open slots in level order: (parent node, False = left / True = right)
        queue = Queue()
        queue.put((res, False))
        for item in in_list:
            if queue.empty():
                raise ValueError("Deserialization list error!")
            parent, is_right = queue.get(block=False)
            if item != '#':
                new_node = parent.raw_insert(item, is_right)
                queue.put((new_node, False))
                queue.put((new_node, True))

        return res

    def serialize(self):
        """
        Return the level-order list of values with '#' for missing
        children; trailing '#' entries are dropped.
        """
        queue = Queue()
        res = list()
        queue.put(self)
        while not queue.empty():
            this_node = queue.get(block=False)
            if this_node:
                res.append(this_node.val)
                queue.put(this_node.left)
                queue.put(this_node.right)
            else:
                res.append('#')
        # strip the '#' markers produced by the last level
        while res and res[-1] == '#':
            res.pop()
        return res



    def raw_insert(self, content, flag):
        """
        Insert new node with content right below self node
        If the requested slot is already used, raise an error
        Params: content - the value of new node
                flag - False is for left node, True is for right node
        Return the new inserted node (self when self was the empty marker)
        """
        if self.val == "EmptyNode":
            self.val = content
            return self
        if flag and not self.right:
            new_node = BinaryTree(content)
            self.right = new_node
            return new_node
        elif not flag and not self.left:
            new_node = BinaryTree(content)
            self.left = new_node
            return new_node
        else:
            raise ValueError("Insertion Error!!")


if __name__ == "__main__":
    my_tree = BinaryTree.deserializeFromList([1,2,3,'#','#',4,'#','#',5, '#', '#'])
    my_tree.display()
    print(my_tree.serialize())

# --------------------------------------------------------------------------------
# /005_Binary_Heap.py:
# --------------------------------------------------------------------------------
# Priority Queue(Heap) in 'Data Structure and Algorithm Analysis' P.182
#
# Instance variable:
#       _size:
The max number of elements that the heap could contain 5 | # _capacity: The current number of elements in the heap 6 | # _elements: The list that contains all the elements, and 7 | # len(_elements) should be equal to _size + 1 for index 0 8 | # is a dummy slot. (Initialized as `None`) 9 | # 10 | # Import Operations(methods): 11 | # Insert: Insert a new element into the priority queue. 12 | # Start from the last slot in the heap, percolating up to 13 | # the node whose value is smaller than the inserted element. 14 | # Continue swap the new slot's value with its parent's value. 15 | # 16 | # DelMin: Return the min value of the heap and delete it from the heap. 17 | # Start from the root of the heap, percolating down to the 18 | # node whose child node doesn't exist. Continue swap the slot's 19 | # value with the smaller child's value. 20 | # 21 | # DelMin is harder to be implemented than Insert. Because when percolating 22 | # down, we have to judge whether there exists a child and which child to swap, 23 | # while when percolating up, we need only concern about its unique parent. 24 | # 25 | # 26 | # Tips: Any integer compare with None in Python would always return False 27 | # >>> 0 < None 28 | # False 29 | # 30 | # Binary Search Tree is much stricter than Binary Heap. 31 | # Because left and right child of one node in the tree should 32 | # obey different rule of binary search tree. While, in the binary 33 | # heap, both children obey the same rule (larger than parents). 34 | # 35 | # Binary Heap is always a complete binary tree. No need to worry about 36 | # balance problem. 37 | # 38 | 39 | class BinaryHeap(object): 40 | def __init__(self, maxsize): 41 | self._size = maxsize 42 | self._capacity = 0 43 | self._elements = [None for i in xrange(self._size+1)] 44 | 45 | @classmethod 46 | def creat_from_list(cls, in_list): 47 | """ 48 | This is a classmethod, which creat a heap from input list. 49 | For convenience. 
50 | 51 | >>> import random 52 | >>> test_list = range(100) 53 | >>> for i in xrange(5): 54 | ... random.shuffle(test_list) 55 | ... res = BinaryHeap.creat_from_list(test_list) 56 | ... print res.findMin() 57 | ... 58 | 0 59 | 0 60 | 0 61 | 0 62 | 0 63 | """ 64 | this_heap = cls(int(len(in_list)*1.5)) 65 | for element in in_list: 66 | this_heap.insert(element) 67 | return this_heap 68 | 69 | @property 70 | def maxsize(self): 71 | """ 72 | >>> test = BinaryHeap(12) 73 | >>> test.insertElements(range(12)) 74 | >>> test.maxsize 75 | 12 76 | >>> test = BinaryHeap(5) 77 | >>> test.maxsize 78 | 5 79 | """ 80 | return self._size 81 | 82 | @property 83 | def length(self): 84 | """ 85 | >>> test = BinaryHeap(12) 86 | >>> test.insertElements(range(12)) 87 | >>> test.length 88 | 12 89 | >>> test = BinaryHeap(5) 90 | >>> test.length 91 | 0 92 | >>> test.insertElements([5,2,4]) 93 | >>> test.length 94 | 3 95 | """ 96 | return self._capacity 97 | 98 | @property 99 | def isEmpty(self): 100 | """ 101 | >>> test = BinaryHeap(12) 102 | >>> test.isEmpty 103 | True 104 | >>> test.insertElements([4]) 105 | >>> test.isEmpty 106 | False 107 | """ 108 | return self._capacity == 0 109 | 110 | @property 111 | def isFull(self): 112 | """ 113 | >>> test = BinaryHeap(12) 114 | >>> test.isFull 115 | False 116 | >>> test.insertElements(range(12)) 117 | >>> test.isFull 118 | True 119 | """ 120 | return self._capacity == self._size 121 | 122 | def makeEmpty(self): 123 | """ 124 | >>> test = BinaryHeap(12) 125 | >>> test.isFull 126 | False 127 | >>> test.insertElements(range(12)) 128 | >>> test.isFull 129 | True 130 | >>> test.makeEmpty() 131 | >>> test.isEmpty 132 | True 133 | """ 134 | self._elements = [None for i in xrange(self._size+1)] 135 | self._capacity = 0 136 | 137 | def insert(self, element): 138 | if self.isFull: 139 | raise ValueError('Heap is already full!') 140 | slot_index = self._capacity + 1 141 | # when slot_index == 1, then slot_index/2 == 0 142 | # at this time, 
self._elements[slot_index/2] == None 143 | # However, integetr < None would always return False 144 | while element < self._elements[slot_index/2]: 145 | self._elements[slot_index] = self._elements[slot_index/2] 146 | slot_index /= 2 147 | self._elements[slot_index] = element 148 | # update current capacity 149 | self._capacity += 1 150 | 151 | def insertElements(self, elements): 152 | if self._capacity + len(elements) > self._size: 153 | raise ValueError("Too many elements to be inserted") 154 | for element in elements: 155 | self.insert(element) 156 | 157 | def delMin(self): 158 | """ 159 | Binary heap must be a complete binary tree. 160 | That is, all node except leaf must have left child. 161 | 162 | >>> import random 163 | >>> test_list = range(50) 164 | >>> test_heap = BinaryHeap(len(test_list)) 165 | >>> for x in range(10): 166 | ... random.shuffle(test_list) 167 | ... test_heap.insertElements(test_list) 168 | ... for i in xrange(test_heap.length): 169 | ... if test_heap.delMin() != i: 170 | ... print i, test_list 171 | ... if test_heap.length != len(test_list)-i-1: 172 | ... print i, test_list 173 | ... 
174 | """ 175 | if self.isEmpty: 176 | raise ValueError("Can't delete element from empty heap.") 177 | # get the first and the last elements 178 | # as well as empty the last slot 179 | min = self._elements[1] 180 | last = self._elements[self._capacity] 181 | self._elements[self._capacity] = None 182 | # update current capacity 183 | self._capacity -= 1 184 | 185 | # start percolating down 186 | # put the smaller child's value into parent's slot 187 | slot_index = 1 188 | while slot_index*2 <= self._capacity: 189 | # find the smaller child 190 | child = slot_index * 2 191 | if child != self._capacity and self._elements[child+1] < self._elements[child]: 192 | child += 1 193 | if last > self._elements[child]: 194 | self._elements[slot_index] = self._elements[child] 195 | else: 196 | break 197 | slot_index = child 198 | # put the last's value into the smaller child's slot 199 | self._elements[slot_index] = last 200 | return min 201 | 202 | def findMin(self): 203 | if self.isEmpty: 204 | raise ValueError("This is an empty Heap!! 
def shell_sort(nums):
    """
    Sort the list `nums` in place, in ascending order, using Shellsort.

    One of the first algorithms that breaks the quadratic barrier.
    An important property of Shellsort is that an h(k)-sorted list that
    is then h(k-1)-sorted remains h(k)-sorted.

    For every increment h, a gapped insertion sort puts every h-th
    element in order.  Any increment sequence works as long as it ends
    with 1; here each increment is the previous one halved and, when that
    is even, decreased by one so that all increments are odd.

    Returns None; lists with fewer than two elements are left untouched.
    """
    def increment_generator(length):
        # yields odd, strictly decreasing increments that end with 1
        h = length
        while h > 1:
            h //= 2  # floor division keeps h an int on Python 2 and 3
            h = max(1, h if h & 1 else h - 1)
            yield h

    size = len(nums)
    for h in increment_generator(size):
        for i in range(h, size):
            this, j = nums[i], i
            # check the bound first so nums[j-h] is never read with a
            # negative (wrapped-around) index
            while j >= h and nums[j - h] > this:
                nums[j] = nums[j - h]
                j -= h
            nums[j] = this
107 | """ 108 | def part_sort(start,end): 109 | """ 110 | Basic sort-operation for mergesort. END is not included. 111 | """ 112 | if end - start <= 1: 113 | return 114 | else: 115 | mid = start+(end-start)/2 116 | part_sort(start, mid) 117 | part_sort(mid, end) 118 | merge(start,mid, mid, end) 119 | def merge(start1, end1, start2, end2): 120 | """ 121 | Merge two sorted list together 122 | """ 123 | i, j, ans = start1, start2, [] 124 | while i nums[mid]: 161 | nums[start], nums[mid] = nums[mid], nums[start] 162 | if nums[start] > nums[end]: 163 | nums[start], nums[end] = nums[end], nums[start] 164 | if nums[mid] > nums[end]: 165 | nums[mid], nums[end] = nums[end], nums[start] 166 | # Now is SM, MED, LG 167 | nums[mid], nums[end] = nums[end], nums[mid] 168 | def qsort(start, end): 169 | """ 170 | This is the STEP 3&4 171 | Partition the list into two parts. 172 | And then, return {quick_sort(S1) followed by v followed by quick_sort(S2 )} 173 | `end` is included in the list. 174 | """ 175 | if start>=end: 176 | return 177 | median3(start, end) 178 | pivot, i, j = nums[end], start, end 179 | while True: 180 | while nums[i]pivot and ia[center]: 205 | a[start],a[center]=a[center],a[start] 206 | if a[start]>a[end]: 207 | a[start],a[end]=a[end],a[start] 208 | if a[center]>a[end]: 209 | a[center],a[end]=a[end],a[center] 210 | a[start],a[center]=a[center],a[start] 211 | def doSwap(start,end): 212 | if start>=end: 213 | return 214 | i,j=start,end 215 | median(start,end) 216 | tmp=a[start] 217 | while(True): 218 | while(a[j]>tmp and i=0: 28 | _stack.append(tmp) 29 | tmp = self._data[tmp] 30 | for _ in _stack: 31 | self._data[_] = tmp 32 | return tmp 33 | def union(self, i0, i1): 34 | """ 35 | This original implementation has a fatal error!! 36 | 1) Error 1: infinity loop problem 37 | test_set = BasicDisjointSet(10) 38 | test_set.union(8,9) 39 | test_set.union(9,8) 40 | If `test_set.find(8)` is performed, the program 41 | would run into infinity loop! 
class SmartUnionBySize(BasicDisjointSet):
    """
    Union by size: when two trees are merged, the smaller tree always
    becomes a subtree of the larger one.  With this rule the depth of any
    node is never more than logN.

    The size of a tree is kept in the `_data` slot of its root as a
    negative number, so a smaller (more negative) value means a bigger tree.
    """
    def union(self, i0, i1):
        if i0 == i1:
            raise ValueError("Must union two distinct element!")
        root0, root1 = self.find(i0), self.find(i1)
        if root0 == root1:
            # already in the same set
            return
        # pick the root that keeps its role; on a tie root1 stays the root
        if self._data[root0] < self._data[root1]:
            bigger, smaller = root0, root1
        else:
            bigger, smaller = root1, root0
        self._data[bigger] += self._data[smaller]
        self._data[smaller] = bigger


class SmartUnionByHeight(BasicDisjointSet):
    """
    Union by height: the root slot holds a negative number, and the tree
    whose root value is smaller (more negative) is treated as the higher one.
    """
    def find(self, identity):
        """
        Return the root of the set that contains `identity`.

        Full path compression is not used here; only `identity` itself is
        re-linked directly to the root.
        """
        root = identity
        while self._data[root] >= 0:
            root = self._data[root]
        if root != identity:
            self._data[identity] = root
        return root

    def union(self, i0, i1):
        if i0 == i1:
            raise ValueError("Must union two distinct element!")
        root0, root1 = self.find(i0), self.find(i1)
        if root0 == root1:
            return
        height0, height1 = self._data[root0], self._data[root1]
        if height0 < height1:
            # tree root0 is higher, hang root1 below it
            self._data[root1] = root0
            return
        if height0 == height1:
            # equal heights: the merged tree grows by one level
            self._data[root1] -= 1
        # tree root1 is at least as high, hang root0 below it
        self._data[root0] = root1
class TrieNode(object):
    """
    A single node of the trie.

    val     -- the character this node was created for
    isWord  -- True when the path from the root down to this node spells
               a complete inserted word
    diction -- maps a character to the child TrieNode for that character
    """
    def __init__(self, char):
        self.val = char
        self.isWord = False
        self.diction = {}

    def __getitem__(self, key):
        """Return the child stored under `key`, or None if there is none."""
        return self.diction.get(key)

    def __setitem__(self, key, val):
        self.diction[key] = val

    def child_list(self):
        """Return a list with the characters of all children."""
        return list(self.diction)


class TrieTree(object):
    """
    Prefix tree over strings.  The root is a dummy node that stands for
    the empty prefix.
    """
    def __init__(self):
        self.root = TrieNode('dummy')

    def insert(self, word):
        """
        Add `word` to the tree, creating the missing nodes on its path.

        The empty word is recorded on the root node, so that
        `find_word("")` is True after `insert("")`.
        """
        this_node = self.root
        for ch in word:
            child = this_node[ch]
            if child is None:
                child = TrieNode(ch)
                this_node[ch] = child
            this_node = child
        this_node.isWord = True

    def _walk(self, chars):
        """Follow `chars` from the root; return the node reached or None."""
        this_node = self.root
        for ch in chars:
            this_node = this_node[ch]
            if this_node is None:
                return None
        return this_node

    def find_prefix(self, prefix):
        """Return True if some path from the root spells `prefix`."""
        return self._walk(prefix) is not None

    def find_word(self, word):
        """Return True if `word` itself has been inserted."""
        this_node = self._walk(word)
        return this_node is not None and this_node.isWord

    def display_tree(self):
        """Placeholder, not implemented yet: does nothing and returns None."""
        return None
3 | # 4 | # When compared with a flat array of numbers, the Fenwick tree achieves much higher 5 | # performance for two operations: element update and prefix sum calculation. In a flat 6 | # array of n numbers, calculating prefix sum and updating the elements both require O(n) 7 | # time. Fenwick trees allow both operations to be performed on O(log n) time. This is 8 | # archieved by representing the numbers as a tree, where the value of each node is the 9 | # the sum of the numbers in that subtree. The tree structure allows operations to be 10 | # performed using only O(log n) node access. 11 | # 12 | # Trick this data sturcture use: 13 | # 1. n & (n-1) to eliminate the last '1' in the integer 14 | # parent node is array[i&(i-1)] 15 | # 2. the value of each node is the sum of all its ## 16 | # node i store the sum from (i&(i-1)) to i-1 inclusively. 17 | # 18 | # Methods supported: 19 | # 1. create_tree 20 | # 2. update_node 21 | # 3. query 22 | # 4. add_node 23 | 24 | # from tree_representation import Tree 25 | 26 | """ 27 | For a tree_index == 0b_1100_1011_1101_0010_1000 total 10 high bits, 28 | This node is at Level 10, 29 | This node's parent's index is 0b_1100_1011_1101_0010_0000 - by eliminating the last 1 30 | This node's next's index is 0b_1100_1011_1101_0011_0000 - by combining the last group of 1s 31 | This node's value is the sum of original[0b_1100_1011_1101_0010_0000] to original[0b_1100_1011_1101_0010_0111] 32 | """ 33 | 34 | class FenwickTree(object): 35 | def __init__(self, size): 36 | """ 37 | Init a Fenwick Tree by assuming the original data are all 0s 38 | size is the size of original data 39 | """ 40 | self._data = [0 for i in xrange(size+1)] 41 | self._size = size 42 | 43 | @classmethod 44 | def create_from_list(cls, nums): 45 | """ 46 | Assume the original array is all 0s, then update every element to nums[i] 47 | """ 48 | res = FenwickTree(len(nums)) 49 | for i, num in enumerate(nums): 50 | res.update_index_by_delta(i, num) 51 | return res 52 
| 53 | def _sumupto(self, i): 54 | """ 55 | Sum the original array up from array[0] to array[i] inclusively 56 | """ 57 | res, tree_index = 0, i+1 58 | while tree_index: 59 | res += self._data[tree_index] 60 | tree_index = self.get_parent(tree_index) 61 | return res 62 | 63 | 64 | def query_sum(self, i, j): 65 | """ 66 | Sum the original array up from array[i] to array[j] inclusively 67 | If out of index, return None 68 | """ 69 | if i >= self._size or j >= self._size or i < 0 or j < 0: 70 | return None 71 | if i == 0: 72 | return self._sumupto(j) 73 | return self._sumupto(j) - self._sumupto(i-1) 74 | 75 | def get_index(self, i): 76 | """ 77 | Get the original num at index i 78 | """ 79 | return self.query_sum(i,i) 80 | 81 | def update_index_by_delta(self, i, delta): 82 | """ 83 | Add delta to num at index i in the original array 84 | """ 85 | tree_index = i + 1 86 | while tree_index <= self._size: 87 | self._data[tree_index] += delta 88 | tree_index = self.get_next(tree_index) 89 | 90 | def update_index(self, i, value): 91 | """ 92 | Update the num at index i in the original array a new value 93 | """ 94 | delta = value - get_index(i) 95 | self.update_index_by_delta(i, delta) 96 | 97 | def get_parent(self, tree_index): 98 | """ 99 | Given a tree_index, return the tree_index of its parent. 100 | Just eliminate the least significant 1 in that number. 101 | 102 | (tree_index & -tree_index) would return the least significant bit 103 | 104 | >>> a = FenwickTree(2) 105 | >>> a.get_parent(0b1010) # return 0b1000 == 8 106 | 8 107 | >>> a.get_parent(0b1101) # return 0b1100 == 12 108 | 12 109 | """ 110 | return tree_index & (tree_index - 1) 111 | 112 | def get_next(self, tree_index): 113 | """ 114 | Given a tree_index, return the next tree_index should be updated. 115 | 116 | Search all siblings after this node, 117 | then search all parent's siblings after parent, 118 | then all grandparent's siblings after grandparent... 
119 | """ 120 | return (tree_index & -tree_index) + tree_index 121 | 122 | 123 | def fenwickTree_test(): 124 | fenwickTree = FenwickTree.create_from_list(range(1,1001)) 125 | for i in xrange(1000): 126 | assert fenwickTree.get_index(i) == i+1 127 | for j in xrange(i, 1000): 128 | assert fenwickTree.query_sum(i, j) == ((i+j+2) * (j-i+1))/2 129 | 130 | if __name__ == '__main__': 131 | # fenwickTree_test() 132 | a = FenwickTree.create_from_list(range(1, 16)) 133 | print a._data 134 | 135 | -------------------------------------------------------------------------------- /010_KMP.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | # Knuth-Morris-Pratt string searching algorithm 3 | # is a string matching algorithm wants to find the starting index `m` 4 | # in string `S[]` that matches the search word `W[]`. 5 | # 6 | # Reference: 7 | # https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm 8 | # http://www.ruanyifeng.com/blog/2013/05/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.html 9 | # 10 | # It search for a "word" W within a main "text string" S by employling the observation that 11 | # when a mismatch occurs, the word itself embodies sufficient information to determine 12 | # where the next match could begin, thus bypassing re-examination of previously matched characters. 13 | # 14 | # Time Complexity: O(n) where n is the size of text string 15 | # Space Complexity: O(k) where k is the size of search string 16 | # 17 | 18 | def _LPSArray(word): 19 | """ 20 | Build the longest prefix suffix table. 21 | `delta += i+1` makes it O(n) time complexity. 22 | 23 | e.g. 
def kmp_strstr(haystack, needle):
    """
    Return the list of all start indices at which `needle` occurs in
    `haystack` (overlapping occurrences included), using the
    Knuth-Morris-Pratt algorithm.

    A prefix-suffix table is built for `needle` first.  On a mismatch the
    table tells how many characters of `needle` are already known to
    match, so the position in `haystack` never moves backwards.

    Special cases: an empty `needle`, or a `needle` equal to `haystack`,
    gives [0]; an empty `haystack` with a non-empty `needle` gives [].
    """
    def build_table(pattern):
        # table[k] is the length of the longest proper prefix of
        # pattern[:k+1] that is also a suffix of it
        table = [0] * len(pattern)
        matched, pos = 0, 1
        while pos < len(pattern):
            if pattern[pos] == pattern[matched]:
                matched += 1
                table[pos] = matched
                pos += 1
            elif matched:
                # fall back to the next shorter candidate prefix
                matched = table[matched - 1]
            else:
                pos += 1
        return table

    # handle special cases
    if haystack == needle or not needle:
        return [0]
    if not haystack:
        return []

    table = build_table(needle)
    hits = []
    text_pos = pattern_pos = 0
    text_len, pattern_len = len(haystack), len(needle)
    while text_pos < text_len:
        if haystack[text_pos] != needle[pattern_pos]:
            if pattern_pos:
                # reuse the part of the needle that still matches
                pattern_pos = table[pattern_pos - 1]
            else:
                text_pos += 1
            continue
        text_pos += 1
        pattern_pos += 1
        if pattern_pos == pattern_len:
            hits.append(text_pos - pattern_len)
            # keep going so overlapping matches are found too
            pattern_pos = table[pattern_pos - 1]
    return hits
special cases 149 | if haystack == needle: 150 | return [0] 151 | elif not needle: 152 | return [0] 153 | elif not haystack: 154 | return [] 155 | 156 | i_haystack = 0 157 | ans = [] 158 | while i_haystack + len(needle) - 1 < len(haystack): 159 | i_needle = 0 160 | while haystack[i_haystack + i_needle] == needle[i_needle]: 161 | i_needle += 1 162 | if i_needle == len(needle): 163 | ans.append(i_haystack) 164 | break 165 | i_haystack += 1 166 | return ans 167 | 168 | def strstr_test(): 169 | test_text = \ 170 | """ 171 | A string matching algorithm wants to find the starting index m in string S[] that matches the search word W[]. 172 | The most straightforward algorithm is to look for a character match at successive values of the index m, the position in the string being searched, i.e. S[m]. If the index m reaches the end of the string then there is no match, in which case the search is said to "fail". At each position m the algorithm first checks for equality of the first character in the word being searched, i.e. S[m] =? W[0]. If a match is found, the algorithm tests the other characters in the word being searched by checking successive values of the word position index, i. The algorithm retrieves the character W[i] in the word being searched and checks for equality of the expression S[m+i] =? W[i]. If all successive characters match in W at position m, then a match is found at that position in the search string. 173 | Usually, the trial check will quickly reject the trial match. If the strings are uniformly distributed random letters, then the chance that characters match is 1 in 26. In most cases, the trial check will reject the match at the initial letter. The chance that the first two letters will match is 1 in 262 (1 in 676). So if the characters are random, then the expected complexity of searching string S[] of length k is on the order of k comparisons or O(k). The expected performance is very good. 
If S[] is 1 billion characters and W[] is 1000 characters, then the string search should complete after about one billion character comparisons. 174 | That expected performance is not guaranteed. If the strings are not random, then checking a trial m may take many character comparisons. The worst case is if the two strings match in all but the last letter. Imagine that the string S[] consists of 1 billion characters that are all A, and that the word W[] is 999 A characters terminating in a final B character. The simple string matching algorithm will now examine 1000 characters at each trial position before rejecting the match and advancing the trial position. The simple string search example would now take about 1000 character comparisons times 1 billion positions for 1 trillion character comparisons. If the length of W[] is n, then the worst-case performance is O(kn). 175 | The KMP algorithm has a better worst-case performance than the straightforward algorithm. KMP spends a little time precomputing a table (on the order of the size of W[], O(n)), and then it uses that table to do an efficient search of the string in O(k). 176 | The difference is that KMP makes use of previous match information that the straightforward algorithm does not. In the example above, when KMP sees a trial match fail on the 1000th character (i = 999) because S[m+999] ≠ W[999], it will increment m by 1, but it will know that the first 998 characters at the new position already match. KMP matched 999 A characters before discovering a mismatch at the 1000th character (position 999). Advancing the trial match position m by one throws away the first A, so KMP knows there are 998 A characters that match W[] and does not retest them; that is, KMP sets i to 998. KMP maintains its knowledge in the precomputed table and two state variables. When KMP discovers a mismatch, the table determines how much KMP will increase (variable m) and where it will resume testing (variable i). 
177 | """ 178 | words = ['algorithm', 'equality', 'strings', 'difference', 'billion', 'random', 'letters', 'searched', 179 | 'The algorithm retrieves the character W[i] in the word being searched and checks for equality of the expression', 180 | '1000 characters, then the string search should complete', 'ormance is not guarant', 181 | 're all A, and that the word W[] is 999 A characters termin', 'dvancing the trial position. The simple string search example would now take ab', 182 | 'mation that the straightforw', 'en it uses ', ' uniformly dist', 'arch is said to "fai'] 183 | for word in words: 184 | assert normal_strstr(test_text, word) == kmp_strstr(test_text, word) 185 | 186 | 187 | if __name__ == '__main__': 188 | test_strings = ["aaaaaaaaaaacccccccdddddc", "qwertyuiopasdfghjklxcvbnm", "abababcabababcabcdeab" 189 | "jjjjjkkkkkjjjjjkkkkkkjkjjjkkl", "jkljkljkljkljkljkl", "ABABDABACDABABCABAB", 190 | "", "", "emptytest", 191 | "aaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaaaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaaba"] 192 | words = ["ccccccc", "hjkl", "abababcabcd" 193 | "jjjjjjkkk", "ljk", "ABABCABAB", 194 | "", "emptytest", "", 195 | "aaaaaaa"] 196 | for i, text in enumerate(test_strings): 197 | assert kmp_strstr(text, words[i]) == normal_strstr(text, words[i]) 198 | # print _LPSArray("abababcabcd") 199 | # print _LPSArray("aaaacaa") 200 | # print _LPSArray("aaaaaaaaaa") 201 | # print kmp_strstr("aaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaaaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaaa","aaaaaaa") 202 | # print normal_strstr("aaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaaaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaabaaaaaaa","aaaaaaa") 
def manacher(string):
    """
    Return the set of all longest palindromic substrings of `string`,
    found with Manacher's algorithm.  An empty string gives an empty set.

    Key points of Manacher's algorithm:
    1) A special character "#" is virtually inserted before, between and
       after the characters, so odd and even palindromes are handled the
       same way.  The string is never really built; virtual indices are
       converted to physical ones instead:
           virtual_index  = physical_index * 2 + 1
           physical_index = (virtual_index - 1) // 2   (odd virtual_index)

    2) `lps[i]` holds the half length (rounded up, center included) of the
       palindrome centered at virtual index `i`.  The length of the real
       palindrome is `lps[i] - 1`.

    3) If `i` lies inside an already known palindrome, `lps[i]` starts
       from the value of its mirror position instead of from 1.

    Time Complexity:  O(n)
    Space Complexity: O(n)

    Example for "abaaba":
                    #  a  #  b  #  a  #  a  #  b  #  a  #
        virtual     0  1  2  3  4  5  6  7  8  9  10 11 12
        LPS table   1  2  1  4  1  2  7  2  1  4  1  2  1
    The largest entry is 7 (at virtual index 6), so the longest
    palindromic substring is "abaaba".
    """
    def get_actual_char(virtual_index):
        if virtual_index & 1:
            # floor division: the index must stay an int on Python 3 too
            return string[(virtual_index - 1) // 2]
        return "#"

    if not string:
        return set()
    lps = [1] * (len(string) * 2 + 1)
    max_length = 0
    right_border = 0
    center_index = 0
    for i in range(len(lps)):
        # This IF-STATEMENT is the key of MANACHER'S Algorithm:
        # inside a longer palindrome, start from the symmetric value
        if right_border > i:
            lps[i] = min(lps[2 * center_index - i], right_border - i + 1)
        # expand this palindrome
        while (0 <= i - lps[i] and i + lps[i] < len(lps)
               and get_actual_char(i + lps[i]) == get_actual_char(i - lps[i])):
            lps[i] += 1
        max_length = max(max_length, lps[i])
        # remember the palindrome that reaches furthest to the right
        if lps[i] + i - 1 > right_border:
            right_border = lps[i] + i - 1
            center_index = i
    # virtual span [i-num+1, i+num-1] maps to the physical slice below
    return set(string[(i - num + 1) // 2:(i + num - 1) // 2]
               for i, num in enumerate(lps) if num == max_length)


def dp_LongestPalindromeSubstring(string):
    """
    DP solution to Longest Palindromic Substring; returns the set of all
    longest palindromic substrings (an empty set for an empty string).

    dp[i][j] - whether the substring from string[i] to string[j]
               (inclusive) is palindromic

    Time Complexity:  O(n^2)
    Space Complexity: O(n^2)
    """
    size = len(string)
    dp = [[0] * size for _ in range(size)]
    # `count` is the number of consecutive lengths without any palindrome;
    # when it hits 2 no longer palindrome can exist and the loop ends
    ans, count = [], 0
    for delta in range(size):
        this_ans = []
        for i in range(size - delta):
            j = i + delta
            # single characters and equal pairs are palindromes; longer
            # spans need matching ends around a palindromic middle
            if string[i] == string[j] and (delta < 2 or dp[i + 1][j - 1]):
                dp[i][j] = 1
                this_ans.append((i, j + 1))
        if this_ans:
            ans = this_ans
            count = 0
        else:
            count += 1
            if count == 2:
                break
    # ans are stored as (start_index, end_index + 1)
    return set(string[start:end] for start, end in ans)


def normal_LongestPalindromeSubstring(string):
    """
    Expand-around-center solution; returns the set of all longest
    palindromic substrings (an empty set for an empty string).

    For every position i two centers are tried and expanded in both
    directions as long as the characters match:
        even length: the center lies between i-1 and i
        odd length:  the center is i itself

    Time Complexity:  O(n^2)
    Space Complexity: O(1) besides the stored result
    """
    max_length = 1
    ans = []
    for i in range(1, len(string)):
        for low, high in ((i - 1, i), (i, i)):
            while 0 <= low and high < len(string) and string[low] == string[high]:
                length = high - low + 1
                if length > max_length:
                    max_length = length
                    ans = [(low, high + 1)]
                elif length == max_length:
                    ans.append((low, high + 1))
                low -= 1
                high += 1
    if max_length == 1:
        # No palindrome longer than one character: every single character
        # is a longest palindrome.  (`string.split()` used to be returned
        # here, which yields whitespace-separated words instead.)
        return set(string)
    return set(string[start:end] for start, end in ans)
normal_LongestPalindromeSubstring(test) 199 | 200 | 201 | if __name__ == '__main__': 202 | # assert dp_LongestPalindromeSubstring("aaab.ba") == set(["ab.ba"]) 203 | # assert dp_LongestPalindromeSubstring("aaaaabbbb") == set(["aaaaa"]) 204 | # assert dp_LongestPalindromeSubstring("aaaaa") == set(["aaaaa"]) 205 | # assert dp_LongestPalindromeSubstring(".babcbaaaaa") == set(["abcba", "aaaaa"]) 206 | # assert normal_LongestPalindromeSubstring("aaab.ba") == set(["ab.ba"]) 207 | # assert normal_LongestPalindromeSubstring("aaaaabbbb") == set(["aaaaa"]) 208 | # assert normal_LongestPalindromeSubstring("aaaaa") == set(["aaaaa"]) 209 | # assert normal_LongestPalindromeSubstring(".babcbaaaaa") == set(["abcba", "aaaaa"]) 210 | # testcases = ["", "a", "aa", "aaa", "aaaa", "aaaaba", "ab.bab.", "asdsfsexe"] 211 | # for test in testcases: 212 | # assert dp_LongestPalindromeSubstring(test) == normal_LongestPalindromeSubstring(test) 213 | # # print manacher(test), normal_LongestPalindromeSubstring(test) 214 | # assert manacher(test) == normal_LongestPalindromeSubstring(test) 215 | # print manacher("abaaba") 216 | # print manacher("") 217 | longest_palindrome_substring_test() 218 | # manacher("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") 219 | # normal_LongestPalindromeSubstring("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") -------------------------------------------------------------------------------- /012_Morris_Traversal_Binary_Tree.py: -------------------------------------------------------------------------------- 1 | # Reference: http://www.cnblogs.com/AnnieKim/archive/2013/06/15/MorrisTraversal.html 2 | # 3 | # Morris Traversal is an algorithm that can traverse a bianry traverse with 4 | # O(n) time Complexity and O(1) space Complexity. 
# The usual recursive and iterative traversals need O(h) extra space for the
# call stack / explicit stack, where h is the height of the tree
# (O(log n) only when the tree is balanced).
#
# Note: `preNode` of curNode in the steps refers to the node that is adjacently before
#       `curNode` in the output list of the inorder traversal of the given tree.
#
# Steps for in-order-traversal:
#   0. set curNode to root
#   1. if curNode.left is None, output curNode.val and set curNode to curNode.right
#   2. if curNode.left is not None, find preNode of curNode in its left subtree
#       1) if preNode.right == None, set preNode.right to curNode and
#          update curNode to curNode.left
#       2) if preNode.right == curNode, set preNode.right back to None, output curNode,
#          and update curNode to curNode.right
#   3. loop step 1 and 2 until curNode == None
#
# Steps for pre-order-traversal:
#   0. set curNode to root
#   1. if curNode.left is None, output curNode.val and set curNode to curNode.right
#   2. if curNode.left is not None, find preNode of curNode in its left subtree
#       1) if preNode.right == None, output curNode.val, set preNode.right to curNode
#          and update curNode to curNode.left
#       2) if preNode.right == curNode, set preNode.right back to None, and update
#          curNode to curNode.right
#   3. loop step 1 and 2 until curNode == None


class TreeNode(object):
    """A binary tree node holding `val` and optional `left` / `right` children."""

    def __init__(self, val):
        self.val = val
        self.left = None
        self.right = None

    def display(self):
        """Print the subtree rooted at this node, one node per line, indented by depth."""
        def print_tree(tree, depth):
            if tree:
                print(" " * depth + str(tree.val))
                # Leaves are printed without their two empty children.
                if tree.left or tree.right:
                    print_tree(tree.left, depth + 1)
                    print_tree(tree.right, depth + 1)
            else:
                print(" " * depth + 'None')
        # call the recursive function
        print_tree(self, 0)


def _findRightmost(root, parent):
    """
    Find the rightmost node of the subtree rooted at `root`.

    `parent` is the node whose left child is `root`.  During a Morris
    traversal the rightmost node may temporarily point back at `parent`;
    stopping there prevents an infinite loop.
    """
    tmp = root
    # Identity, not equality: we are following actual links between nodes.
    while tmp.right is not None and tmp.right is not parent:
        tmp = tmp.right
    return tmp


def inOrder_Morris(root):
    """
    In-order traversal using the Morris algorithm.

    The tree is temporarily threaded while traversing and is restored to its
    original shape before the function returns.

    :type root: TreeNode or None
    :rtype: list - node values in in-order
    """
    ans = []
    curNode = root
    while curNode:
        if curNode.left:
            prevNode = _findRightmost(curNode.left, curNode)
            if prevNode.right is curNode:
                # Second visit: the left subtree is done, remove the thread.
                prevNode.right = None
                ans.append(curNode.val)
                curNode = curNode.right
            else:
                # First visit: leave a thread back to curNode and go left.
                prevNode.right = curNode
                curNode = curNode.left
        else:
            ans.append(curNode.val)
            curNode = curNode.right
    return ans


def preOrder_Morris(root):
    """
    Pre-order traversal using the Morris algorithm.

    The tree is temporarily threaded while traversing and is restored to its
    original shape before the function returns.

    :type root: TreeNode or None
    :rtype: list - node values in pre-order
    """
    ans = []
    curNode = root
    while curNode:
        if curNode.left:
            prevNode = _findRightmost(curNode.left, curNode)
            if prevNode.right is curNode:
                # Second visit: curNode was already output, remove the thread.
                prevNode.right = None
                curNode = curNode.right
            else:
                # First visit: output before descending into the left subtree.
                ans.append(curNode.val)
                prevNode.right = curNode
                curNode = curNode.left
        else:
            ans.append(curNode.val)
            curNode = curNode.right
    return ans


if __name__ == '__main__':
    testTree = TreeNode(1)
    testTree.left = TreeNode(2)
    testTree.left.left = TreeNode(3)
    testTree.left.right = TreeNode(4)
    testTree.right = TreeNode(5)
    testTree.right.left = TreeNode(6)
    testTree.right.right = TreeNode(7)
    testTree.right.right.left = TreeNode(8)
    testTree.right.right.left.right = TreeNode(9)

    # testTree.display()
    # print(inOrder_Morris(testTree))
    print(preOrder_Morris(testTree))

# --------------------------------------------------------------------------------
# /013_Longest_Common_Subsequence.py:
# --------------------------------------------------------------------------------
# Given two sequences, find the length of longest subsequence present in both of them.
# A subsequence is a sequence that appears in the same relative order, but not necessarily contiguous.
# For example, "abc", "abg", "bdf", "aeg", "acefg", .. etc are subsequences of "abcdefg".
# So a string of length n has 2^n different possible subsequences.

class Solution(object):
    def longestCommonSubsequence(self, a, b):
        """
        Return the length of the longest common subsequence of `a` and `b`.

        dp[i][j] is the LCS length of a[:i] and b[:j]; row 0 and column 0
        stay 0 because an empty prefix has nothing in common with anything.

        :type a: string
        :type b: string
        :rtype: int
        """
        lenA = len(a)
        lenB = len(b)
        dp = [[0] * (lenB + 1) for _ in range(lenA + 1)]

        for i in range(1, lenA + 1):
            for j in range(1, lenB + 1):
                if a[i - 1] == b[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i][j - 1], dp[i - 1][j])
        return dp[-1][-1]


if __name__ == '__main__':
    sol = Solution()
    print(sol.longestCommonSubsequence("abb", "bbc"))
    print(sol.longestCommonSubsequence("ABCDGH", "AEDFHR"))
    print(sol.longestCommonSubsequence("AGGTAB", "GXTXAYB"))

# --------------------------------------------------------------------------------
# /014_Floyd_Warshall_Algorithm.py:
# --------------------------------------------------------------------------------
# Python Program for Floyd Warshall Algorithm

# Define infinity as the large enough value.
# This value will be
# used for vertices not connected to each other
INF = float("inf")


# Solves all pair shortest path via Floyd Warshall Algorithm
def floydWarshall(graph):
    """
    Compute the shortest distance between every pair of vertices.

    `graph` is a square adjacency matrix where graph[i][j] is the weight of
    the edge i -> j (INF when there is no edge, 0 on the diagonal).  The
    input matrix is not modified; the result matrix is printed row by row
    and returned.

    Vertices are added one by one to the set of allowed intermediate
    vertices:
    ---> Before iteration k, dist[i][j] is the shortest distance that uses
         only vertices {0, 1, .. k-1} as intermediate vertices.
    ---> After iteration k, vertex k has been added to that set, which
         becomes {0, 1, .. k}.

    :type graph: list[list[number]]
    :rtype: list[list[number]]
    """
    # len(graph) rather than len(graph[0]) so an empty matrix is handled.
    V = len(graph)
    # Start from the direct edges, i.e. paths with no intermediate vertex.
    dist = [list(row) for row in graph]

    for k in range(V):
        # pick all vertices as source one by one
        for i in range(V):
            # Pick all vertices as destination for the above picked source
            for j in range(V):
                # If vertex k is on the shortest path from
                # i to j, then update the value of dist[i][j]
                dist[i][j] = min(dist[i][j], dist[i][k] + dist[k][j])

    for line in dist:
        print(line)

    return dist


if __name__ == '__main__':
    graph = [[0, INF, INF, 1, 101],
             [INF, 0, 200, INF, INF],
             [INF, 200, 0, INF, 103],
             [INF, INF, 100, 0, INF],
             [INF, INF, 102, INF, 0]]
    floydWarshall(graph)

# --------------------------------------------------------------------------------
# /015_Bellman_Ford_Algorithm.py:
# --------------------------------------------------------------------------------
# Bellman-Ford Algorithm - single source shortest paths algorithm
# step 1: init the cost of every path as `inf` and the src to src dist as 0
# step 2: loop for N - 1 times, where N is the number of vertices
#         for each edge in the graph, update the shortest distance
# step 3: check for the negative weighted cycle in the graph


class GraphNode(object):
    """A named node whose `children` dict maps child node name -> edge weight."""

    def __init__(self, nodeName, distDict):
        self.nodeName = nodeName
        self.children = distDict


class Graph(object):
    """A directed, weighted graph over the vertices 0 .. numNodes - 1, stored as an edge list."""

    def __init__(self, numNodes):
        self.vertices = numNodes
        self.edges = []

    def addEdge(self, src, dest, weight):
        """
        Add the directed edge src -> dest.

        :raises ValueError: if src or dest is not in 0 .. vertices - 1
        """
        # Negative indices are rejected as well: they would silently wrap
        # around when used to index the distance list.
        if not (0 <= src < self.vertices and 0 <= dest < self.vertices):
            raise ValueError("src or dest node doesn't exist")
        self.edges.append((src, dest, weight))


def bellmanFord_GraphVersion(graph, source):
    """
    :type graph: Graph
    :type source: int - the index of the source vertex
    :rtype: list - dist[v] is the distance from source to vertex v
                   (inf when v is unreachable)
    :raises ValueError: if a negative weighted cycle is reachable from source
    """
    numNodes = graph.vertices
    # step 1
    dist = [float("inf")] * numNodes
    dist[source] = 0
    # step 2
    for _ in range(numNodes - 1):
        changed = False
        for src, dest, weight in graph.edges:
            candidate = dist[src] + weight
            if candidate < dist[dest]:
                dist[dest] = candidate
                changed = True
        # A full pass without any update means the distances are final.
        if not changed:
            break
    # step 3: any edge that can still be relaxed lies on a negative cycle
    for src, dest, weight in graph.edges:
        if dist[src] + weight < dist[dest]:
            raise ValueError("Negative Weighted Cycle detected!!")
    return dist


def bellmanFord_GraphNodeVersion(graph, source):
    """
    :type graph: list[GraphNode]
    :type source: str - the name of the source node
    :rtype: dict - maps node name to its distance from source; an empty dict
                   is returned when a negative weighted cycle is detected
    """
    def getEdge(graph):
        for node in graph:
            for dest, weight in node.children.items():
                yield node.nodeName, dest, weight

    dist = {node.nodeName: float("inf") for node in graph}
    # A node may appear only as somebody's child; give it an entry too so
    # that relaxing an edge into it does not raise KeyError.
    for _, dest, _ in getEdge(graph):
        dist.setdefault(dest, float("inf"))
    dist[source] = 0
    # N - 1 passes are enough for N nodes
    for _ in range(len(dist) - 1):
        changed = False
        for src, dest, weight in getEdge(graph):
            candidate = dist[src] + weight
            if candidate < dist[dest]:
                dist[dest] = candidate
                changed = True
        if not changed:
            break

    for src, dest, weight in getEdge(graph):
        if dist[src] + weight < dist[dest]:
            return dict()
    return dist


if __name__ == '__main__':
    # the graphNode version of Bellman-Ford Algorithm
    nodeA = GraphNode("A", {"B": 1})
    nodeB = GraphNode("B", {"C": 5})
    nodeC = GraphNode("C", {"D": 7})
    nodeD = GraphNode("D", {"E": 3})
    nodeE = GraphNode("E", {"C": 6})
    print(bellmanFord_GraphNodeVersion([nodeE, nodeC, nodeA, nodeD, nodeB], "A"))
    # the Graph version of Bellman-Ford Algorithm
    graph = Graph(5)
    graph.addEdge(3, 1, -2)
    graph.addEdge(2, 3, 5)
    graph.addEdge(4, 1, 2)
    graph.addEdge(2, 4, 1)
    graph.addEdge(3, 0, 2)
    graph.addEdge(1, 0, 3)
    graph.addEdge(0, 1, -1)
    graph.addEdge(4, 3, 2)
    graph.addEdge(1, 2, 3)
    print(bellmanFord_GraphVersion(graph, 0))

# --------------------------------------------------------------------------------
# /016_Topological_Sort.py:
# --------------------------------------------------------------------------------
from collections import defaultdict, deque
import random
import time


class Graph(object):
    """A directed graph over the vertices 0 .. vertexNum - 1, stored as a set of edges."""

    def __init__(self, vertexNum):
        self.vertices = vertexNum
        self.edges = set()

    def addEdge(self, src, dest):
        """
        Add the directed edge src -> dest (duplicates are ignored).

        :raises ValueError: if src or dest is not in 0 .. vertices - 1
        """
        if not (0 <= src < self.vertices and 0 <= dest < self.vertices):
            raise ValueError("dest or src is out of range!!")
        self.edges.add((src, dest))

    def __str__(self):
        graph = defaultdict(set)
        for src, dest in self.edges:
            graph[src].add(dest)
            # make sure pure sink nodes show up with an empty set
            graph.setdefault(dest, set())
        return "%s" % graph


def topologicalSort_Naive(graph):
    """
    Naive solution to sort the graph topologically.
    Time Complexity: O(E + V^2)

    :type graph: Graph
    :rtype: list[int] - the vertices in a topological order
    :raises ValueError: if the graph contains a cycle
    """
    # O(E) time to construct the graphMap: node -> set of its predecessors
    graphMap = {i: set() for i in range(graph.vertices)}
    for src, dest in graph.edges:
        graphMap[dest].add(src)
    # O(V^2) time to form the topological sort
    ans = []
    while graphMap:
        # Collect first and delete afterwards: removing keys while iterating
        # over the dict raises RuntimeError on Python 3.
        noIncomming = [node for node, preds in graphMap.items() if not preds]
        if not noIncomming:
            raise ValueError("Cycle Detected in the Graph!!!")
        for node in noIncomming:
            del graphMap[node]
        ans += noIncomming
        # kick the emitted nodes out of every remaining predecessor set
        removed = set(noIncomming)
        for preds in graphMap.values():
            preds.difference_update(removed)
    return ans


def topologicalSort_Better(graph):
    """
    Topological Sort Algorithm according to the pseudo-code in DSAA
    (repeatedly emit a vertex whose in-degree has dropped to zero).
    Time Complexity: O(V + E)

    :type graph: Graph
    :rtype: list[int] - the vertices in a topological order
    :raises ValueError: if the graph contains a cycle
    """
    # O(E) time to construct the indegree and adjacent tables.
    # One fresh set per vertex: `[set()] * n` would make every vertex share
    # a single set and corrupt the in-degree bookkeeping.
    indegree = [0] * graph.vertices
    adjacent = [set() for _ in range(graph.vertices)]
    for src, dest in graph.edges:
        indegree[dest] += 1
        adjacent[src].add(dest)
    ans = []
    # O(V) time to find the initial zero in-degree nodes
    zeroIncomming = deque(node for node, num in enumerate(indegree) if num == 0)
    # O(E) time in total to form the topological sort
    while zeroIncomming:
        thisNode = zeroIncomming.popleft()
        ans.append(thisNode)
        # New zero in-degree nodes can only come from the children of the
        # node that has just been emitted.
        for child in adjacent[thisNode]:
            indegree[child] -= 1
            if indegree[child] == 0:
                zeroIncomming.append(child)
    # Vertices on a cycle never reach in-degree zero; report it the same way
    # topologicalSort_Naive does instead of returning a partial order.
    if len(ans) != graph.vertices:
        raise ValueError("Cycle Detected in the Graph!!!")
    return ans


def randomCreateDAG(nodeNum, edgeNum):
    """
    Create a random DAG with `nodeNum` vertices from `edgeNum` random draws.

    Every edge goes from a smaller to a larger vertex index, which guarantees
    that the graph is acyclic.  Duplicate draws collapse, so the graph may
    end up with fewer than `edgeNum` edges.

    :raises ValueError: if edges are requested but fewer than 2 vertices exist
    """
    if edgeNum > 0 and nodeNum < 2:
        raise ValueError("at least 2 nodes are needed to create an edge")
    thisGraph = Graph(nodeNum)
    for _ in range(edgeNum):
        # the last vertex can never be a source
        src = random.randrange(0, nodeNum - 1)
        dest = random.randrange(src + 1, nodeNum)
        thisGraph.addEdge(src, dest)
    return thisGraph


def _isTopologicalOrder(graph, order):
    """Return True if `order` lists every vertex once and respects every edge."""
    if sorted(order) != list(range(graph.vertices)):
        return False
    position = {node: idx for idx, node in enumerate(order)}
    return all(position[src] < position[dest] for src, dest in graph.edges)


if __name__ == '__main__':
    # create the fixed test graph
    graph1 = Graph(5)
    graph1.addEdge(0, 1)
    graph1.addEdge(0, 3)
    graph1.addEdge(0, 4)
    graph1.addEdge(0, 2)
    graph1.addEdge(1, 2)
    graph1.addEdge(1, 4)
    graph1.addEdge(1, 3)
    graph1.addEdge(3, 4)
    print(topologicalSort_Naive(graph1))
    print(topologicalSort_Better(graph1))
    # random test cases: the two orders may differ, but both must be valid
    for i in range(1):
        testGraph = randomCreateDAG(10, 50)
        if not _isTopologicalOrder(testGraph, topologicalSort_Naive(testGraph)):
            raise ValueError("invalid Topological Sort Result from Naive!!")
        if not _isTopologicalOrder(testGraph, topologicalSort_Better(testGraph)):
            raise ValueError("invalid Topological Sort Result from Better!!")

# --------------------------------------------------------------------------------
# /017_Dijkstra_Algorithm.py:
# --------------------------------------------------------------------------------
# Ref: http://www.geeksforgeeks.org/greedy-algorithms-set-6-dijkstras-shortest-path-algorithm/
# Algorithm
# 1) Create a set sptSet (shortest path tree set) that keeps track of vertices
#    included in shortest path tree, i.e., whose minimum distance from source is
#    calculated and finalized. Initially, this set is empty.
# 2) Assign a distance value to all vertices in the input graph. Initialize all
#    distance values as INFINITE. Assign distance value as 0 for the source vertex
#    so that it is picked first.
# 3) While sptSet doesn't include all vertices
#    a) Pick a vertex u which is not there in sptSet and has minimum distance value.
#    b) Include u to sptSet.
#    c) Update distance value of all adjacent vertices of u. To update the distance
#       values, iterate through all adjacent vertices. For every adjacent vertex v,
#       if sum of distance value of u (from source) and weight of edge u-v, is less
#       than the distance value of v, then update the distance value of v.

from collections import defaultdict
import heapq


class Graph(object):
    """An undirected, weighted graph over the vertices 0 .. vertexNum - 1."""

    def __init__(self, vertexNum):
        self.vertices = vertexNum
        self.edges = set()
        # node -> set of (neighbor, cost) pairs
        self.graphMap = {nodeName: set() for nodeName in range(self.vertices)}

    def addEdge(self, src, dest, cost):
        """
        Add the undirected edge src - dest with the given cost.

        :raises ValueError: if src or dest is not in 0 .. vertices - 1
        """
        if not (0 <= src < self.vertices and 0 <= dest < self.vertices):
            raise ValueError("dest or src is out of range!!")
        self.edges.add((src, dest))
        self.graphMap[src].add((dest, cost))
        self.graphMap[dest].add((src, cost))

    def getNeighbors(self, node):
        """Return the set of (neighbor, cost) pairs adjacent to `node`."""
        return self.graphMap[node]

    def __str__(self):
        graph = defaultdict(set)
        for src, dest in self.edges:
            graph[src].add(dest)
            # make sure nodes that were only ever a `dest` show up as well
            graph.setdefault(dest, set())
        return "%s" % graph


def dijkstra(graph, src):
    """
    Single-source shortest paths for a graph with non-negative edge costs.

    :type graph: Graph
    :type src: int - the index of the source vertex
    :rtype: list - distance[v] is the shortest distance from src to v
                   (inf when v is unreachable)
    """
    distance = [float("inf")] * graph.vertices
    distance[src] = 0
    # sptSet from the description above: vertices whose distance is final
    finalized = set()
    heap = [(0, src)]
    while heap:
        dist, thisNode = heapq.heappop(heap)
        # The heap may hold several outdated entries for one vertex; only the
        # first (smallest) one that is popped counts.
        if thisNode in finalized:
            continue
        finalized.add(thisNode)
        for neighbor, cost in graph.getNeighbors(thisNode):
            if neighbor in finalized:
                continue
            candidate = dist + cost
            # Push only on a real improvement to keep the heap small.
            if candidate < distance[neighbor]:
                distance[neighbor] = candidate
                heapq.heappush(heap, (candidate, neighbor))
    return distance


if __name__ == '__main__':
    graph = Graph(9)
    graph.addEdge(0, 1, 4)
    graph.addEdge(1, 2, 8)
    graph.addEdge(2, 3, 7)
    graph.addEdge(3, 4, 9)
    graph.addEdge(4, 5, 10)
    graph.addEdge(5, 6, 2)
    graph.addEdge(6, 7, 1)
    graph.addEdge(7, 0, 8)
    graph.addEdge(7, 8, 7)
    graph.addEdge(1, 7, 11)
    graph.addEdge(2, 8, 2)
    graph.addEdge(6, 8, 6)
    graph.addEdge(2, 5, 4)
    graph.addEdge(3, 5, 14)
    print(dijkstra(graph, 0))

# --------------------------------------------------------------------------------
# /018_HashHeap.py:
# --------------------------------------------------------------------------------
# !/usr/bin/python

# This is the Python implementation of Hash Heap based on the list
# implementation of binary heap. The difference between Hash Heap and Binary Heap
# is that Hash Heap supports the `heapRemove` operation in O(log n) time and can
# check whether a certain element is in the Hash Heap or not in O(1) time.
#
# Basic automatic tests are given in `pushpopTest()` and `removeTest()`.
# Note: It may take about 10 seconds to run both test functions with their
#       default sizes.

import random


class HeapNode(object):
    """
    The node in the HashHeap to deal with duplicates.
    Each node stores the value of an element and the number of duplicates
    with the same value.  Nodes are ordered by their value only.
    """
    # Nodes are mutable (`cnt` changes), so they are deliberately unhashable.
    __hash__ = None

    def __init__(self, val, cnt):
        self.val = val
        self.cnt = cnt

    # Rich comparisons instead of `__cmp__`, which Python 3 ignores.
    def __lt__(self, other):
        return self.val < other.val

    def __le__(self, other):
        return self.val <= other.val

    def __gt__(self, other):
        return self.val > other.val

    def __ge__(self, other):
        return self.val >= other.val

    def __eq__(self, other):
        if not isinstance(other, HeapNode):
            return NotImplemented
        return self.val == other.val

    def __ne__(self, other):
        if not isinstance(other, HeapNode):
            return NotImplemented
        return self.val != other.val

    def __str__(self):
        return "[%s, %d]" % (self.val, self.cnt)
    __repr__ = __str__


class HashHeap(object):
    """
    This HashHeap is the same as the list implementation of a binary min-heap,
    but with a hashMap that maps the value of an element to its index in the list.
    """
    def __init__(self, arr):
        """
        `_cap` - the number of elements in the HashHeap (duplicates included)
        `_maxIdx` - the max index of the binary heap (-1 when empty)
        `_data` - the list implementation of the binary heap
        `_hashMap` - mapping the element to its index in the binary heap
        """
        elemCnt = self._preProcess(arr)
        # Summing the counts (instead of len(arr)) also accepts iterables
        # that have no length.
        self._cap = sum(elemCnt.values())
        self._maxIdx = len(elemCnt) - 1
        self._data = [HeapNode(key, value) for key, value in elemCnt.items()]
        self._hashMap = {node.val: idx for idx, node in enumerate(self._data)}
        self._heapify()

    def _preProcess(self, arr):
        """
        Convert the input iterable into a dict object.
        The key of the dict is the value of the element.
        The value of the dict is the number of occurrences of that element.
        """
        elemCnt = {}
        for elem in arr:
            elemCnt[elem] = elemCnt.get(elem, 0) + 1
        return elemCnt

    def _swap(self, idx1, idx2):
        """
        Swap the 2 elements in the heap.
        Also, change the index stored in `self._hashMap`
        """
        elem1, elem2 = self._data[idx1], self._data[idx2]
        self._hashMap[elem1.val] = idx2
        self._hashMap[elem2.val] = idx1
        self._data[idx1], self._data[idx2] = elem2, elem1

    def _heapify(self):
        """Turn `_data` into a valid min-heap in O(n): sift down every inner node, last one first."""
        for idx in range((self._maxIdx - 1) // 2, -1, -1):
            self._siftDown(idx)

    def _smallerChild(self, idx):
        """Return the index of the smaller child of `idx`, or None for a leaf."""
        left, right = idx * 2 + 1, idx * 2 + 2
        if left > self._maxIdx:
            return None
        if right > self._maxIdx:
            return left
        return left if self._data[left] < self._data[right] else right

    def _siftDown(self, idx):
        """Move the node at `idx` down until neither child is smaller than it."""
        current = idx
        while True:
            child = self._smallerChild(current)
            if child is None or self._data[current] <= self._data[child]:
                return
            self._swap(current, child)
            current = child

    def _siftUp(self, idx):
        """Move the node at `idx` up until its parent is not larger than it."""
        current = idx
        while current > 0:
            parent = (current - 1) // 2
            if not self._data[parent] > self._data[current]:
                return
            self._swap(parent, current)
            current = parent

    def _removeLastNode(self):
        """Drop the last node of the heap list together with its hashMap entry."""
        rmNode = self._data.pop(-1)
        self._cap -= 1
        self._maxIdx -= 1
        self._hashMap.pop(rmNode.val)

    def _removeByIdx(self, idx):
        """Remove one occurrence of the element stored at `idx` and return its value."""
        thisNode = self._data[idx]
        retVal = thisNode.val
        if thisNode.cnt > 1:
            # duplicates only lower the counter; the heap shape is untouched
            thisNode.cnt -= 1
            self._cap -= 1
        elif idx == self._maxIdx:
            # the node itself is the last node
            self._removeLastNode()
        else:
            # Move the last node into the hole, then restore the heap order
            # in whichever direction it is violated.
            self._swap(idx, self._maxIdx)
            self._removeLastNode()
            if idx > 0 and self._data[(idx - 1) // 2] > self._data[idx]:
                self._siftUp(idx)
            else:
                self._siftDown(idx)
        return retVal

    @property
    def length(self):
        """
        Return the number of elements in the Hash Heap
        """
        return self._cap

    def heapPeep(self):
        """
        Return the MIN element in the Hash Heap
        (`float("inf")` when the heap is empty)
        """
        if not self._data:
            return float("inf")
        return self._data[0].val

    def heapPop(self):
        """
        Remove the MIN element from the Hash Heap and return its value

        :raises IndexError: if the heap is empty
        """
        if not self._data:
            raise IndexError("pop from an empty HashHeap")
        return self._removeByIdx(0)

    def heapPush(self, elem):
        """
        Push a new element into the Hash Heap
        """
        self._cap += 1
        if elem not in self._hashMap:
            self._maxIdx += 1
            self._data.append(HeapNode(elem, 1))
            self._hashMap[elem] = self._maxIdx
            self._siftUp(self._maxIdx)
        else:
            idx = self._hashMap[elem]
            self._data[idx].cnt += 1

    def heapRemove(self, elem):
        """
        Remove one occurrence of an existing element from the Hash Heap.
        If the element to be removed is not in the Hash Heap, raise an error.

        :raises ValueError: if `elem` is not in the heap
        """
        if elem not in self._hashMap:
            raise ValueError("Element to be removed is not in HashHeap!!!")
        idx = self._hashMap[elem]
        self._removeByIdx(idx)

    def __contains__(self, value):
        return value in self._hashMap

    def __str__(self):
        return "%s" % [elem.val for elem in self._data]
    __repr__ = __str__


def pushpopTest(rounds=100, size=1000):
    """
    Randomly generate a list, and push each element into the heap.
    Test HeapPush by comparing the first element in the heap with the
    smallest element in the List.
    Test HeapPop by comparing the popped element from the heap with the
    sorted list one by one.

    :type rounds: int - number of independent random runs
    :type size: int - number of elements pushed per run
    """
    for _ in range(rounds):
        thisHeap = HashHeap([0])
        testList = [0]
        for i in range(size):
            thisRandom = random.randrange(-100, 100000)
            thisHeap.heapPush(thisRandom)
            testList.append(thisRandom)
            assert min(testList) == thisHeap.heapPeep()
            assert len(testList) == thisHeap.length
            assert len(thisHeap._hashMap) == thisHeap._maxIdx + 1
        testList.sort()
        assert len(testList) == thisHeap.length
        for idx, num in enumerate(testList):
            assert num == thisHeap.heapPop()
            assert len(testList) - 1 - idx == thisHeap.length
            assert len(thisHeap._hashMap) == thisHeap._maxIdx + 1


def removeTest(rounds=100, size=1000):
    """
    Randomly generate a list, and push each element into the heap.
    Test HeapRemove by randomly deleting the pushed element from the heap with
    a probability of 0.2, and then check whether the first element in the heap
    is the same as the smallest element in the list.

    :type rounds: int - number of independent random runs
    :type size: int - number of elements pushed per run
    """
    for _ in range(rounds):
        thisHeap = HashHeap([0])
        testList = [0]
        for i in range(size):
            thisRandom = random.randrange(-100, 100000)
            thisHeap.heapPush(thisRandom)
            if random.random() < 0.2:
                thisHeap.heapRemove(thisRandom)
            else:
                testList.append(thisRandom)
            assert min(testList) == thisHeap.heapPeep()
            assert len(testList) == thisHeap.length
            assert len(thisHeap._hashMap) == thisHeap._maxIdx + 1
        testList.sort()
        assert len(testList) == thisHeap.length
        for idx, num in enumerate(testList):
            assert num == thisHeap.heapPop()
            assert len(testList) - 1 - idx == thisHeap.length
            assert len(thisHeap._hashMap) == thisHeap._maxIdx + 1


if __name__ == '__main__':
    pushpopTest()
    removeTest()
219 | """ 220 | for _ in xrange(100): 221 | thisHeap = HashHeap([0]) 222 | testList = [0] 223 | for i in xrange(1000): 224 | thisRandom = random.randrange(-100, 100000) 225 | thisHeap.heapPush(thisRandom) 226 | if random.random() < 0.2: 227 | thisHeap.heapRemove(thisRandom) 228 | else: 229 | testList.append(thisRandom) 230 | assert min(testList) == thisHeap.heapPeep() 231 | assert len(testList) == thisHeap.length 232 | assert len(thisHeap._hashMap) == thisHeap._maxIdx + 1 233 | testList.sort() 234 | assert len(testList) == thisHeap.length 235 | for idx, num in enumerate(testList): 236 | assert num == thisHeap.heapPop() 237 | assert len(testList) - 1 - idx == thisHeap.length 238 | assert len(thisHeap._hashMap) == thisHeap._maxIdx + 1 239 | 240 | 241 | if __name__ == '__main__': 242 | pushpopTest() 243 | removeTest() -------------------------------------------------------------------------------- /Fibonacci.java: -------------------------------------------------------------------------------- 1 | // For nature number n, we have 2 | // Fib(2n) = Fib(n+1)^2 - Fib(n-1)^2 3 | // Fib(2n+1) = Fib(n) ^2 + Fib(n+1)^2 4 | 5 | 6 | public class Fibonacci { 7 | 8 | public static void main(String[] args) { 9 | Fibonacci f = new Fibonacci(); 10 | int[] res = new int[90]; 11 | for (int i = 0; i < res.length; i++) { 12 | res[i] = f.solution(i); 13 | } 14 | for (int i = 1; i < 50; i += 2) { 15 | int calculate = res[i/2] * res[i/2] + res[i/2+1] * res[i/2+1]; 16 | if (calculate != res[i]) { 17 | System.out.format("%d SQX's Ans: %d; Ans: %d\n",i, calculate, res[i]); 18 | } 19 | } 20 | } 21 | 22 | 23 | public int solution(int n) { 24 | TwoTwo ans = new TwoTwo(1,0,0,1); 25 | TwoTwo tmp = new TwoTwo(1,1,1,0); 26 | int left = n; 27 | while (left > 0) { 28 | if ((left & 1) == 1) ans.times(tmp); 29 | tmp.power(); 30 | left = left >> 1; 31 | } 32 | return (int) ans.c; 33 | } 34 | } 35 | 36 | class TwoTwo { 37 | long a; 38 | long b; 39 | long c; 40 | long d; 41 | 42 | public TwoTwo(long a, long 
b, long c, long d) { 43 | this.a = a; 44 | this.b = b; 45 | this.c = c; 46 | this.d = d; 47 | } 48 | 49 | public void times(TwoTwo t1) { 50 | long ta = (a * t1.a + b * t1.c) % 1000000; 51 | long tb = (a * t1.b + b * t1.d) % 1000000; 52 | long tc = (c * t1.a + d * t1.c) % 1000000; 53 | long td = (c * t1.b + d * t1.d) % 1000000; 54 | a = ta; 55 | b = tb; 56 | c = tc; 57 | d = td; 58 | } 59 | 60 | public void power() { 61 | long ta = (a * a + b * c) % 1000000; 62 | long tb = (a * b + b * d) % 1000000; 63 | long tc = (a * c + c * d) % 1000000; 64 | long td = (b * c + d * d) % 1000000; 65 | a = ta; 66 | b = tb; 67 | c = tc; 68 | d = td; 69 | } 70 | 71 | public String toString() { 72 | return a + " " + b + " " + c + " " + d; 73 | } 74 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Structure and Algorithm Implemented in Python 2 | 3 | 2016-10-05 4 | === 5 | - Manacher Algorithm added 6 | - There is also the dynamic programming algorithm and the O(1)-space algorithm to longest palindrome substring (LPS) problem 7 | - Manacher Algorithm is an linear-time and linear-spaced algorithm to find the LPS. 
8 | - Test cases are randomly generated by `random.choice(char_set)`, where `char_set` contains only 5 different characters and the length of each test string is set to 300 9 | - The result of the Manacher algorithm should be the same as the result from the other LPS algorithms 10 | 11 | 2016-10-04 12 | === 13 | - KMP algorithm added 14 | - KMP is an O(n) time `strStr` algorithm even in the worst case 15 | - Test cases are generated from the sample text in the Wikipedia article on the [Knuth–Morris–Pratt_algorithm](https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm) 16 | - The result of the KMP algorithm should be the same as the result of the normal `strStr` function 17 | 18 | 2016-09-29 19 | === 20 | - Fenwick Tree added 21 | - Fenwick Tree is also known as Binary Indexed Tree 22 | - Fenwick Tree is used to calculate contiguous sums of a given list. Both the sum and the update operations take O(log n) time 23 | - Test cases are generated from `range(1, 1001)` 24 | - According to the test, `fenwickTree.get_index(i)` should be equal to `i+1`, while `fenwickTree.query_sum(i,j)` should be equal to the contiguous sum from `i+1` to `j+1` inclusive 25 | 26 | 2016-08-15 27 | ===== 28 | - Some basic algorithms in **"Data Structure and Algorithm Analysis in C"** by Mark Allen Weiss implemented in Python 29 | --------------------------------------------------------------------------------