├── __init__.py ├── chapter_1 ├── __init__.py ├── common.py ├── module_1_1.py ├── module_1_4.py ├── module_1_5.py ├── module_1_3_double_node_linked_list.py └── module_1_3_linked_list.py ├── chapter_2 ├── __init__.py ├── module_2_1.py ├── module_2_3.py ├── module_2_5.py ├── module_2_2.py └── module_2_4.py ├── chapter_3 ├── __init__.py ├── module_3_5.py ├── module_3_4.py ├── module_3_2.py └── module_3_3.py ├── chapter_4 ├── __init__.py ├── basic_data_struct.py ├── module_4_1.py └── module_4_2.py ├── chapter_5 ├── __init__.py ├── module_5_5.py ├── module_5_4.py ├── basic_data_struct.py ├── module_5_1.py ├── module_5_2.py └── module_5_3.py ├── chapter_6 ├── __init__.py ├── basic_data_struct.py └── module_6_1.py └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding:UTF-8 -*- 3 | -------------------------------------------------------------------------------- /chapter_1/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding:UTF-8 -*- 3 | -------------------------------------------------------------------------------- /chapter_2/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding:UTF-8 -*- 3 | -------------------------------------------------------------------------------- /chapter_3/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding:UTF-8 -*- 3 | -------------------------------------------------------------------------------- /chapter_4/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding:UTF-8 -*- 3 | -------------------------------------------------------------------------------- /chapter_5/__init__.py: 
class Node(object):

    """Singly linked node: one stored value plus a reference to the node after it."""

    def __init__(self, val):
        self._val = val
        # A fresh node has no successor yet (stored through the property below).
        self.next_node = None

    def _read_val(self):
        return self._val

    def _write_val(self, new_val):
        self._val = new_val

    def _read_next(self):
        return self._next_node

    def _write_next(self, successor):
        self._next_node = successor

    # Public read/write attributes backed by the private ``_val`` and
    # ``_next_node`` fields.
    val = property(_read_val, _write_val)
    next_node = property(_read_next, _write_next)
class DoubleNode(object):

    """Doubly linked node: one stored value plus links to both neighbours."""

    def __init__(self, val):
        self._val = val
        # A fresh node is detached on both sides.
        self._prev = None
        self._next = None

    def _read_prev(self):
        return self._prev

    def _write_prev(self, neighbour):
        self._prev = neighbour

    def _read_next(self):
        return self._next

    def _write_next(self, neighbour):
        self._next = neighbour

    def _read_val(self):
        return self._val

    def _write_val(self, new_val):
        self._val = new_val

    # Public read/write attributes backed by the private ``_prev``, ``_next``
    # and ``_val`` fields.
    prev = property(_read_prev, _write_prev)
    next = property(_read_next, _write_next)
    val = property(_read_val, _write_val)
def insertion_sort(lst):
    """
    Sort ``lst`` in place in ascending order with insertion sort; returns None.

    Each element is moved left by swapping it with its left neighbour for as
    long as it is smaller than that neighbour.
    For a random list of size N, insertion sort needs ~N**2/4 comparisons and
    ~N**2/4 exchanges on average, ~N**2/2 of each in the worst case, and N-1
    comparisons with no exchange in the best case.
    >>> lst = [9, 4, 5, 1, 0, 3, 6]
    >>> insertion_sort(lst)
    >>> lst
    [0, 1, 3, 4, 5, 6, 9]
    """
    for start in range(1, len(lst)):
        # Walk the element at ``start`` leftwards; stop at the first
        # neighbour that is not larger, or at the front of the list.
        for pos in range(start, 0, -1):
            if not lst[pos] < lst[pos - 1]:
                break
            lst[pos - 1], lst[pos] = lst[pos], lst[pos - 1]
# 3.5.8 practice, implement a LinearProbingHashTable which supports multiple values.
class LinearProbingHT(object):

    """
    Linear-probing hash table in which one key may hold several values.

    ``put`` appends to the list of values kept under a key, ``get`` returns
    one of those values picked at random (None when the key is absent) and
    ``delete`` removes the key together with all of its values.  ``None``
    marks an empty slot, so it cannot be used as a key.

    >>> ht = LinearProbingHT()
    >>> for index, s in enumerate('SEARCHEXAMPLE'):
    ...     ht.put(s, index)
    ...
    >>> val = ht.get('E')
    >>> val in [1, 6, 12]
    True
    >>> val2 = ht.get('A')
    >>> val2 in [2, 8]
    True
    >>> ht.delete('E')
    >>> ht.get('E')
    >>> ht.put(0, 'zero')
    >>> ht.get(0)
    'zero'
    """

    def __init__(self):
        self._len = 16                    # number of slots
        self._size = 0                    # number of distinct keys stored
        self._keys = [None] * self._len
        self._vals = [None] * self._len   # each used slot holds a list of values

    def __hash(self, key):
        # Parentheses matter: ``%`` binds tighter than ``&``.
        return (hash(key) & 0x7fffffff) % self._len

    def __find(self, key):
        """Return the slot holding ``key``, or the empty slot ending its probe run.

        Terminates because the table is never more than half full, so an
        empty slot always exists.
        """
        index = self.__hash(key)
        while self._keys[index] is not None and self._keys[index] != key:
            index = (index + 1) % self._len
        return index

    def __insert(self, key, vals):
        """Store ``key`` with its whole value list; ``key`` must be absent."""
        index = self.__find(key)
        self._keys[index], self._vals[index] = key, vals
        self._size += 1

    def __resize(self, size):
        """Rebuild the table with ``size`` slots, re-hashing every key."""
        old_keys, old_vals = self._keys, self._vals
        self._len = size
        self._size = 0
        self._keys = [None] * size
        self._vals = [None] * size
        for key, vals in zip(old_keys, old_vals):
            if key is not None:
                self.__insert(key, vals)

    def __contains(self, key):
        return self._keys[self.__find(key)] is not None

    def put(self, key, value):
        """Add ``value`` to the values stored under ``key``.

        Raises ValueError when ``key`` is None.
        """
        assert isinstance(key, collections.abc.Hashable)
        if key is None:
            raise ValueError('None can not be used as a key.')

        # Keep the load factor at or below 1/2 so probe runs stay short.
        if self._size >= self._len // 2:
            self.__resize(self._len * 2)

        index = self.__find(key)
        if self._keys[index] is None:
            self._keys[index], self._vals[index] = key, [value]
            self._size += 1
        else:
            self._vals[index].append(value)

    def get(self, key):
        """Return a randomly chosen value stored under ``key``, or None."""
        index = self.__find(key)
        if self._keys[index] is None:
            return None
        return random.choice(self._vals[index])

    def delete(self, key):
        """Remove ``key`` and all of its values; do nothing if it is absent."""
        index = self.__find(key)
        if self._keys[index] is None:
            return

        self._keys[index] = self._vals[index] = None
        self._size -= 1

        # Every key in the rest of the probe run may have been pushed past
        # the slot just emptied, so take each one out and insert it again.
        index = (index + 1) % self._len
        while self._keys[index] is not None:
            moved_key, moved_vals = self._keys[index], self._vals[index]
            self._keys[index] = self._vals[index] = None
            self._size -= 1
            self.__insert(moved_key, moved_vals)
            index = (index + 1) % self._len

        # Halve the table once it is at most 1/8 full.
        if self._size and self._size <= self._len // 8:
            self.__resize(self._len // 2)

    def keys(self):
        """Yield every stored key, in slot order."""
        for key in self._keys:
            if key is not None:
                yield key
# Sub-lists of at most this many elements are finished with insertion sort.
INSERTION_SORT_LENGTH = 8


class QuickSort(object):

    """
    Quicksort with a sampled pivot and an insertion-sort cutoff for short
    sub-lists.  ``sort`` shuffles the list and then sorts it in place.

    >>> qs = QuickSort()
    >>> lst = [3, 2, 4, 7, 8, 9, 1, 0, 14, 11, 23, 50, 26]
    >>> qs.sort(lst)
    >>> lst
    [0, 1, 2, 3, 4, 7, 8, 9, 11, 14, 23, 26, 50]
    >>> lst2 = ['E', 'A', 'S', 'Y', 'Q', 'U', 'E', 'S', 'T', 'I', 'O', 'N']
    >>> qs.sort(lst2)
    >>> lst2
    ['A', 'E', 'E', 'I', 'N', 'O', 'Q', 'S', 'S', 'T', 'U', 'Y']
    """

    def sort(self, lst):
        """Sort ``lst`` in place in ascending order; returns None."""
        random.shuffle(lst)
        self.__sort(lst, 0, len(lst) - 1)

    def __sort(self, lst, low, high):
        if high - low + 1 <= INSERTION_SORT_LENGTH:
            self.insertion_sort(lst, low, high)
            return
        index = self.partition(lst, low, high)
        # The pivot at ``index`` is already in its final position, so it is
        # left out of both halves.
        self.__sort(lst, low, index - 1)
        self.__sort(lst, index + 1, high)

    def insertion_sort(self, lst, low, high):
        """Insertion-sort the slice ``lst[low..high]`` (both ends inclusive)."""
        for i in range(low + 1, high + 1):
            j = i
            while j > low and lst[j] < lst[j - 1]:
                lst[j], lst[j - 1] = lst[j - 1], lst[j]
                j -= 1

    # 2.3.18 practice
    def three_sample(self, lst, low, mid, high):
        """Return whichever of the indices ``low``, ``mid``, ``high`` holds the median value."""
        if lst[low] <= lst[mid] <= lst[high] or lst[high] <= lst[mid] <= lst[low]:
            return mid
        elif lst[mid] <= lst[low] <= lst[high] or lst[high] <= lst[low] <= lst[mid]:
            return low
        else:
            return high

    # 2.3.19 practice
    def five_sample(self, lst, low, high):
        """Return the index holding the median of five random samples from ``lst[low..high]``.

        Samples are drawn with replacement, so an index may repeat.
        """
        samples = [random.randint(low, high) for _ in range(5)]
        samples.sort(key=lambda index: lst[index])
        return samples[2]

    def partition(self, lst, low, high):
        """Partition ``lst[low..high]`` around a sampled pivot and return the pivot's final index.

        On return every element left of the index is <= the pivot and every
        element right of it is >= the pivot.
        """
        index = self.five_sample(lst, low, high)
        lst[low], lst[index] = lst[index], lst[low]
        val = lst[low]
        i, j = low, high + 1
        while True:
            # Both scans stop on keys equal to the pivot.  That costs a few
            # extra swaps but keeps the split balanced when the list holds
            # many duplicates (otherwise the recursion depth grows linearly).
            i += 1
            while i < high and lst[i] < val:
                i += 1
            j -= 1
            while j > low and lst[j] > val:
                j -= 1
            if i >= j:
                break
            lst[i], lst[j] = lst[j], lst[i]

        lst[low], lst[j] = lst[j], lst[low]
        return j
def sqrt(number):
    '''
    return the square root of the number(Newton's method).

    Raises ValueError for a negative input.  Zero is returned unchanged,
    because the Newton step divides by the current estimate.
    >>> sqrt(4)
    2.0
    >>> sqrt(9)
    3.0
    >>> sqrt(1)
    1
    >>> sqrt(256)
    16.0
    >>> sqrt(0)
    0
    '''
    if number < 0:
        raise ValueError('input number must be positive.')
    if number == 0:
        return number
    err = 1e-15
    t = number
    # Iterate until t and number / t agree to a relative error of ``err``.
    while abs(t - number / t) > err * t:
        t = float(number / t + t) / 2
    return t
# 1.1.29 practice
def rank(key, lst):
    '''
    return the index of the first occurrence of the key in the given
    ascending list, there may be duplicate keys. If the key is not in the
    list, return -1.
    >>> rank(3, [1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10])
    2
    >>> rank(4, [1, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5])
    4
    >>> rank(1, [1, 1, 1])
    0
    >>> rank(7, [1, 2, 3])
    -1
    '''

    assert isinstance(key, int)
    assert isinstance(lst, (list, tuple))

    low, high = 0, len(lst) - 1
    first = -1
    while low <= high:
        mid = (low + high) // 2
        if lst[mid] < key:
            low = mid + 1
        else:
            if lst[mid] == key:
                # Remember the hit, then keep searching to its left.  The
                # search never leaves [low, high], so it cannot run past
                # index 0 into negative indices.
                first = mid
            high = mid - 1
    return first
For each character, first reach all the 34 | possible states and then execute a epsilon transition which output a set with all possible 35 | states. When all character is checked, if we reach the end state, that means the input 36 | text match the regular expression. The worst case of running time is proportional to 37 | O(MN), M is the length of regular expression, N is the length of the input text. 38 | >>> nfa = NFA('(A*B|AC)D') 39 | >>> nfa.recognizes('AAAABD') 40 | True 41 | >>> nfa2 = NFA('(A*B|AC)D') 42 | >>> nfa2.recognizes('AAAAC') 43 | False 44 | >>> nfa3 = NFA('(a|(bc)*d)*') 45 | >>> nfa3.recognizes('abcbcd') 46 | True 47 | >>> nfa4 = NFA('(a|(bc)*d)*') 48 | >>> nfa4.recognizes('abcbcbcdaaaabcbcdaaaddd') 49 | True 50 | >>> nfa5 = NFA('(.*AB((C|D|E)F)*G)') 51 | >>> nfa5.recognizes('dfawefdABCQQQG') 52 | True 53 | ''' 54 | 55 | def __init__(self, regexp): 56 | self._regexp = regexp 57 | self._ops = Stack() 58 | self._reg_len = len(self._regexp) 59 | self._graph = Digragh(self._reg_len + 1) 60 | 61 | for i in range(self._reg_len): 62 | lp = i 63 | if self._regexp[i] == '(' or self._regexp[i] == '|': 64 | self._ops.push(i) 65 | elif self._regexp[i] == ')': 66 | or_op = self._ops.pop() 67 | if self._regexp[or_op] == '|': 68 | lp = self._ops.pop() 69 | self._graph.add_edge(lp, or_op + 1) 70 | self._graph.add_edge(or_op, i) 71 | else: 72 | lp = or_op 73 | if i < self._reg_len - 1 and self._regexp[i + 1] == '*': 74 | self._graph.add_edge(lp, i + 1) 75 | self._graph.add_edge(i + 1, lp) 76 | if self._regexp[i] in ('(', '*', ')') or self._regexp[i].isalpha(): 77 | self._graph.add_edge(i, i + 1) 78 | 79 | def recognizes(self, txt): 80 | pc = Bag() 81 | dfs = DirectedDFS(self._graph, (0,)) 82 | for v in self._graph.vertices(): 83 | if dfs.marked(v): 84 | pc.add(v) 85 | 86 | length = len(txt) 87 | for i in range(length): 88 | match = Bag() 89 | for v in pc: 90 | if v < self._reg_len: 91 | if self._regexp[v] == txt[i] or self._regexp[v] == '.': 92 | match.add(v + 1) 93 
def two_sum_fast(lst):
    """
    Count the pairs of numbers in ``lst`` that add up to zero.

    The list is sorted in place first; then, for every element, binary search
    looks for its negation.
    >>> lst = [-1, 1, -2, 3, 5, -5, 0, 4]
    >>> two_sum_fast(lst)
    2
    """
    lst.sort()
    # A hit only counts when the partner sits to the right of the current
    # position, so each pair is counted once and 0 is never paired with itself.
    return sum(
        1
        for position, value in enumerate(lst)
        if binary_search(-value, lst) > position
    )
# 1.4.18 practice
def partial_minimum(lst):
    """
    Find a local minimum of the list: an element that is not larger than
    either of its neighbours (an end element has only one neighbour).
    The search halves the window like binary search, always moving towards
    a smaller neighbour.

    Raises IndexError when the list is empty.
    >>> lst = [5, 2, 3, 4, 3, 5, 6, 8, 7, 1, 9]
    >>> partial_minimum(lst)
    2
    >>> partial_minimum([3, 2, 1])
    1
    """
    if not lst:
        raise IndexError('partial_minimum() of an empty list')

    start, end = 0, len(lst) - 1
    # Invariant: the element just left of ``start`` (if any) is larger than
    # lst[start], and the element just right of ``end`` (if any) is larger
    # than lst[end].  The window therefore always contains a local minimum.
    while start < end:
        mid = (start + end) // 2
        # mid < end here, so mid + 1 is always a valid index.
        if lst[mid] > lst[mid + 1]:
            start = mid + 1
        elif mid > start and lst[mid] > lst[mid - 1]:
            end = mid - 1
        else:
            return lst[mid]
    return lst[start]
class UnionFind(object):

    """
    Union-find over the sites 0..size-1.  Every site stores a parent link;
    a site that is its own parent is the root naming its component.
    >>> uf = UnionFind(10)
    >>> connections = [(4, 3), (3, 8), (6, 5), (9, 4), (2, 1),
    ...                (8, 9), (5, 0), (7, 2), (6, 1), (1, 0), (6, 7)]
    >>> for i, j in connections:
    ...     uf.union(i, j)
    ...
    >>> uf.connected(1, 4)
    False
    >>> uf.connected(8, 4)
    True
    >>> uf.connected(1, 5)
    True
    >>> uf.connected(1, 7)
    True
    >>> uf.find(4)
    8
    >>> uf.find(8)
    8
    """

    def __init__(self, size):
        self._id = list(range(size))
        self._count = size

    def count(self):
        """Return the current number of components."""
        return self._count

    def find(self, node):
        """Return the root of ``node``'s component, compressing the path on the way."""
        trail = []
        while node != self._id[node]:
            trail.append(node)
            node = self._id[node]
        # 1.5.12 practice: link every site passed on the way straight to the root.
        for visited in trail:
            self._id[visited] = node
        return node

    def connected(self, p, q):
        """Return True when ``p`` and ``q`` share a root."""
        return self.find(p) == self.find(q)

    def union(self, p, q):
        """Merge the components of ``p`` and ``q`` by hanging p's root under q's root."""
        p_root, q_root = self.find(p), self.find(q)
        if p_root != q_root:
            self._id[p_root] = q_root
            self._count -= 1
# 1.5.14 practice
class HeightedUnionFind(object):

    """
    Union-find that links by height: the root of the shorter tree is hung
    under the root of the taller one, so the tree's height won't be taller
    than log(n).
    >>> huf = HeightedUnionFind(10)
    >>> connections = [(9, 0), (3, 4), (5, 8), (7, 2), (2, 1), (5, 7), (0, 3), (4, 2)]
    >>> for i, j in connections:
    ...     huf.union(i, j)
    ...
    >>> huf.connected(9, 3)
    True
    >>> huf.connected(0, 1)
    True
    >>> huf.connected(9, 8)
    True
    """

    def __init__(self, size):
        self._count = size
        self._id = list(range(size))
        self._height = [1] * size

    def count(self):
        """Return the current number of components."""
        return self._count

    def find(self, node):
        """Follow parent links from ``node`` up to its root and return the root."""
        parent = self._id[node]
        while parent != node:
            node, parent = parent, self._id[parent]
        return node

    def connected(self, p, q):
        """Return True when ``p`` and ``q`` share a root."""
        return self.find(p) == self.find(q)

    def union(self, p, q):
        """Merge the components of ``p`` and ``q``, keeping the taller tree's root on top."""
        p_root, q_root = self.find(p), self.find(q)
        if p_root == q_root:
            return

        p_height, q_height = self._height[p_root], self._height[q_root]
        if p_height < q_height:
            self._id[p_root] = q_root
        else:
            self._id[q_root] = p_root
            # Only a merge of two equally tall trees makes the result taller.
            if p_height == q_height:
                self._height[p_root] += 1
        self._count -= 1
19 | 2 20 | 1 21 | >>> lst.size() 22 | 2 23 | >>> lst.is_empty() 24 | False 25 | >>> lst.pop_front() 26 | 2 27 | >>> lst.pop_front() 28 | 1 29 | >>> lst.is_empty() 30 | True 31 | >>> lst.pop_front() 32 | >>> lst.push_back(1) 33 | >>> lst.push_back(2) 34 | >>> lst.pop_back() 35 | 2 36 | >>> lst.pop_back() 37 | 1 38 | >>> lst.pop_back() 39 | >>> 40 | >>> lst.is_empty() 41 | True 42 | >>> lst.push_back(1) 43 | >>> lst.insert_after(1, DoubleNode(2)) 44 | >>> lst.insert_before(2, DoubleNode(3)) 45 | >>> for i in lst: 46 | ... print(i) 47 | ... 48 | 1 49 | 3 50 | 2 51 | >>> for i in range(10): 52 | ... lst.push_back(i) 53 | ... 54 | >>> lst.remove(1) 55 | >>> lst.remove(3) 56 | >>> [i for i in lst] 57 | [2, 0, 2, 4, 5, 6, 7, 8, 9] 58 | >>> lst.remove(2) 59 | >>> [i for i in lst] 60 | [0, 4, 5, 6, 7, 8, 9] 61 | """ 62 | 63 | def __init__(self): 64 | self._first = self._last = None 65 | self._size = 0 66 | 67 | def __iter__(self): 68 | tmp = self._first 69 | while tmp: 70 | yield tmp.val 71 | tmp = tmp.next 72 | 73 | def is_empty(self): 74 | return self._first is None 75 | 76 | def size(self): 77 | return self._size 78 | 79 | def push_front(self, item): 80 | old = self._first 81 | self._first = DoubleNode(item) 82 | self._first.next = old 83 | if old: 84 | old.prev = self._first 85 | else: 86 | self._last = self._first 87 | self._size += 1 88 | 89 | def push_back(self, item): 90 | old = self._last 91 | self._last = DoubleNode(item) 92 | self._last.prev = old 93 | if old: 94 | old.next = self._last 95 | else: 96 | self._first = self._last 97 | self._size += 1 98 | 99 | def pop_front(self): 100 | if self._first: 101 | old = self._first 102 | self._first = self._first.next 103 | old.next = None 104 | if self._first: 105 | self._first.prev = None 106 | else: 107 | self._last = None 108 | self._size -= 1 109 | return old.val 110 | return None 111 | 112 | def pop_back(self): 113 | if self._last: 114 | old = self._last 115 | self._last = self._last.prev 116 | old.prev = None 117 
| if self._last: 118 | self._last.next = None 119 | else: 120 | self._first = None 121 | self._size -= 1 122 | return old.val 123 | return None 124 | 125 | def insert_before(self, target_value, new_node): 126 | tmp = self._first 127 | while tmp and tmp.val != target_value: 128 | tmp = tmp.next 129 | 130 | if not tmp: 131 | return 132 | 133 | if not tmp.prev: 134 | tmp.prev = new_node 135 | new_node.next = tmp 136 | self._first = new_node 137 | self._size += 1 138 | return 139 | 140 | prev_node = tmp.prev 141 | prev_node.next = new_node 142 | new_node.prev = prev_node 143 | 144 | tmp.prev = new_node 145 | new_node.next = tmp 146 | 147 | self._size += 1 148 | 149 | def insert_after(self, target_value, new_node): 150 | tmp = self._first 151 | while tmp and tmp.val != target_value: 152 | tmp = tmp.next 153 | 154 | if not tmp: 155 | return 156 | 157 | if not tmp.next: 158 | tmp.next = new_node 159 | new_node.prev = tmp 160 | self._last = new_node 161 | self._size += 1 162 | return 163 | 164 | next_node = tmp.next 165 | next_node.prev = new_node 166 | new_node.next = next_node 167 | 168 | tmp.next = new_node 169 | new_node.prev = tmp 170 | 171 | self._size += 1 172 | 173 | def remove(self, item): 174 | if not self._first.next and self._first.val == item: 175 | self._first = None 176 | self._size = 0 177 | return 178 | 179 | tmp = self._first 180 | while tmp: 181 | flag = False 182 | if tmp.val == item: 183 | flag = True 184 | if not tmp.prev: 185 | target = tmp 186 | tmp = tmp.next 187 | tmp.prev = target.next = None 188 | self._first = tmp 189 | else: 190 | prev_node, next_node = tmp.prev, tmp.next 191 | tmp.prev = tmp.next = None 192 | prev_node.next, next_node.prev = next_node, prev_node 193 | tmp = next_node 194 | self._size -= 1 195 | if not flag: 196 | tmp = tmp.next 197 | 198 | if __name__ == '__main__': 199 | doctest.testmod() 200 | -------------------------------------------------------------------------------- /chapter_5/basic_data_struct.py: 
class Queue(object):

    """
    FIFO queue built on singly linked nodes.

    ``q`` may be any iterable; when it is truthy its items are enqueued
    in iteration order.
    """

    def __init__(self, q=None):
        self._first = None
        self._last = None
        self._size = 0
        # A falsy ``q`` (None, empty container) contributes nothing.
        for item in q or ():
            self.enqueue(item)

    def __iter__(self):
        """Yield the queued values from oldest to newest."""
        current = self._first
        while current:
            yield current.val
            current = current.next_node

    def is_empty(self):
        """Return True when nothing is queued."""
        return self._first is None

    def size(self):
        """Return the number of queued values."""
        return self._size

    def enqueue(self, val):
        """Append ``val`` at the back of the queue."""
        fresh = Node(val)
        if self.is_empty():
            self._first = fresh
        else:
            self._last.next_node = fresh
        self._last = fresh
        self._size += 1

    def dequeue(self):
        """Remove and return the oldest value, or None when empty."""
        if self.is_empty():
            return None
        head = self._first
        self._first = head.next_node
        if self.is_empty():
            # The queue drained: drop the stale tail reference too.
            self._last = None
        self._size -= 1
        return head.val
class Digragh(object):

    """
    Directed graph stored as one adjacency ``Bag`` per start vertex.

    ``steps`` pre-registers the integer vertices ``0 .. steps - 1`` with
    empty adjacency; a bag is created lazily when a vertex gets its first
    outgoing edge.
    """

    def __init__(self, steps):
        self._edges_size = 0
        self._adj = {i: None for i in range(steps)}
        self._vertices = set()

    def vertices_size(self):
        """Return the number of vertices that appear in at least one edge."""
        return len(self._vertices)

    def edges_size(self):
        """Return the number of edges added so far."""
        return self._edges_size

    def add_edge(self, start, end):
        """Add the directed edge ``start -> end``."""
        self._vertices.add(start)
        self._vertices.add(end)
        bag = self._adj.get(start)
        if bag is None:
            # Also covers a start vertex outside the pre-registered range,
            # which reverse() can produce for an out-of-range edge target.
            bag = self._adj[start] = Bag()
        bag.add(end)
        self._edges_size += 1

    def get_adjacent_vertices(self, vertex):
        """Return an iterable of the vertices that ``vertex`` points to."""
        bag = self._adj.get(vertex)
        return bag if bag is not None else []

    def vertices(self):
        """Return the set of vertices that appear in at least one edge."""
        return self._vertices

    def reverse(self):
        """Return a new graph with every edge direction flipped."""
        # The constructor requires a size; reuse this graph's slot count.
        reverse_graph = Digragh(len(self._adj))
        for vertex in self.vertices():
            for adjacent_vertex in self.get_adjacent_vertices(vertex):
                reverse_graph.add_edge(adjacent_vertex, vertex)
        return reverse_graph

    def has_edge(self, start, end):
        """Return True when the edge ``start -> end`` exists."""
        return any(v == end for v in self.get_adjacent_vertices(start))

    def __repr__(self):
        s = str(len(self._vertices)) + ' vertices, ' + str(self._edges_size) + ' edges\n'
        for k in self._adj:
            # str() is a no-op on string vertices and makes the others joinable.
            lst = ' '.join(str(vertex) for vertex in self.get_adjacent_vertices(k))
            s += '{}: {}\n'.format(k, lst)
        return s
LSD algorithm need to access arrays about ~7WN + 3WR times 12 | (W is string's length, N is the number of all strings, R is the number of all 13 | characters in the strings). The cost of space is proportional to N + R. 14 | >>> test_data = ['bed', 'bug', 'dad', 'yes', 'zoo', 'now', 'for', 'tip', 'ilk', 15 | ... 'dim', 'tag', 'jot', 'sob', 'nob', 'sky', 'hut', 'men', 'egg', 16 | ... 'few', 'jay', 'owl', 'joy', 'rap', 'gig', 'wee', 'was', 'wad', 17 | ... 'fee', 'tap', 'tar', 'dug', 'jam', 'all', 'bad', 'yet'] 18 | >>> lsd_sort(test_data, 3) 19 | >>> pp = pprint.PrettyPrinter(width=41, compact=True) 20 | >>> pp.pprint(test_data) 21 | ['all', 'bad', 'bed', 'bug', 'dad', 22 | 'dim', 'dug', 'egg', 'fee', 'few', 23 | 'for', 'gig', 'hut', 'ilk', 'jam', 24 | 'jay', 'jot', 'joy', 'men', 'nob', 25 | 'now', 'owl', 'rap', 'sky', 'sob', 26 | 'tag', 'tap', 'tar', 'tip', 'wad', 27 | 'was', 'wee', 'yes', 'yet', 'zoo'] 28 | """ 29 | 30 | length = len(string_list) 31 | radix = 256 32 | aux = [None] * length 33 | 34 | for i in range(width - 1, -1, -1): 35 | count = [0] * (radix + 1) 36 | 37 | for j in range(length): 38 | count[ord(string_list[j][i]) + 1] += 1 39 | 40 | for k in range(radix - 1): 41 | count[k + 1] += count[k] 42 | 43 | for p in range(length): 44 | aux[count[ord(string_list[p][i])]] = string_list[p] 45 | count[ord(string_list[p][i])] += 1 46 | 47 | for n in range(length): 48 | string_list[n] = aux[n] 49 | 50 | 51 | class MSD(object): 52 | 53 | """ 54 | MSD(most significant digit) algorithm implementation. MSD can handle strings with 55 | different length. Because a recursive process exists, so just in case that maximum 56 | recursion depth exceeded, MSD switch to insertion sort when handling small arrays. 57 | The performance will be not fine when most of input strings are the same. And the cost 58 | of space is very expensive because each recursion sort need to create a counting array, 59 | and some of recursions is unnessesary. 
class Quick3String(object):

    """
    Three-way radix quicksort for strings.

    Works like three-way quicksort, but partitions on the character at the
    current index instead of on whole keys. It is a good fit for inputs
    with many repeated strings or long common prefixes.
    >>> test_data = ['she', 'sells', 'seashells', 'by', 'the', 'sea', 'shore',
    ...              'the', 'shells', 'she', 'sells', 'are', 'surely', 'seashells']
    >>> q3s = Quick3String()
    >>> q3s.sort(test_data)
    >>> pp = pprint.PrettyPrinter(width=41, compact=True)
    >>> pp.pprint(test_data)
    ['are', 'by', 'sea', 'seashells',
     'seashells', 'sells', 'sells', 'she',
     'she', 'shells', 'shore', 'surely',
     'the', 'the']
    """

    def char_at(self, s, index):
        """Return the code point of ``s[index]``, or -1 past the end of ``s``."""
        return ord(s[index]) if index < len(s) else -1

    def sort(self, string_list):
        """Sort ``string_list`` in place in code-point order."""
        self._sort(string_list, 0, len(string_list) - 1, 0)

    def _sort(self, string_list, start, end, index):
        """Sort ``string_list[start..end]``, whose items agree on the first ``index`` characters."""
        if start >= end:
            return

        lt, gt, val, i = start, end, self.char_at(string_list[start], index), start + 1

        while i <= gt:
            tmp = self.char_at(string_list[i], index)
            if tmp < val:
                string_list[i], string_list[lt] = string_list[lt], string_list[i]
                lt += 1
            elif tmp > val:
                string_list[i], string_list[gt] = string_list[gt], string_list[i]
                gt -= 1
                # The element swapped in from the right is still unexamined.
                continue
            i += 1

        self._sort(string_list, start, lt - 1, index)

        # -1 means the strings in the middle run ended here, so they are all
        # equal. Every real character, including '\x00' (code point 0),
        # still needs the next position compared.
        if val >= 0:
            self._sort(string_list, lt, gt, index + 1)
        self._sort(string_list, gt + 1, end, index)
# 2.5.10 practice, implement a version class with __cmp__
class Version(object):

    """
    Dotted version string that orders numerically, component by component.

    Comparing the raw strings would put '1.10' before '1.2', so every
    comparison goes through a parsed key instead. Non-numeric components
    are still accepted and sort after numeric ones, by plain string order.
    >>> lst = [Version(i) for i in ['115.1.1', '115.10.1', '115.10.2']]
    >>> lst.sort()
    >>> lst
    [Version(115.1.1), Version(115.10.1), Version(115.10.2)]
    >>> Version('1.2') < Version('1.10')
    True
    """

    def __init__(self, version):
        self._version = version

    def _key(self):
        """Return a tuple that sorts in numeric component order."""
        parts = []
        for part in self._version.split('.'):
            if part.isdecimal():
                parts.append((0, int(part), ''))
            else:
                # Keep odd components comparable instead of raising.
                parts.append((1, 0, part))
        return tuple(parts)

    def __eq__(self, other):
        if not isinstance(other, Version):
            return NotImplemented
        return self._key() == other._key()

    def __lt__(self, other):
        if not isinstance(other, Version):
            return NotImplemented
        return self._key() < other._key()

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable.
        return hash(self._key())

    def __repr__(self):
        return 'Version({})'.format(self._version)

    @property
    def version(self):
        """The version string exactly as it was supplied."""
        return self._version

    @version.setter
    def version(self, val):
        # Kept as an assertion so callers still see AssertionError on bad input.
        assert all(i.isdigit() for i in val.split('.'))
        self._version = val
# 2.5.19 practice, kendall tau algorithm implementation
class KendallTau(object):

    """
    Kendall tau distance between two rankings, found by counting
    inversions with a merge sort.
    >>> klt = KendallTau()
    >>> klt.kendall_tau_count((0, 3, 1, 6, 2, 5, 4), (1, 0, 3, 6, 4, 2, 5))
    4
    """

    def kendall_tau_count(self, origin_list, count_list):
        """Return the number of pairs that the two rankings order differently."""
        # Express count_list as positions in origin_list; the distance is
        # then the number of inversions in that position sequence.
        positions = [origin_list.index(item) for item in count_list]
        scratch = positions[:]
        return self.count(positions, scratch, 0, len(positions) - 1)

    def count(self, lst, aux, low, high):
        """Sort ``lst[low..high]`` in place and return its inversion count."""
        if low >= high:
            return 0
        mid = (low + high) // 2
        total = self.count(lst, aux, low, mid)
        total += self.count(lst, aux, mid + 1, high)
        return total + self.merge_count(lst, aux, low, mid, high)

    def merge_count(self, lst, aux, low, mid, high):
        """Merge two sorted halves of ``lst`` and return the cross-half inversions."""
        aux[low:high + 1] = lst[low:high + 1]
        inversions = 0
        left, right, out = low, mid + 1, low
        while left <= mid and right <= high:
            if aux[left] < aux[right]:
                lst[out] = aux[left]
                left += 1
            else:
                # Everything still waiting in the left half outranks aux[right].
                inversions += mid - left + 1
                lst[out] = aux[right]
                right += 1
            out += 1
        # Exactly one half still has items; copy them over unchanged.
        if left <= mid:
            lst[out:high + 1] = aux[left:mid + 1]
        else:
            lst[out:high + 1] = aux[right:high + 1]
        return inversions
class Pair(object):

    """Mutable key/value record used as a hash-table chain entry."""

    def __init__(self, key, value):
        self._key, self._value = key, value

    @property
    def key(self):
        """The stored key."""
        return self._key

    @key.setter
    def key(self, new_key):
        self._key = new_key

    @property
    def value(self):
        """The value associated with the key."""
        return self._value

    @value.setter
    def value(self, new_value):
        self._value = new_value
class LinearProbingHT(object):

    """
    Hash table with linear probing: on a collision (the hashed slot is taken
    by another key) step to the next slot until a free one is found.
    The table is kept at most half full, because probe sequences grow
    quickly beyond that load. ``None`` marks an empty slot, so it cannot be
    used as a key.
    >>> test_str = 'SEARCHEXAMPLE'
    >>> ht = LinearProbingHT()
    >>> for index, s in enumerate(test_str):
    ...     ht.put(s, index)
    ...
    >>> ht.put(['a', 'b'], 999)
    Traceback (most recent call last):
        ...
    AssertionError
    >>> ht.get('L')
    11
    >>> ht.get('S')
    0
    >>> ht.get('E')
    12
    >>> ht.delete('H')
    >>> ht.get('H')
    >>>
    """

    def __init__(self):
        self._len = 16  # the length of the list
        self._size = 0  # the amount of the variables
        self._keys = [None] * self._len
        self._vals = [None] * self._len

    def __hash(self, key):
        # Parentheses matter: ``%`` binds tighter than ``&``.
        return (hash(key) & 0x7fffffff) % self._len

    def __probe(self, key):
        """Return the slot holding ``key``, or the empty slot ending its probe run."""
        index = self.__hash(key)
        # Compare with None explicitly so falsy keys such as 0 or '' count.
        while self._keys[index] is not None and self._keys[index] != key:
            index = (index + 1) % self._len
        return index

    def __place(self, key, value):
        """Store the pair without checking the load factor."""
        index = self.__probe(key)
        if self._keys[index] is None:
            self._keys[index] = key
            self._size += 1
        self._vals[index] = value

    def __resize(self, size):
        """Rebuild the table with ``size`` slots and rehash every pair."""
        old_keys, old_vals = self._keys, self._vals
        self._len = int(size)
        self._size = 0
        self._keys = [None] * self._len
        self._vals = [None] * self._len
        for k, v in zip(old_keys, old_vals):
            if k is not None:
                self.__place(k, v)

    def __contains(self, key):
        return self._keys[self.__probe(key)] is not None

    def put(self, key, value):
        """Insert ``key`` with ``value``, or overwrite the value of an existing key.

        Raises AssertionError for ``None`` or an unhashable key.
        """
        assert key is not None and isinstance(key, collections.abc.Hashable)

        # Grow first so that there is always an empty slot to stop a probe.
        if self._size >= self._len // 2:
            self.__resize(self._len * 2)

        self.__place(key, value)

    def get(self, key):
        """Return the value stored for ``key``, or None when it is absent."""
        index = self.__probe(key)
        if self._keys[index] is None:
            return None
        return self._vals[index]

    def delete(self, key):
        """Remove ``key`` if present; absent keys are ignored."""
        index = self.__probe(key)
        if self._keys[index] is None:
            return

        self._keys[index] = self._vals[index] = None
        self._size -= 1

        # Re-insert the rest of the cluster: the hole just made would
        # otherwise cut those keys off from their home slot.
        index = (index + 1) % self._len
        while self._keys[index] is not None:
            k, v = self._keys[index], self._vals[index]
            self._keys[index] = self._vals[index] = None
            self._size -= 1
            self.__place(k, v)
            index = (index + 1) % self._len

        if self._size and self._size <= self._len // 8:
            self.__resize(self._len // 2)

    # 3.4.19 practice
    def keys(self):
        """Yield every stored key in slot order."""
        for k in self._keys:
            if k is not None:
                yield k
#!/usr/bin/env python 2 | # -*- encoding:UTF-8 -*- 3 | from __future__ import print_function 4 | import doctest 5 | from common import Node 6 | 7 | 8 | class LinkedList(object): 9 | 10 | ''' 11 | Linked list practice. 12 | >>> ######### init linked list test case. 13 | >>> lst = LinkedList() 14 | >>> for i in range(1, 5): 15 | ... lst.append(i) 16 | ... 17 | >>> lst.print_list() 18 | 1 2 3 4 19 | >>> ######## test delete_last() function, remove all nodes from linked list. 20 | >>> while lst.size(): 21 | ... lst.delete_last() 22 | ... 23 | >>> lst.print_list() 24 | >>> ######## re-init linked list. 25 | >>> lst = LinkedList() 26 | >>> for i in range(1, 5): 27 | ... lst.append(i) 28 | ... 29 | >>> ######## test find() function. 30 | >>> lst.find(5) 31 | False 32 | >>> lst.find(4) 33 | True 34 | >>> lst.find(1) 35 | True 36 | >>> ######### test delete() function. 37 | >>> lst.delete(1) 38 | >>> lst.print_list() 39 | 2 3 4 40 | >>> lst.delete(4) 41 | >>> lst.print_list() 42 | 2 3 4 43 | >>> lst.delete(2) 44 | >>> lst.print_list() 45 | 2 4 46 | >>> ######## new test list. 47 | >>> lst2 = LinkedList() 48 | >>> for i in range(10): 49 | ... lst2.append(i) 50 | ... 51 | >>> ######## test remove_after function. 52 | >>> lst2.remove_after(8) 53 | >>> lst2.remove_after(0) 54 | >>> lst2.print_list() 55 | 0 2 3 4 5 6 7 8 56 | >>> ####### test insert_after function. 57 | >>> lst2.insert_after(0, 1) 58 | >>> lst2.print_list() 59 | 0 1 2 3 4 5 6 7 8 60 | >>> lst2.insert_after(8, 9) 61 | >>> lst2.print_list() 62 | 0 1 2 3 4 5 6 7 8 9 63 | >>> ###### test max_value function. 64 | >>> lst2.max_value() 65 | 9 66 | >>> ##### test remove function, cannot delete all-same-value list yet. 67 | >>> lst2.append(8) 68 | >>> lst2.append(1) 69 | >>> lst2.remove(1) 70 | >>> lst2.remove(8) 71 | >>> lst2.print_list() 72 | 0 2 3 4 5 6 7 9 73 | >>> lst3 = LinkedList() 74 | >>> for i in range(5): 75 | ... lst3.append(3) 76 | ... 
77 | >>> lst3.remove(3) 78 | >>> lst3.remove(3) 79 | >>> lst3.print_list() 80 | >>> 81 | >>> for i in range(1, 10): 82 | ... lst3.append(i) 83 | ... 84 | >>> node = lst3.reverse() 85 | >>> lst = [] 86 | >>> while node: 87 | ... lst.append(node.val) 88 | ... node = node.next_node 89 | ... 90 | >>> lst 91 | [9, 8, 7, 6, 5, 4, 3, 2, 1] 92 | ''' 93 | 94 | def __init__(self): 95 | self._first = None 96 | self._size = 0 97 | 98 | def print_list(self): 99 | tmp = self._first 100 | while tmp: 101 | if not tmp.next_node: 102 | print(tmp.val) 103 | else: 104 | print(tmp.val, end=' ') 105 | tmp = tmp.next_node 106 | 107 | def append(self, val): 108 | if not self._first: 109 | self._first = Node(val) 110 | self._size += 1 111 | return 112 | tmp = self._first 113 | while tmp.next_node: 114 | tmp = tmp.next_node 115 | tmp.next_node = Node(val) 116 | self._size += 1 117 | 118 | # 1.3.19 practice 119 | def delete_last(self): 120 | tmp = self._first 121 | if not tmp: 122 | return 123 | if not self._first.next_node: 124 | self._first = None 125 | self._size -= 1 126 | return 127 | while tmp.next_node.next_node: 128 | tmp = tmp.next_node 129 | tmp.next_node = None 130 | self._size -= 1 131 | 132 | # 1.3.21 practice 133 | def find(self, val): 134 | tmp = self._first 135 | while tmp: 136 | if tmp.val == val: 137 | return True 138 | tmp = tmp.next_node 139 | return False 140 | 141 | def size(self): 142 | return self._size 143 | 144 | # 1.3.20 practice 145 | def delete(self, pos): 146 | if pos > self._size: 147 | return 148 | if pos == 1: 149 | self._first = self._first.next_node 150 | self._size -= 1 151 | return 152 | tmp, count = self._first, 1 153 | while count != pos - 1: 154 | count += 1 155 | tmp = tmp.next_node 156 | target = tmp.next_node 157 | tmp.next_node = tmp.next_node.next_node 158 | target.next_node = None 159 | self._size -= 1 160 | 161 | # 1.3.24 practice, accept val as parameter instead of node as parameter 162 | def remove_after(self, item): 163 | tmp = self._first 
class MergeSort(object):

    """
    Top-down merge sort: recursively sort both halves of the list, then
    merge the two sorted halves. The list and its auxiliary copy swap roles
    at every recursion level, which avoids copying before each merge.
    For an N-size array, top-down merge sort needs between 1/2NlgN and
    NlgN comparisons and accesses the array at most 6NlgN times.
    >>> ms = MergeSort()
    >>> lst = [4, 3, 2, 5, 7, 9, 0, 1, 8, 7, -1, 11, 13, 31, 24]
    >>> ms.sort(lst)
    >>> lst
    [-1, 0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 11, 13, 24, 31]
    """

    def merge(self, aux, lst, low, mid, high):
        """Merge sorted ``aux[low..mid]`` and ``aux[mid+1..high]`` into ``lst``."""
        left, right = low, mid + 1

        for j in range(low, high + 1):
            if left > mid:
                lst[j] = aux[right]
                right += 1
            elif right > high:
                lst[j] = aux[left]
                left += 1
            elif aux[right] < aux[left]:
                lst[j] = aux[right]
                right += 1
            else:
                # Ties go to the left half, which keeps the sort stable.
                lst[j] = aux[left]
                left += 1

    # 2.2.11 practice, sort the small sub array with insertion sort
    def insertion_sort(self, lst, low, high):
        """Sort ``lst[low..high]`` in place with insertion sort."""
        for i in range(low + 1, high + 1):
            j = i
            while j > low and lst[j] < lst[j - 1]:
                lst[j], lst[j - 1] = lst[j - 1], lst[j]
                j -= 1

    def sort(self, lst):
        """Sort ``lst`` in place."""
        # 2.2.9 practice, make aux as a function parameter.
        aux = lst[:]
        self.__sort(aux, lst, 0, len(lst) - 1)

    def __sort(self, aux, lst, low, high):
        """Leave ``lst[low..high]`` sorted, using ``aux`` as the merge source."""
        if high <= low:
            return
        if high - low <= 7:
            self.insertion_sort(lst, low, high)
            return
        mid = (low + high) // 2
        # Roles are swapped on the way down: the halves end up sorted in aux.
        self.__sort(lst, aux, low, mid)
        self.__sort(lst, aux, mid + 1, high)
        # 2.2.11 practice, if the halves in aux are already in order, copy
        # them straight into the destination list and skip the merge.
        if not aux[mid + 1] < aux[mid]:
            lst[low:high + 1] = aux[low:high + 1]
            return
        self.merge(aux, lst, low, mid, high)
# 2.2.17 practice linked-list sort using merge sort
def linked_list_merge_sort(head):
    """
    Sort a singly linked list in ascending order with merge sort and
    return the head node of the sorted list.

    Every node must expose ``val`` and ``next_node`` attributes. The nodes
    are relinked in place, no new node is created, so the list reachable
    from the old ``head`` must not be used any more after the call.
    An empty list (``None``) or a single node is returned unchanged.
    The sort is stable.
    """
    def merge(node1, node2):
        # Merge two sorted lists and return the head of the merged list.
        if node1 is None or node2 is None:
            return node1 or node2
        if node1.val <= node2.val:
            pt = res = node1
            node1 = node1.next_node
        else:
            pt = res = node2
            node2 = node2.next_node

        while node1 and node2:
            # "<=" takes from the first list on ties, which keeps it stable.
            if node1.val <= node2.val:
                pt.next_node = node1
                node1 = node1.next_node
            else:
                pt.next_node = node2
                node2 = node2.next_node
            pt = pt.next_node
        # append whatever is left of the list that is not exhausted yet.
        pt.next_node = node1 or node2
        return res

    if head is None or head.next_node is None:
        return head

    # slow_pt stops on the last node of the first half.
    fast_pt = slow_pt = head
    while fast_pt.next_node and fast_pt.next_node.next_node:
        fast_pt = fast_pt.next_node.next_node
        slow_pt = slow_pt.next_node

    # cut the list into two independent halves, otherwise the recursion
    # would keep sorting the very same list and never terminate.
    second_half = slow_pt.next_node
    slow_pt.next_node = None

    return merge(linked_list_merge_sort(head),
                 linked_list_merge_sort(second_half))
class Node(object):

    """
    Node used by BTree: a fixed-length ``children`` list (M_SIZE slots,
    all None at creation) plus ``m_size``, the counter the tree code uses
    for the number of occupied slots.
    """

    def __init__(self, k):
        # allocate every slot up front, the tree fills them by index.
        self._children = [None for _ in range(M_SIZE)]
        self._m_size = k

    @property
    def m_size(self):
        return self._m_size

    @m_size.setter
    def m_size(self, size):
        self._m_size = size

    @property
    def children(self):
        return self._children

    @children.setter
    def children(self, new_children):
        self._children = new_children
class QuickThreeWay(object):

    """
    Quick sort with three-way partitioning: every pass splits the range
    into the elements smaller than, equal to and larger than the pivot,
    then only the outer two parts are sorted recursively.
    """

    def sort(self, lst):
        """
        Shuffle ``lst`` and then sort it in place in ascending order.
        """
        random.shuffle(lst)
        self.__sort(lst, 0, len(lst) - 1)

    def __sort(self, lst, low, high):
        # ranges with less than two elements are already sorted.
        if low >= high:
            return

        pivot = lst[low]
        lower, cursor, upper = low, low + 1, high
        # invariant: lst[low..lower-1] < pivot, lst[lower..cursor-1] == pivot,
        # lst[upper+1..high] > pivot, lst[cursor..upper] not examined yet.
        while cursor <= upper:
            current = lst[cursor]
            if current < pivot:
                lst[lower], lst[cursor] = current, lst[lower]
                lower += 1
                cursor += 1
            elif current > pivot:
                lst[cursor], lst[upper] = lst[upper], current
                upper -= 1
            else:
                cursor += 1

        self.__sort(lst, low, lower - 1)
        self.__sort(lst, upper + 1, high)
class FlowEdge(object):

    '''
    Directed edge of a flow network, it goes from start to end and
    carries a capacity and the current flow.
    A flow of None (the default) is stored as 0.0, so that residual flow
    can be added to an edge which was created without an explicit flow.
    When edge is given, the new edge is a copy of it and the other
    arguments are ignored.
    >>> edge = FlowEdge(1, 2, 2.0, 1)
    >>> edge
    1->2 1/2.0
    >>> edge.add_residual_flow_to(2, 0.5)
    >>> edge
    1->2 1.5/2.0
    '''

    def __init__(self, start, end, capacity,
                 flow=None, edge=None):
        if edge is not None:
            self._start = edge.start
            self._end = edge.end
            self._capacity = edge.capacity
            self._flow = edge.flow
            return
        self._start = start
        self._end = end
        self._capacity = capacity
        self._flow = 0.0 if flow is None else flow

    @property
    def start(self):
        return self._start

    @property
    def end(self):
        return self._end

    @property
    def capacity(self):
        return self._capacity

    @property
    def flow(self):
        return self._flow

    def other(self, vertex):
        '''
        Return the endpoint of the edge which is not vertex,
        raise RuntimeError if vertex is not an endpoint of the edge.
        '''
        if vertex == self._start:
            return self._end
        elif vertex == self._end:
            return self._start
        raise RuntimeError('Illegal endpoint')

    def add_residual_flow_to(self, vertex, delta):
        '''
        Change the flow by delta towards vertex: the flow is decreased when
        vertex is the start (backward edge) and increased when vertex is the
        end (forward edge). Raise RuntimeError for any other vertex.
        '''
        if vertex == self._start:
            self._flow -= delta
        elif vertex == self._end:
            self._flow += delta
        else:
            raise RuntimeError('Illegal endpoint')

    def __repr__(self):
        return '{}->{} {}/{}'.format(
            self._start, self._end, self._flow, self._capacity)
class BST(object):

    """
    Binary search tree implementation.

    Keys must be mutually comparable. Every node stores the size of the
    subtree rooted at it, which is what select() and rank() rely on.
    Lookups of missing keys return None, and the query methods work on an
    empty tree instead of failing.
    >>> bst = BST()
    >>> bst.is_empty()
    True
    >>> test_str = 'EASYQUESTION'
    >>> for (index, element) in enumerate(test_str):
    ...     bst.put(element, index)
    ...
    >>> bst.is_binary_tree()
    True
    >>> bst.get('Q')
    4
    >>> bst.get('E')
    6
    >>> bst.get('N')
    11
    >>> bst.size()
    10
    >>> bst.max_val().key
    'Y'
    >>> bst.min_val().key
    'A'
    >>> bst.select(0).key
    'A'
    >>> bst.select(3).key
    'N'
    >>> bst.select(4).key
    'O'
    >>> bst.select(9).key
    'Y'
    >>> bst.rank('A')
    0
    >>> bst.rank('E')
    1
    >>> bst.rank('Y')
    9
    >>> bst.rank('T')
    7
    >>> bst.rank('U')
    8
    >>> bst.is_empty()
    False
    >>> node = bst.select(0)
    >>> node.key
    'A'
    >>> node2 = bst.select(2)
    >>> node2.key
    'I'
    >>> node3 = bst.select(9)
    >>> node3.key
    'Y'
    >>> bst.keys()
    ['A', 'E', 'I', 'N', 'O', 'Q', 'S', 'T', 'U', 'Y']
    >>> bst.height()
    5
    >>> random_key = bst.random_key()
    >>> random_key in test_str
    True
    >>> fn = bst.floor('B')
    >>> fn.key
    'A'
    >>> fn2 = bst.floor('Z')
    >>> fn2.key
    'Y'
    >>> fn3 = bst.floor('E')
    >>> fn3.key
    'E'
    >>> cn = bst.ceiling('B')
    >>> cn.key
    'E'
    >>> cn2 = bst.ceiling('R')
    >>> cn2.key
    'S'
    >>> cn3 = bst.ceiling('S')
    >>> cn3.key
    'S'
    >>> bst.delete_min()
    >>> bst.min_val().key
    'E'
    >>> bst.delete_max()
    >>> bst.max_val().key
    'U'
    >>> bst.delete('O')
    >>> bst.delete('S')
    >>> bst.keys()
    ['E', 'I', 'N', 'Q', 'T', 'U']
    >>> bst.is_binary_tree()
    True
    >>> bst.is_ordered()
    True
    >>> bst.is_rank_consistent()
    True
    >>> bst.check()
    True
    """

    def __init__(self):
        self._root = None
        # 3.2.28 practice, cache of the most recently accessed node.
        self._last_visited_node = None

    def size(self):
        """
        Return the node's amount of the binary search tree.
        """
        if self._root is None:
            return 0
        return self._root.size

    def is_empty(self):
        return self._root is None

    def node_size(self, node):
        """
        Return the size of the subtree rooted at node, 0 for None.
        """
        return 0 if node is None else node.size

    # 3.2.13 practice, implement get method with iteration.
    def get(self, key):
        """
        Return the corresponding value with the given key, or None if the
        key is not in the tree. Walk down from the root: go left when the
        given key is smaller than the current node's key, go right when it
        is larger, stop when the keys are equal.
        """

        # 3.2.28 practice add cache for bst.
        cached = self._last_visited_node
        if cached is not None and cached.key == key:
            return cached.val

        temp = self._root
        while temp is not None:
            if key < temp.key:
                temp = temp.left
            elif temp.key < key:
                temp = temp.right
            else:
                self._last_visited_node = temp
                return temp.val
        return None

    # 3.2.13 practice, implement put method with iteration.
    def put(self, key, val):
        """
        Insert a new node into the binary search tree, or replace the value
        if the key already exists. The nodes visited on the way down are
        remembered, and their size counters are only incremented when a new
        leaf was really added.
        """
        path = []
        temp = self._root
        while temp is not None:
            if key < temp.key:
                path.append(temp)
                temp = temp.left
            elif temp.key < key:
                path.append(temp)
                temp = temp.right
            else:
                # existing key: no structural change, so no size changes.
                temp.val = val
                self._last_visited_node = temp
                return

        new_node = Node(key, val, 1)
        if not path:
            self._root = new_node
        else:
            parent = path[-1]
            if parent.key < key:
                parent.right = new_node
            else:
                parent.left = new_node
            for node in path:
                node.size += 1

        self._last_visited_node = new_node

    # 3.2.14 practice
    def max_val(self):
        """
        Find the node with the maximum key in the binary search tree,
        return None if the tree is empty.
        """
        if self._root is None:
            return None
        tmp = self._root
        while tmp.right:
            tmp = tmp.right
        return tmp

    def __min_val(self, node):
        """
        Find the node with the minimum key in the subtree which starts
        with the specific node (which must not be None).
        """
        tmp = node
        while tmp.left:
            tmp = tmp.left
        return tmp

    # 3.2.14 practice
    def min_val(self):
        """
        Find the node with the minimum key in the binary search tree,
        return None if the tree is empty.
        """
        if self._root is None:
            return None
        return self.__min_val(self._root)

    # 3.2.14 practice
    def select(self, k):
        """
        Find the node with rank k (zero-based, in key order) of the binary
        search tree, the solution is similar with get() or put() function.
        Return None when there is no such node.
        """
        assert isinstance(k, int) and k <= self.size()

        tmp = self._root
        while tmp is not None:
            tmp_size = self.node_size(tmp.left)
            if tmp_size > k:
                tmp = tmp.left
            elif tmp_size < k:
                # skip the left subtree and the current node.
                k = k - tmp_size - 1
                tmp = tmp.right
            else:
                return tmp
        return None

    # 3.2.14 practice
    def rank(self, key):
        """
        Return the number of keys in the tree which are smaller than the
        given key, or -1 if the tree is empty.
        """
        result = 0
        if self._root is None:
            return -1
        tmp = self._root

        while tmp is not None:
            if key < tmp.key:
                tmp = tmp.left
            elif tmp.key < key:
                result += self.node_size(tmp.left) + 1
                tmp = tmp.right
            else:
                result += self.node_size(tmp.left)
                break
        return result

    def delete_min(self):
        """
        Remove the node with the minimum key, do nothing on an empty tree.
        """
        if self._root is None:
            return
        # the cached node might be the one which gets removed.
        self._last_visited_node = None
        self._root = self.__delete_min(self._root)

    def __delete_min(self, node):
        # find the minimum-value node.
        if not node.left:
            return node.right
        node.left = self.__delete_min(node.left)
        node.size = self.node_size(node.left) + self.node_size(node.right) + 1
        return node

    def delete_max(self):
        """
        Remove the node with the maximum key, do nothing on an empty tree.
        """
        if self._root is None:
            return
        self._last_visited_node = None
        self._root = self.__delete_max(self._root)

    def __delete_max(self, node):
        # find the maximum-value node.
        if not node.right:
            return node.left
        node.right = self.__delete_max(node.right)
        node.size = self.node_size(node.left) + self.node_size(node.right) + 1
        return node

    def delete(self, key):
        """
        Remove the node with the given key, do nothing if the key is missing.
        """
        # otherwise get() could still answer from the removed node.
        self._last_visited_node = None
        self._root = self.__delete(self._root, key)

    def __delete(self, node, key):
        if not node:
            return None
        if key < node.key:
            node.left = self.__delete(node.left, key)
        elif key > node.key:
            node.right = self.__delete(node.right, key)
        else:
            # node's left or right side is None.
            if not node.left or not node.right:
                return (node.left or node.right)
            # node's both side is not None, replace the node with its
            # successor (the minimum node of the right subtree).
            tmp = node
            node = self.__min_val(tmp.right)
            node.right = self.__delete_min(tmp.right)
            node.left = tmp.left
        node.size = self.node_size(node.left) + self.node_size(node.right) + 1
        return node

    def keys(self):
        """
        Return all the keys in ascending order, [] for an empty tree.
        """
        if self._root is None:
            return []
        return self.keys_range(self.min_val().key, self.max_val().key)

    def keys_range(self, low, high):
        """
        Return the keys between low and high (both inclusive) in ascending order.
        """
        queue = []
        self.__keys(self._root, queue, low, high)
        return queue

    def __keys(self, node, queue, low, high):
        # in-order traversal which skips the subtrees outside of [low, high].
        if not node:
            return
        if low < node.key:
            self.__keys(node.left, queue, low, high)
        if low <= node.key and high >= node.key:
            queue.append(node.key)
        if high > node.key:
            self.__keys(node.right, queue, low, high)

    # 3.2.6 practice, add height function for binary tree.
    def height(self):
        """
        Return the number of links on the longest path from the root to a
        leaf, -1 for an empty tree.
        """
        return self.__height(self._root)

    def __height(self, node):
        if not node:
            return -1
        return 1 + max(self.__height(node.left), self.__height(node.right))

    # 3.2.21 randomly choose a node from binary search tree.
    def random_key(self):
        """
        Return a randomly chosen key of the tree, None for an empty tree.
        """
        if self._root is None:
            return None
        total_size = self._root.size
        rank = random.randint(0, total_size - 1)
        random_node = self.select(rank)
        return random_node.key

    # 3.2.29 practice, check if each node's size is
    # equals to the summation of left node's size and right node's size.
    def is_binary_tree(self):
        return self.__is_binary_tree(self._root)

    def __is_binary_tree(self, node):
        if not node:
            return True
        if node.size != self.node_size(node.left) + self.node_size(node.right) + 1:
            return False
        return self.__is_binary_tree(node.left) and self.__is_binary_tree(node.right)

    # 3.2.30 practice, check if each node in binary search tree is ordered
    # (less than right node and greater than left node)
    def is_ordered(self):
        return self.__is_ordered(self._root, None, None)

    def __is_ordered(self, node, min_key, max_key):
        # None means "no bound"; compare with "is not None" so that falsy
        # keys such as 0 or '' are still treated as real bounds.
        if not node:
            return True
        if min_key is not None and node.key <= min_key:
            return False
        if max_key is not None and node.key >= max_key:
            return False
        return (self.__is_ordered(node.left, min_key, node.key) and
                self.__is_ordered(node.right, node.key, max_key))

    # 3.2.24 practice, check if each node's rank is correct.
    def is_rank_consistent(self):
        for i in range(self.size()):
            if i != self.rank(self.select(i).key):
                return False

        for key in self.keys():
            if key != self.select(self.rank(key)).key:
                return False

        return True

    # 3.2.32 practice, check if a data structure is binary search tree.
    def check(self):
        if not self.is_binary_tree():
            return False
        if not self.is_ordered():
            return False
        if not self.is_rank_consistent():
            return False
        return True

    def floor(self, key):
        """
        Return the node with the largest key which is smaller than or equal
        to the given key, None if there is no such node.
        """
        tmp = None
        node = self._root
        while node:
            if key < node.key:
                node = node.left
            elif key > node.key:
                tmp = node
                node = node.right
            else:
                return node
        return tmp

    def ceiling(self, key):
        """
        Return the node with the smallest key which is larger than or equal
        to the given key, None if there is no such node.
        """
        tmp = None
        node = self._root
        while node:
            if key < node.key:
                tmp = node
                node = node.left
            elif key > node.key:
                node = node.right
            else:
                return node
        return tmp
38 | For searching a string, we can keep getting the next node by the next character, until we 39 | reach a node with an actual value, otherwise the target string miss. 40 | Trie is as fast as symbol table and more flexible than binary tree, but it cost lots of 41 | extra space. The running time of searching and inserting be proportional to O(N) which N is 42 | the length of the searching key. 43 | Trie is suitable for those shorter keys and the alphabet is rather small. Because long key 44 | and large alphabet could take much more space. 45 | >>> trie = Trie() 46 | >>> trie.get('xxxx') 47 | >>> test_data = ['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore'] 48 | >>> for index, d in enumerate(test_data): 49 | ... trie.put(d, index) 50 | >>> trie.size() 51 | 8 52 | >>> [trie.get(i).val for i in test_data] 53 | [0, 1, 6, 3, 4, 5, 6, 7] 54 | >>> [i for i in trie.keys()] 55 | ['by', 'sea', 'sells', 'she', 'shells', 'shore', 'the'] 56 | >>> [trie.rank(i) for i in trie.keys()] 57 | [1, 2, 3, 4, 5, 6, 7] 58 | >>> [trie.select(i) for i in range(1, 8)] 59 | ['by', 'sea', 'sells', 'she', 'she', 'shore', 'the'] 60 | >>> [i for i in trie.keys_with_prefix('sh')] 61 | ['she', 'shells', 'shore'] 62 | >>> [i for i in trie.keys_that_match('.he')] 63 | ['she', 'the'] 64 | >>> [i for i in trie.keys_that_match('s..')] 65 | ['sea', 'she'] 66 | >>> trie.longest_prefix_of('shellsort') 67 | 6 68 | >>> trie.delete('she') 69 | >>> trie.size() 70 | 7 71 | >>> trie.get('she').val 72 | >>> [i for i in trie.keys()] 73 | ['by', 'sea', 'sells', 'shells', 'shore', 'the'] 74 | >>> [trie.rank(i) for i in trie.keys()] 75 | [1, 2, 3, 4, 5, 6] 76 | ''' 77 | 78 | def __init__(self): 79 | self._root = Node() 80 | self._size = 0 81 | 82 | def size(self): 83 | return self._size 84 | 85 | def get(self, key): 86 | tmp = self._root 87 | d = 0 88 | 89 | while tmp: 90 | if d == len(key): 91 | return tmp 92 | char = key[d] 93 | try: 94 | tmp = tmp.next_nodes[char] 95 | except KeyError: 96 | return 
def longest_prefix_of(self, s):
    '''
    Return the length of the longest prefix of `s` that is stored as a key in the trie.

    A node counts as a stored key when its value is not None, so keys whose value
    is falsy (0, '', False, ...) are recognised as well.
    Returns 0 when no prefix of `s` is a key.
    '''
    node = self._root
    longest = depth = 0

    while node:
        # `is not None` rather than truthiness: a key may legitimately map to 0.
        if node.val is not None:
            longest = depth
        if depth == len(s):
            break
        char = s[depth]
        if char not in node.next_nodes:
            break
        node = node.next_nodes[char]
        depth += 1

    return longest
class TernarySearchTries(object):

    '''
    Ternary-Search-Trie is another tree-like data structure for string querying.
    Every node in Ternary-Search-Trie contains a character, a value and three links;
    from left to right the links lead to keys whose current character is smaller than,
    equal to, and larger than the character of the node. A Ternary-Search-Trie is more
    space-efficient than a Trie, but every operation is a little more complicated.
    A node holds a key only when its value is not None, so falsy values such as 0
    are valid values.
    >>> tst = TernarySearchTries()
    >>> tst.get('test')
    >>> test_data = ['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore']
    >>> for index, d in enumerate(test_data):
    ...     tst.put(d, index)
    >>> tst.size()
    8
    >>> [tst.get(i).val for i in test_data]
    [0, 1, 6, 3, 4, 5, 6, 7]
    >>> [i for i in tst.keys()]
    ['by', 'sea', 'sells', 'she', 'shells', 'shore', 'the']
    >>> [i for i in tst.keys_with_prefix('sh')]
    ['she', 'shells', 'shore']
    >>> [i for i in tst.keys_with_prefix('she')]
    ['she', 'shells']
    >>> tst.longest_prefix_of('shellsort')
    6
    >>> tst.longest_prefix_of('shex')
    3
    >>> [i for i in tst.keys_that_match('.he')]
    ['she', 'the']
    >>> [i for i in tst.keys_that_match('')]
    []
    '''

    def __init__(self):
        self._root = None
        self._size = 0

    def size(self):
        '''
        Return the number of successful put() calls.
        NOTE: overwriting an existing key is counted again (the doctest above pins 8
        for 7 distinct keys); this historical behaviour is kept on purpose.
        '''
        return self._size

    def get(self, key):
        '''
        Return the node reached by spelling out `key`, or None when the path does not
        exist or the key is empty. The node is returned even if it holds no value.
        '''
        if not key:
            return None

        node = self._root
        d = 0
        while node is not None:
            char = key[d]
            if char < node.char:
                node = node.left
            elif char > node.char:
                node = node.right
            elif d < len(key) - 1:
                node = node.mid
                d += 1
            else:
                return node
        return None

    def put(self, key, value):
        '''
        Associate `value` with `key`; empty keys are ignored.
        '''
        if not key:
            return
        self._root = self._put(self._root, key, value, 0)
        self._size += 1

    def _put(self, node, key, value, d):
        # Insert key[d:] into the subtree rooted at `node` and return the subtree root.
        char = key[d]
        if not node:
            node = TNode()
            node.char = char

        if char < node.char:
            node.left = self._put(node.left, key, value, d)
        elif char > node.char:
            node.right = self._put(node.right, key, value, d)
        elif d < len(key) - 1:
            node.mid = self._put(node.mid, key, value, d + 1)
        else:
            node.val = value
        return node

    def keys(self):
        '''
        Return all the keys in trie tree, in sorted order.
        '''
        return self.keys_with_prefix('')

    # 5.2.9 practice, implementation is available on the official website
    def keys_with_prefix(self, prefix):
        '''
        Return all the keys starts with the given prefix in the trie tree.
        An empty prefix matches every key.
        '''
        q = Queue()
        if not prefix:
            # Every key starts with the empty prefix: walk the whole tree from the root
            # (left and right siblings of the root included).
            self._collect(self._root, '', q)
            return q

        node = self.get(prefix)
        if node is None:
            return q
        if node.val is not None:
            q.enqueue(prefix)
        self._collect(node.mid, prefix, q)
        return q

    def _collect(self, node, prefix, q):
        # In-order walk, so keys are enqueued in sorted order.
        if not node:
            return
        self._collect(node.left, prefix, q)
        if node.val is not None:
            q.enqueue(prefix + node.char)
        self._collect(node.mid, prefix + node.char, q)
        self._collect(node.right, prefix, q)

    # 5.2.9 practice, implementation is available on the official website
    def longest_prefix_of(self, key):
        '''
        Return the length of the longest prefix of `key` that is a key in the trie,
        or 0 when there is none.
        '''
        if not key:
            return 0

        length = d = 0
        node = self._root

        while node is not None and d < len(key):
            char = key[d]
            if char < node.char:
                node = node.left
            elif char > node.char:
                node = node.right
            else:
                d += 1
                if node.val is not None:
                    length = d
                node = node.mid
        return length

    # 5.2.9 practice, implementation is available on the official website
    def keys_that_match(self, pattern):
        '''
        Return all the keys that match `pattern`, where '.' matches any character.
        An empty pattern matches nothing.
        '''
        q = Queue()
        if pattern:
            self._keys_collect(self._root, '', 0, pattern, q)
        return q

    def _keys_collect(self, node, prefix, index, pattern, q):
        # `index` is always a valid position in `pattern`: recursion into `mid`
        # only happens while characters remain.
        if not node:
            return

        char = pattern[index]
        if char == '.' or char < node.char:
            self._keys_collect(node.left, prefix, index, pattern, q)

        if char == '.' or char == node.char:
            if node.val is not None and index == len(pattern) - 1:
                q.enqueue(prefix + node.char)
            if index < len(pattern) - 1:
                self._keys_collect(node.mid, prefix + node.char, index + 1, pattern, q)

        if char == '.' or char > node.char:
            self._keys_collect(node.right, prefix, index, pattern, q)
class Bag(object):

    """
    Unordered collection backed by a singly linked list; the most recently
    added value is iterated first. Values cannot be removed.
    """

    def __init__(self):
        self._first = None
        self._size = 0

    def __iter__(self):
        """Yield the stored values, newest first."""
        node = self._first
        while node is not None:
            yield node.val
            node = node.next_node

    def __contains__(self, item):
        """Return True when some stored value equals `item`."""
        # Walk the values through __iter__, which advances the cursor and
        # compares values rather than list nodes.
        return any(val == item for val in self)

    def add(self, val):
        """Insert `val` at the head of the list."""
        node = Node(val)
        node.next_node = self._first
        self._first = node
        self._size += 1

    def is_empty(self):
        return self._first is None

    def size(self):
        return self._size
class GenericUnionFind(object):

    """
    Weighted quick-union over arbitrary hashable elements. Elements are
    registered lazily the first time they appear in a union() call.
    None cannot be used as an element because find() uses it as the
    "unknown element" marker.
    >>> guf = GenericUnionFind()
    >>> connections = [(4, 3), (3, 8), (6, 5), (9, 4),
    ...                (2, 1), (8, 9), (5, 0), (7, 2), (6, 1), (1, 0), (6, 7)]
    >>> for i, j in connections:
    ...     guf.union(i, j)
    ...
    >>> guf.connected(1, 4)
    False
    >>> guf.connected(8, 4)
    True
    >>> guf.connected(1, 5)
    True
    >>> guf.connected(1, 7)
    True
    >>> guf.count()
    2
    >>> guf2 = GenericUnionFind([(1, 0)])
    >>> guf2.connected(0, 1)
    True
    >>> guf2.connected(0, 'unknown')
    False
    >>> guf2.count()
    1
    """

    def __init__(self, tuple_data=None):
        self._id = {}
        if tuple_data:
            for a, b in tuple_data:
                self.union(a, b)

    def count(self):
        """Return the number of components among the registered elements."""
        # A root is an element that is its own parent.
        return sum(1 for key, node in self._id.items() if node.parent == key)

    def connected(self, p, q):
        """Return True when `p` and `q` are both registered and share a root."""
        p_root = self.find(p)
        q_root = self.find(q)
        # Compare against None explicitly: a root may be a falsy element such as 0.
        return p_root is not None and q_root is not None and p_root == q_root

    def find(self, node):
        """Return the root of `node`, or None when `node` was never registered."""
        if node not in self._id:
            return None
        tmp = node
        while self._id[tmp].parent != tmp:
            tmp = self._id[tmp].parent
        return tmp

    def union(self, p, q):
        """Merge the components of `p` and `q`, registering them if needed."""
        p_root = self.find(p)
        q_root = self.find(q)

        if p_root == q_root:
            if p_root is None:
                # Both elements are new: q becomes the root of the new component.
                # union(x, x) registers a single element, hence size 1.
                self._id[p] = DisjointNode(q)
                self._id[q] = DisjointNode(q, 1 if p == q else 2)
            return

        if p_root is None:
            self._id[p] = DisjointNode(q_root, 1)
            self._id[q_root].size += 1
            return

        if q_root is None:
            self._id[q] = DisjointNode(p_root, 1)
            self._id[p_root].size += 1
            return

        # Weighted union: hang the smaller tree below the larger one.
        if self._id[p_root].size < self._id[q_root].size:
            self._id[p_root].parent = q_root
            self._id[q_root].size += self._id[p_root].size
        else:
            self._id[q_root].parent = p_root
            self._id[p_root].size += self._id[q_root].size
class IndexMinPQ(object):

    """
    Indexed minimum priority queue. Every key is associated with an integer
    index in range(max_size), which allows changing a key after insertion.

    _index[pos]      -> index stored at heap position `pos` (positions are 1-based)
    _reverse_index[i] -> heap position of index `i`, or -1 when `i` is absent
    _keys[i]         -> key associated with index `i`
    """

    def __init__(self, max_size):
        assert max_size > 0
        self._max_size = max_size
        self._index = [-1] * (max_size + 1)
        self._reverse_index = [-1] * (max_size + 1)
        self._keys = [None] * (max_size + 1)
        self._keys_size = 0

    def _in_range(self, index):
        # Negative values must be rejected explicitly, otherwise Python's
        # negative indexing would silently alias another slot.
        return 0 <= index < self._max_size

    def is_empty(self):
        return self._keys_size == 0

    def size(self):
        return self._keys_size

    def contains(self, index):
        """Return True when `index` currently has a key in the queue."""
        if not self._in_range(index):
            return False
        return self._reverse_index[index] != -1

    def insert(self, index, element):
        """Associate `element` with `index`; ignored when `index` is out of
        range or already present."""
        if not self._in_range(index) or self.contains(index):
            return

        self._keys_size += 1
        self._index[self._keys_size] = index
        self._reverse_index[index] = self._keys_size
        self._keys[index] = element
        self.swim(self._keys_size)

    def min_index(self):
        """Return the index of the minimum key, or None when empty."""
        return None if self._keys_size == 0 else self._index[1]

    def min_key(self):
        """Return the minimum key, or None when empty."""
        return None if self._keys_size == 0 else self._keys[self._index[1]]

    def exchange(self, pos_a, pos_b):
        """Swap two heap positions and keep the reverse mapping in sync."""
        self._index[pos_a], self._index[pos_b] = self._index[pos_b], self._index[pos_a]
        self._reverse_index[self._index[pos_a]] = pos_a
        self._reverse_index[self._index[pos_b]] = pos_b

    def swim(self, pos):
        while pos > 1 and self._keys[self._index[pos // 2]] > self._keys[self._index[pos]]:
            self.exchange(pos // 2, pos)
            pos //= 2

    def sink(self, pos):
        length = self._keys_size
        while 2 * pos <= length:
            child = 2 * pos
            # Pick the smaller of the two children.
            if child < length and self._keys[self._index[child]] > self._keys[self._index[child + 1]]:
                child += 1
            if not self._keys[self._index[child]] < self._keys[self._index[pos]]:
                break
            self.exchange(child, pos)
            pos = child

    def change_key(self, i, key):
        """Replace the key of index `i`; ignored when `i` is not in the queue."""
        if not self.contains(i):
            return
        self._keys[i] = key
        # The new key may be smaller or larger than the old one, so restore
        # the heap order in both directions.
        self.swim(self._reverse_index[i])
        self.sink(self._reverse_index[i])

    def delete_min(self):
        """Remove the minimum key and return its index, or None when empty."""
        if self._keys_size == 0:
            return
        min_index = self._index[1]
        self.exchange(1, self._keys_size)
        self._keys_size -= 1
        self.sink(1)
        # The removed entry now sits just past the end of the heap: clear it.
        self._reverse_index[min_index] = -1
        self._keys[self._index[self._keys_size + 1]] = None
        self._index[self._keys_size + 1] = -1
        return min_index
self._cycle.push(end) 442 | self._cycle.push(vertex) 443 | self._on_stack[vertex] = False 444 | 445 | def has_cycle(self): 446 | return not self._cycle.is_empty() 447 | 448 | def cycle(self): 449 | return self._cycle 450 | 451 | 452 | class DepthFirstOrder(object): 453 | 454 | def __init__(self, graph): 455 | self._pre = Queue() 456 | self._post = Queue() 457 | self._reverse_post = Stack() 458 | self._marked = defaultdict(bool) 459 | 460 | for v in graph.vertices(): 461 | if not self._marked[v]: 462 | self.dfs(graph, v) 463 | 464 | def dfs(self, graph, vertex): 465 | self._pre.enqueue(vertex) 466 | self._marked[vertex] = True 467 | for edge in graph.adjacent_edges(vertex): 468 | if not self._marked[edge.end]: 469 | self.dfs(graph, edge.end) 470 | 471 | self._post.enqueue(vertex) 472 | self._reverse_post.push(vertex) 473 | 474 | def prefix(self): 475 | return self._pre 476 | 477 | def postfix(self): 478 | return self._post 479 | 480 | def reverse_postfix(self): 481 | return self._reverse_post 482 | 483 | 484 | class Topological(object): 485 | 486 | def __init__(self, graph): 487 | cycle_finder = DirectedCycle(graph) 488 | self._order = None 489 | if not cycle_finder.has_cycle(): 490 | df_order = DepthFirstOrder(graph) 491 | self._order = df_order.reverse_postfix() 492 | 493 | def order(self): 494 | return self._order 495 | 496 | def is_DAG(self): 497 | return self._order is not None 498 | 499 | 500 | if __name__ == '__main__': 501 | doctest.testmod() 502 | -------------------------------------------------------------------------------- /chapter_4/module_4_1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding:UTF-8 -*- 3 | import copy 4 | import doctest 5 | import random 6 | from collections import defaultdict 7 | from basic_data_struct import Bag, Queue, Stack 8 | 9 | 10 | class Graph(object): 11 | 12 | """ 13 | Undirected graph implementation. 
The cost of space is proportional to O(V + E) 14 | (V is the number of vertices and E is the number of edges). Adding 15 | an edge only takes constant time. The running time of 16 | Checking if node v is adjacent to w and traveling all adjacent point of v 17 | is related to the degree of v. This implementation supports multiple 18 | input data types(immutable). 19 | TODO: Test file input. 20 | >>> g = Graph() 21 | >>> test_data = [(0, 5), (4, 3), (0, 1), (9, 12), (6, 4), (5, 4), (0, 2), # from book tinyG.txt 22 | ... (11, 12), (9, 10), (0, 6), (7, 8), (9, 11), (5, 3)] 23 | >>> for a, b in test_data: 24 | ... g.add_edge(a, b) 25 | ... 26 | >>> g.vertices_size() 27 | 13 28 | >>> len(test_data) == g.edges_size() 29 | True 30 | >>> adjacent_vertices = ' '.join([str(v) for v in g.get_adjacent_vertices(0)]) 31 | >>> adjacent_vertices 32 | '6 2 1 5' 33 | >>> g.degree(0) 34 | 4 35 | >>> g.degree(9) 36 | 3 37 | >>> g.max_degree() 38 | 4 39 | >>> g.number_of_self_loops() 40 | 0 41 | >>> g 42 | 13 vertices, 13 edges 43 | 0: 6 2 1 5 44 | 1: 0 45 | 2: 0 46 | 3: 5 4 47 | 4: 5 6 3 48 | 5: 3 4 0 49 | 6: 0 4 50 | 7: 8 51 | 8: 7 52 | 9: 11 10 12 53 | 10: 9 54 | 11: 9 12 55 | 12: 11 9 56 | 57 | >>> g2 = Graph(graph=g) 58 | >>> g2.add_edge(4, 9) 59 | >>> g.has_edge(4, 9) 60 | False 61 | >>> g2.has_edge(4, 9) 62 | True 63 | >>> g2.has_edge(9, 4) 64 | True 65 | >>> g2.add_edge(4, 9) 66 | >>> [i for i in g2.get_adjacent_vertices(4)] 67 | [9, 5, 6, 3] 68 | """ 69 | 70 | def __init__(self, input_file=None, graph=None): 71 | self._edges_size = 0 72 | self._adj = defaultdict(Bag) 73 | # 4.1.3 practice, add a graph parameter for constructor method. 74 | if graph: 75 | self._adj = copy.deepcopy(graph._adj) 76 | self._edges_size = graph.edges_size() 77 | 78 | def vertices_size(self): 79 | return len(self._adj.keys()) 80 | 81 | def edges_size(self): 82 | return self._edges_size 83 | 84 | def add_edge(self, vertext_a, vertext_b): 85 | # 4.1.5 practice, no self cycle or parallel edges. 
86 | if self.has_edge(vertext_a, vertext_b) or vertext_a == vertext_b: 87 | return 88 | self._adj[vertext_a].add(vertext_b) 89 | self._adj[vertext_b].add(vertext_a) 90 | 91 | self._edges_size += 1 92 | 93 | # 4.1.4 practice, add has_edge method 94 | def has_edge(self, vertext_a, vertext_b): 95 | if vertext_a not in self._adj or vertext_b not in self._adj: 96 | return False 97 | edge = next((i for i in self._adj[vertext_a] if i == vertext_b), None) 98 | return edge is not None 99 | 100 | def get_adjacent_vertices(self, vertex): 101 | return self._adj[vertex] 102 | 103 | def vertices(self): 104 | return self._adj.keys() 105 | 106 | def degree(self, vertex): 107 | assert vertex in self._adj 108 | return self._adj[vertex].size() 109 | 110 | def max_degree(self): 111 | result = 0 112 | for vertex in self._adj: 113 | v_degree = self.degree(vertex) 114 | if v_degree > result: 115 | result = v_degree 116 | return result 117 | 118 | def avg_degree(self): 119 | return float(2 * self._edges_size) / self.vertices_size() 120 | 121 | def number_of_self_loops(self): 122 | count = 0 123 | for k in self._adj: 124 | for vertex in self._adj[k]: 125 | if vertex == k: 126 | count += 1 127 | return int(count / 2) 128 | 129 | # 4.1.31 check the number of parallel edges with linear running time. 
class DepthFirstPaths(object):

    """
      Undirected graph depth-first searching algorithms implementation.
    Depth-First-Search recursively reaches all vertices that are adjacent to the
    start vertex, then treats these adjacent vertices as start vertices and searches
    again until all the connected vertices are marked.
    Querying a vertex never changes the search result: has_path_to() on an
    unreachable or unknown vertex simply returns False.
    >>> g = Graph()
    >>> test_data = [(0, 5), (2, 4), (2, 3), (1, 2), (0, 1), (3, 4), (3, 5), (0, 2)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> dfp = DepthFirstPaths(g, 0)
    >>> [dfp.has_path_to(i) for i in range(6)]
    [True, True, True, True, True, True]
    >>> [i for i in dfp.path_to(4)]
    [0, 2, 3, 4]
    >>> [i for i in dfp.path_to(1)]
    [0, 2, 1]
    """

    def __init__(self, graph, start_vertex):
        # Set of vertices reachable from the start vertex.
        self._marked = set()
        # _edge_to[v] is the vertex from which v was first reached.
        self._edge_to = {}
        self._start = start_vertex
        self.dfs(graph, self._start)

    def dfs(self, graph, vertex):
        self._marked.add(vertex)

        for v in graph.get_adjacent_vertices(vertex):
            if v not in self._marked:
                self._edge_to[v] = vertex
                self.dfs(graph, v)

    def has_path_to(self, vertex):
        """Return True when `vertex` is reachable from the start vertex."""
        # Plain membership test: a lookup must not register the vertex.
        return vertex in self._marked

    def vertices_size(self):
        """Return the number of vertices reachable from the start vertex."""
        return len(self._marked)

    def path_to(self, vertex):
        """Return a stack with the path from the start vertex to `vertex`
        (start on top), or None when there is no such path."""
        if not self.has_path_to(vertex):
            return None

        tmp = vertex
        path = Stack()
        while tmp != self._start:
            path.push(tmp)
            tmp = self._edge_to[tmp]
        path.push(self._start)
        return path
class ConnectedComponent(object):

    """
      Construct connected components using Depth-First-Search algorithm.
    Using this algorithm we need to construct all the connected components
    from the beginning which the cost of running time and space are both
    proportional to O(V + E). But it takes only constant time for querying
    if two vertices are connected.
    Vertices that are not part of the graph belong to no component:
    connected() returns False for them and vertex_id() returns -1.
    >>> g = Graph()
    >>> test_data = [(0, 5), (4, 3), (0, 1), (9, 12), (6, 4), (5, 4), (0, 2),
    ...              (11, 12), (9, 10), (0, 6), (7, 8), (9, 11), (5, 3)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> cc = ConnectedComponent(g)
    >>> cc.connected(0, 8)
    False
    >>> cc.connected(0, 4)
    True
    >>> cc.connected(0, 9)
    False
    >>> cc.connected(0, 'not a vertex')
    False
    >>> cc.vertex_id(0)
    0
    >>> cc.vertex_id(7)
    1
    >>> cc.vertex_id(11)
    2
    >>> cc.vertex_id('not a vertex')
    -1
    >>> cc.count()
    3
    """

    def __init__(self, graph):
        # Maps every visited vertex to the id of its component; a vertex is
        # "marked" exactly when it has an entry here.
        self._id = {}
        self._count = 0

        for s in graph.vertices():
            if s not in self._id:
                self.dfs(graph, s)
                self._count += 1

    def dfs(self, graph, vertex):
        # All vertices reached from one start vertex share the current count as id.
        self._id[vertex] = self._count
        for s in graph.get_adjacent_vertices(vertex):
            if s not in self._id:
                self.dfs(graph, s)

    def connected(self, vertex_1, vertex_2):
        """Return True when both vertices are known and in the same component."""
        if vertex_1 not in self._id or vertex_2 not in self._id:
            return False
        return self._id[vertex_1] == self._id[vertex_2]

    def vertex_id(self, vertex):
        """Return the component id of `vertex`, or -1 for an unknown vertex."""
        return self._id.get(vertex, -1)

    def count(self):
        """Return the number of connected components."""
        return self._count
class TwoColor(object):

    """
    Using Depth-First-Search algorithm to solve Two-Color problems:
    a graph is bipartite when its vertices can be painted with two colors
    so that the two ends of every edge get different colors.
    >>> g = Graph()
    >>> test_data = [(0, 5), (2, 4), (2, 3), (1, 2), (0, 1), (3, 4), (3, 5), (0, 2)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> tc = TwoColor(g)
    >>> tc.is_bipartite()
    False
    >>> g2 = Graph()
    >>> for a, b in [(0, 1), (1, 2), (2, 3), (3, 0)]:
    ...     g2.add_edge(a, b)
    ...
    >>> TwoColor(g2).is_bipartite()
    True
    """

    def __init__(self, graph):
        self._marked = defaultdict(bool)
        # The two colors are represented by False and True.
        self._color = defaultdict(bool)
        self._is_twocolorable = True

        for vertex in graph.vertices():
            if not self._marked[vertex]:
                self.dfs(graph, vertex)

    def dfs(self, graph, vertex):
        self._marked[vertex] = True
        for v in graph.get_adjacent_vertices(vertex):
            if not self._marked[v]:
                # A newly discovered neighbour gets the opposite color.
                self._color[v] = not self._color[vertex]
                self.dfs(graph, v)
            elif self._color[v] == self._color[vertex]:
                # An already colored neighbour with the same color closes an odd cycle.
                self._is_twocolorable = False

    def is_bipartite(self):
        """Return True when the graph can be colored with two colors."""
        return self._is_twocolorable
def __init__(self, graph):
    """
    Compute the eccentricity of every vertex plus the graph's diameter
    and radius.

    :param graph: object exposing `vertices()` and `vertices_size()`
    :raises ValueError: if the graph has no vertices
    :raises Exception: if the graph is not connected
    """
    self._eccentricities = {}
    self._diameter = 0
    self._radius = 9999999999

    # Materialize the vertices once. Any start vertex works for the
    # connectivity check below, so the first one is used; this avoids
    # random.sample, which rejects non-sequence populations on
    # Python 3.11+.
    # NOTE(review): the return type of graph.vertices() is not visible
    # here - confirm it is a plain iterable of hashable vertices.
    vertices = list(graph.vertices())
    if not vertices:
        raise ValueError('graph has no vertices.')

    dfp = DepthFirstPaths(graph, vertices[0])
    if dfp.vertices_size() != graph.vertices_size():
        raise Exception('graph is not connected.')

    for vertex in vertices:
        bfp = BreadthFirstPaths(graph, vertex)
        dist = bfp.max_distance()
        if dist < self._radius:
            self._radius = dist
        if dist > self._diameter:
            self._diameter = dist
        self._eccentricities[vertex] = dist
class KMP(object):

    '''
    Knuth-Morris-Pratt substring search driven by a DFA over a
    256-symbol alphabet (every character must satisfy ord(c) < 256).

    `search` returns the index of the first occurrence, or len(txt)
    when the pattern does not occur. `count` and `search_all` report
    every occurrence, including overlapping ones.
    >>> kmp = KMP('AACAA')
    >>> kmp.search('AABRAACADABRAACAADABRA')
    12
    >>> kmp = KMP('rab')
    >>> kmp.search('abacadabrabracabracadabrabrabracad')
    8
    >>> kmp.count('abacadabrabracabracadabrabrabracad')
    3
    >>> kmp.search_all('abacadabrabracabracadabrabrabracad')
    [8, 23, 26]
    >>> kmp2 = KMP('abracadabra')
    >>> kmp2.search('abacadabrabracabracadabrabrabracad')
    14
    >>> kmp2.count('abacadabrabracabracadabrabrabracad')
    1
    >>> kmp2.search_all('abacadabrabracabracadabrabrabracad')
    [14]
    >>> kmp3 = KMP('bcara')
    >>> kmp3.search('abacadabrabracabracadabrabrabracad')
    34
    >>> kmp3.count('abacadabrabracabracadabrabrabracad')
    0
    >>> kmp3.search_all('abacadabrabracabracadabrabrabracad')
    []
    >>> kmp4 = KMP('rabrabracad')
    >>> kmp4.search('abacadabrabracabracadabrabrabracad')
    23
    >>> kmp5 = KMP('abacad')
    >>> kmp5.search('abacadabrabracabracadabrabrabracad')
    0
    >>> KMP('aa').search_all('aaaa')
    [0, 1, 2]
    >>> KMP('aa').count('aaaa')
    3
    '''

    def __init__(self, pattern):
        """
        Build the DFA for `pattern`.

        `self._dfa[c][j]` is the state reached from state `j` on the
        character with code `c`; state len(pattern) means "matched".
        """
        self._pat = pattern
        arr = [0] * len(pattern)
        self._dfa = [arr[:] for _ in range(256)]
        self._dfa[ord(pattern[0])][0] = 1

        x = 0
        for j in range(1, len(pattern)):
            for c in range(256):
                self._dfa[c][j] = self._dfa[c][x]
            self._dfa[ord(pattern[j])][j] = j + 1
            x = self._dfa[ord(pattern[j])][x]

        # After the loop x is the state reached by feeding pattern[1:]
        # to the DFA, i.e. the state to resume from right after a full
        # match so that overlapping occurrences are not skipped.
        self._restart = x

    def search(self, txt):
        """Return the index of the first match, or len(txt) if none."""
        p_index = t_index = 0
        while t_index < len(txt) and p_index < len(self._pat):
            p_index = self._dfa[ord(txt[t_index])][p_index]
            t_index += 1
        if p_index == len(self._pat):
            return t_index - len(self._pat)
        return len(txt)

    # 5.3.8 practice
    def count(self, txt):
        """Return the number of (possibly overlapping) occurrences."""
        return len(self.search_all(txt))

    # 5.3.8 practice
    def search_all(self, txt):
        """Return the start index of every (possibly overlapping) match."""
        positions = []
        pat_len = len(self._pat)
        state = 0
        for t_index, char in enumerate(txt):
            state = self._dfa[ord(char)][state]
            if state == pat_len:
                positions.append(t_index + 1 - pat_len)
                # the DFA has no column for the accepting state, fall
                # back to the longest border of the pattern
                state = self._restart
        return positions
class RabinKarp(object):

    '''
    Rabin-Karp fingerprint substring search (radix 256, modulus 997).

    Every hash hit is verified character by character against the text
    (Las Vegas variant), so a hash collision is never reported as a
    match. `search` returns len(txt) when the pattern does not occur.
    >>> rk = RabinKarp('rab')
    >>> rk.search('abacadabrabracabracadabrabrabracad')
    8
    >>> rk.count('abacadabrabracabracadabrabrabracad')
    3
    >>> rk.search_all('abacadabrabracabracadabrabrabracad')
    [8, 23, 26]
    >>> rk2 = RabinKarp('abracadabra')
    >>> rk2.search('abacadabrabracabracadabrabrabracad')
    14
    >>> rk2.count('abacadabrabracabracadabrabrabracad')
    1
    >>> rk2.search_all('abacadabrabracabracadabrabrabracad')
    [14]
    >>> rk3 = RabinKarp('bcara')
    >>> rk3.search('abacadabrabracabracadabrabrabracad')
    34
    >>> rk3.count('abacadabrabracabracadabrabrabracad')
    0
    >>> rk3.search_all('abacadabrabracabracadabrabrabracad')
    []
    >>> rk4 = RabinKarp('rabrabracad')
    >>> rk4.search('abacadabrabracabracadabrabrabracad')
    23
    >>> rk5 = RabinKarp('abacad')
    >>> rk5.search('abacadabrabracabracadabrabrabracad')
    0
    >>> RabinKarp('abc').search('ab')
    2
    '''

    def __init__(self, pattern):
        """Precompute the pattern hash and the leading-digit weight."""
        self._pat = pattern
        self._pat_len = len(pattern)
        self._q = 997
        # _rm == 256 ** (pat_len - 1) % q, used to drop the leading char
        self._rm = 1
        for i in range(1, self._pat_len):
            self._rm = (256 * self._rm) % self._q
        self._pat_hash = self._hash(pattern, self._pat_len)

    def check(self, i, txt=None):
        """
        Verify a candidate match starting at index `i` of `txt`.

        5.3.12 practice, LasVegas version check method. When `txt` is
        omitted there is nothing to compare against and the candidate
        is accepted (Monte Carlo behaviour).
        """
        if txt is None:
            return True
        return txt[i:i + self._pat_len] == self._pat

    def _hash(self, txt, length):
        """Return the hash of the first `length` characters of `txt`."""
        result = 0
        for i in range(length):
            result = (256 * result + ord(txt[i])) % self._q
        return result

    def _matches(self, txt):
        """Yield, in increasing order, every verified match position."""
        txt_len = len(txt)
        if txt_len < self._pat_len:
            # the pattern cannot fit; hashing would index past the end
            return

        txt_hash = self._hash(txt, self._pat_len)
        if self._pat_hash == txt_hash and self.check(0, txt):
            yield 0

        for i in range(self._pat_len, txt_len):
            # remove the leading character, then append txt[i]
            leading = self._rm * ord(txt[i - self._pat_len]) % self._q
            txt_hash = (txt_hash + self._q - leading) % self._q
            txt_hash = (txt_hash * 256 + ord(txt[i])) % self._q
            start = i - self._pat_len + 1
            if self._pat_hash == txt_hash and self.check(start, txt):
                yield start

    def search(self, txt):
        """Return the index of the first match, or len(txt) if none."""
        return next(self._matches(txt), len(txt))

    # 5.3.10 practice
    def search_all(self, txt):
        """Return the start index of every match."""
        return list(self._matches(txt))

    # 5.3.10 practice
    def count(self, txt):
        """Return the number of matches."""
        return sum(1 for _ in self._matches(txt))
def empty_space(txt, count):
    '''
    Return the index where the first run of `count` consecutive blanks
    starts in `txt`; when there is no such run, return len(txt) - 1.
    >>> empty_space('   xxxx', 3)
    0
    >>> empty_space('xxx   xxxXXXXXX   Xxxx', 3)
    3
    >>> empty_space('xxxx   ', 3)
    4
    >>> empty_space('xxx  ', 3)
    4
    '''

    size = len(txt)
    last_start = size - count
    pos = 0
    while pos <= last_start:
        if txt[pos] != ' ':
            pos += 1
            continue

        # measure the blank run beginning at pos, capped at `count`
        run = 0
        while run < count and txt[pos + run] == ' ':
            run += 1
        if run == count:
            return pos

        # txt[pos + run] is not a blank, so resume right after it
        pos += run + 1
    return size - 1
class BruteForceRL(object):

    '''
    Brute force substring search that compares the pattern against each
    text window from the rightmost character to the leftmost one.
    `search` returns len(txt) when the pattern does not occur.
    >>> brute = BruteForceRL('rab')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    8
    >>> brute = BruteForceRL('abracadabra')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    14
    >>> brute = BruteForceRL('bcara')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    34
    >>> brute = BruteForceRL('rabrabracad')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    23
    >>> brute = BruteForceRL('abacad')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    0
    '''

    def __init__(self, pattern):
        self._pat = pattern
        self._pat_len = len(pattern)

    def search(self, txt):
        """Return the index of the first match, or len(txt) if none."""
        txt_len = len(txt)
        for start in range(txt_len - self._pat_len + 1):
            # walk the window backwards; the else branch runs only when
            # no mismatch interrupted the scan
            for offset in range(self._pat_len - 1, -1, -1):
                if txt[start + offset] != self._pat[offset]:
                    break
            else:
                return start
        return txt_len
class RBTree(object):

    """
    Left-leaning red-black binary search tree mapping keys to values.

    `get`, `min_val`, `max_val` and `select` return Node objects (or
    None), not bare values.
    >>> rbt = RBTree()
    >>> rbt.is_empty()
    True
    >>> rbt.size()
    0
    >>> for index, e in enumerate('EASYQUITION'):
    ...     rbt.put(e, index)
    ...
    >>> rbt.check()
    True
    >>> node1 = rbt.get('A').value
    >>> node1
    1
    >>> rbt.get('E').value
    0
    >>> rbt.get('Y').value
    3
    >>> rbt.get('N').value
    10
    >>> rbt.is_empty()
    False
    >>> rbt.size()  ### duplicate values 'I'
    10
    >>> rbt.min_val().value
    1
    >>> rbt.max_val().value
    3
    >>> rbt.delete_min()
    >>> rbt.min_val().value
    0
    >>> rbt.delete_min()
    >>> rbt.min_val().value
    8
    >>> rbt.delete_max()
    >>> rbt.max_val().value
    5
    >>> rbt.delete_max()
    >>> rbt.max_val().value
    7
    >>> rbt.check()
    True
    """

    def __init__(self):
        self._root = None

    def __is_red(self, node):
        """Return True if `node` exists and is red (None counts as black)."""
        if not node:
            return False
        return node.color == RED

    def size(self):
        """Return the number of keys stored in the tree."""
        return self.__node_size(self._root)

    def is_empty(self):
        return self._root is None

    def __node_size(self, node):
        return 0 if not node else node.size

    def __rotate_left(self, node):
        """Turn a red right link of `node` into a red left link."""
        assert node and self.__is_red(node.right)

        rotate_node = node.right
        node.right = rotate_node.left
        rotate_node.left = node
        rotate_node.color = node.color
        node.color = RED
        rotate_node.size = node.size
        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return rotate_node

    def __rotate_right(self, node):
        """Turn a red left link of `node` into a red right link."""
        assert node and self.__is_red(node.left)

        rotate_node = node.left
        node.left = rotate_node.right
        rotate_node.right = node
        rotate_node.color = node.color
        node.color = RED
        rotate_node.size = node.size
        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return rotate_node

    def __flip_colors(self, node):
        """Toggle the colors of `node` and of both its children."""
        assert node and node.left and node.right
        assert (not self.__is_red(node) and self.__is_red(node.left) and
                self.__is_red(node.right) or
                self.__is_red(node) and
                not self.__is_red(node.left) and
                not self.__is_red(node.right))

        node.color = RED if node.color == BLACK else BLACK
        node.left.color = RED if node.left.color == BLACK else BLACK
        node.right.color = RED if node.right.color == BLACK else BLACK

    def get(self, key):
        """Return the Node holding `key`, or None if the key is absent."""
        return self.__get(self._root, key)

    def __get(self, node, key):
        tmp = node
        while tmp:
            if tmp.key > key:
                tmp = tmp.left
            elif tmp.key < key:
                tmp = tmp.right
            else:
                break
        return tmp

    def min_val(self):
        """Return the Node with the smallest key, or None for an empty tree."""
        if not self._root:
            return None
        return self.__min_val(self._root)

    def __min_val(self, node):
        tmp = node
        while tmp.left:
            tmp = tmp.left
        return tmp

    def put(self, key, value):
        """Insert `key` with `value`, overwriting the value of an existing key."""
        self._root = self.__put(self._root, key, value)
        self._root.color = BLACK

    def __put(self, node, key, value):
        if not node:
            return Node(key, value, 1, RED)
        if key < node.key:
            node.left = self.__put(node.left, key, value)
        elif key > node.key:
            node.right = self.__put(node.right, key, value)
        else:
            node.value = value

        # according to the book's definition, a red link only leans left,
        # if the right node is red, rotate left to make the left node red.
        if self.__is_red(node.right) and not self.__is_red(node.left):
            node = self.__rotate_left(node)

        # a red-black tree must not contain two consecutive red left links,
        # in this case rotate right, then both children are red and
        # the next step flips the colors.
        if self.__is_red(node.left) and self.__is_red(node.left.left):
            node = self.__rotate_right(node)

        if self.__is_red(node.left) and self.__is_red(node.right):
            self.__flip_colors(node)

        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return node

    def __balance(self, node):
        """Restore the red-black invariants at `node` on the way back up."""
        assert node is not None

        if self.__is_red(node.right):
            node = self.__rotate_left(node)

        if self.__is_red(node.left) and self.__is_red(node.left.left):
            node = self.__rotate_right(node)

        if self.__is_red(node.left) and self.__is_red(node.right):
            self.__flip_colors(node)

        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return node

    def __move_red_left(self, node):
        assert node is not None
        assert (self.__is_red(node) and not self.__is_red(node.left) and
                not self.__is_red(node.left.left))

        self.__flip_colors(node)
        # if node.right.left is red, there is one more node that can be
        # "borrowed", so move it over to the left side.
        if self.__is_red(node.right.left):
            node.right = self.__rotate_right(node.right)
            node = self.__rotate_left(node)
        return node

    # 3.3.39 delete minimum key in red-black tree, the java implementation is on the book,
    # this is python implementation of the book's answer.
    def delete_min(self):
        """Remove the smallest key; do nothing on an empty tree."""
        if self.is_empty():
            return None

        # this is for keeping red-black tree's balance
        if not self.__is_red(self._root.left) and not self.__is_red(self._root.right):
            self._root.color = RED
        self._root = self.__delete_min(self._root)
        if not self.is_empty():
            self._root.color = BLACK

    def __delete_min(self, node):
        if not node.left:
            return None
        # if node's left node and node's left's left node is not red, "borrow" one node
        # from node's right side to keep the red-black tree balance.
        if not self.__is_red(node.left) and not self.__is_red(node.left.left):
            node = self.__move_red_left(node)
        node.left = self.__delete_min(node.left)
        return self.__balance(node)

    def __move_red_right(self, node):
        self.__flip_colors(node)
        # this is the same principle as the __move_red_left function.
        # NOTE(review): the authors' current reference implementation
        # (algs4 RedBlackBST.moveRedRight) appears to rotate when
        # node.left.left IS red and then flips colors again - confirm
        # before relying on this variant for arbitrary inputs.
        if not self.__is_red(node.left.left):
            node = self.__rotate_right(node)
        return node

    # 3.3.39 delete maximum key in red-black tree, the java implementation is on the book,
    # this is python implementation of the book's answer, there is a little bit different with
    # delete_min function.
    def delete_max(self):
        """Remove the largest key; do nothing on an empty tree."""
        if self.is_empty():
            return None

        # this is for keeping red-black tree's balance
        if not self.__is_red(self._root.left) and not self.__is_red(self._root.right):
            self._root.color = RED
        self._root = self.__delete_max(self._root)
        if not self.is_empty():
            self._root.color = BLACK

    def __delete_max(self, node):
        if self.__is_red(node.left):
            node = self.__rotate_right(node)
        if not node.right:
            return None
        if not self.__is_red(node.right) and not self.__is_red(node.right.left):
            node = self.__move_red_right(node)
        node.right = self.__delete_max(node.right)
        return self.__balance(node)

    def delete(self, key):
        """Remove `key`; do nothing if the tree is empty or the key is absent."""
        # the recursive helper assumes the key exists, a missing key
        # would make it dereference a None child
        if self.is_empty() or self.get(key) is None:
            return None

        if not self.__is_red(self._root.left) and not self.__is_red(self._root.right):
            self._root.color = RED
        self._root = self.__delete(self._root, key)
        if not self.is_empty():
            self._root.color = BLACK

    def __delete(self, node, key):
        if key < node.key:
            # same principle with delete_min function
            if not self.__is_red(node.left) and not self.__is_red(node.left.left):
                node = self.__move_red_left(node)
            node.left = self.__delete(node.left, key)
        else:
            if self.__is_red(node.left):
                node = self.__rotate_right(node)

            if key == node.key and node.right is None:
                return None

            if not self.__is_red(node.right) and not self.__is_red(node.right.left):
                node = self.__move_red_right(node)

            if key == node.key:
                # replace this node by its successor: copy the successor's
                # key AND value (not the node object), then remove the
                # successor from the right subtree.
                successor = self.__min_val(node.right)
                node.value = successor.value
                node.key = successor.key
                node.right = self.__delete_min(node.right)
            else:
                node.right = self.__delete(node.right, key)
        return self.__balance(node)

    def select(self, k):
        """
        Find the kth node (0-based, in key order) of the binary search tree,
        the solution is similar with get() or put() function.
        Returns None when no node has rank k.
        """
        assert isinstance(k, int) and k <= self.size()

        if not self._root:
            return None

        tmp = self._root
        while tmp:
            tmp_size = self.__node_size(tmp.left)
            if tmp_size > k:
                tmp = tmp.left
            elif tmp_size < k:
                tmp = tmp.right
                k = k - tmp_size - 1
            else:
                return tmp

    def rank(self, key):
        """
        Find the rank of the node in the binary search tree by the given key,
        i.e. the number of keys smaller than `key`; -1 for an empty tree.
        """
        result = 0
        if not self._root:
            return -1
        tmp = self._root

        while tmp:
            if tmp.key > key:
                tmp = tmp.left
            elif tmp.key < key:
                result += self.__node_size(tmp.left) + 1
                tmp = tmp.right
            elif tmp.key == key:
                result += self.__node_size(tmp.left)
                break
        return result

    def max_val(self):
        """
        Find the node with the maximum key in the binary search tree,
        or None for an empty tree.
        """
        if not self._root:
            return None
        tmp = self._root
        while tmp.right:
            tmp = tmp.right
        return tmp

    def keys(self):
        """Return all keys in ascending order."""
        if not self._root:
            return []
        return self.keys_range(self.min_val().key, self.max_val().key)

    def keys_range(self, low, high):
        """Return the keys k with low <= k <= high in ascending order."""
        queue = []
        self.__keys(self._root, queue, low, high)
        return queue

    def __keys(self, node, queue, low, high):
        if not node:
            return
        if low < node.key:
            self.__keys(node.left, queue, low, high)
        if low <= node.key and high >= node.key:
            queue.append(node.key)
        if high > node.key:
            self.__keys(node.right, queue, low, high)

    def is_rbt(self):
        """Check that no right link is red and no two left reds are chained."""
        return self.__is_rbt(self._root)

    def __is_rbt(self, node):
        if not node:
            return True
        if self.__is_red(node.right):
            return False
        if node != self._root and self.__is_red(node) and self.__is_red(node.left):
            return False
        return self.__is_rbt(node.left) and self.__is_rbt(node.right)

    def is_binary_tree(self):
        """Check that every node's size field matches its subtree sizes."""
        return self.__is_binary_tree(self._root)

    def __is_binary_tree(self, node):
        if not node:
            return True
        if node.size != self.__node_size(node.left) + self.__node_size(node.right) + 1:
            return False
        return self.__is_binary_tree(node.left) and self.__is_binary_tree(node.right)

    def is_ordered(self):
        """Check the binary-search-tree ordering of the keys."""
        return self.__is_ordered(self._root, None, None)

    def __is_ordered(self, node, min_key, max_key):
        if not node:
            return True
        # compare against None explicitly: a bound such as 0 or '' is
        # falsy but still a real bound
        if min_key is not None and node.key <= min_key:
            return False
        if max_key is not None and node.key >= max_key:
            return False
        return (self.__is_ordered(node.left, min_key, node.key) and
                self.__is_ordered(node.right, node.key, max_key))

    def is_rank_consistent(self):
        """Check that rank() and select() are inverse to each other."""
        for i in range(self.size()):
            if i != self.rank(self.select(i).key):
                return False

        for key in self.keys():
            if key != self.select(self.rank(key)).key:
                return False

        return True

    def check(self):
        """Run every structural integrity check; True if all of them pass."""
        if not self.is_binary_tree():
            return False
        if not self.is_ordered():
            return False
        if not self.is_rank_consistent():
            return False
        if not self.is_rbt():
            return False
        return True
class MinPQ(object):

    """
    Fixed-capacity minimum priority queue backed by a 1-based binary heap.
    >>> mpq = MinPQ(10)
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> for i in lst:
    ...     mpq.insert(i)
    ...
    >>> print_lst = []
    >>> while not mpq.is_empty():
    ...     print_lst.append(str(mpq.del_min()))
    ...
    >>> ' '.join(print_lst)
    '0 1 2 3 4 5 6 7 8 9'
    """

    def __init__(self, size):
        """Create a queue able to hold `size` items (slot 0 stays unused)."""
        self._pq = [None] * (size + 1)
        self._size = 0

    def is_empty(self):
        return self._size == 0

    def size(self):
        return self._size

    def swim(self, pos):
        """Move the item at `pos` up until its parent is not larger."""
        # integer division keeps the index exact for any heap size
        while pos > 1 and self._pq[pos // 2] > self._pq[pos]:
            self._pq[pos // 2], self._pq[pos] = self._pq[pos], self._pq[pos // 2]
            pos //= 2

    def sink(self, pos):
        """Move the item at `pos` down until no child is smaller."""
        while 2 * pos <= self._size:
            index = 2 * pos
            if index < self._size and self._pq[index] > self._pq[index + 1]:
                index += 1
            if self._pq[pos] <= self._pq[index]:
                break
            self._pq[index], self._pq[pos] = self._pq[pos], self._pq[index]
            pos = index

    def insert(self, val):
        """Add `val`; raises IndexError when the queue is already full."""
        self._size += 1
        self._pq[self._size] = val
        self.swim(self._size)

    def del_min(self):
        """
        Remove and return the smallest item.

        Returns None and leaves the queue untouched when it is empty, so
        the size counter can never become negative.
        """
        if self.is_empty():
            return None
        min_val = self._pq[1]
        self._pq[1], self._pq[self._size] = self._pq[self._size], self._pq[1]
        self._pq[self._size] = None
        self._size -= 1
        self.sink(1)
        return min_val

    def min_val(self):
        """Return the smallest item without removing it."""
        return self._pq[1]
# 2.4.22 practice, a little change for python version, the queue's size is not limited.
class MaxPQDynamic(object):

    """
    Unbounded maximum priority queue stored as a 0-based binary heap in a
    python list: the node at position k has its parent at (k - 1) // 2 and
    its children at 2 * k + 1 and 2 * k + 2.

    >>> mpq = MaxPQDynamic()
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> for i in lst:
    ...     mpq.insert(i)
    ...
    >>> print_lst = []
    >>> while not mpq.is_empty():
    ...     print_lst.append(str(mpq.del_max()))
    ...
    >>> ' '.join(print_lst)
    '9 8 7 6 5 4 3 2 1 0'
    """

    def __init__(self):
        self._pq = []

    def is_empty(self):
        """Return True when the queue holds no keys."""
        return len(self._pq) == 0

    def size(self):
        """Return the number of keys currently stored."""
        return len(self._pq)

    def swim(self, pos):
        """Move the key at pos up while it is larger than its parent."""
        while pos > 0 and self._pq[(pos - 1) // 2] < self._pq[pos]:
            parent = (pos - 1) // 2
            self._pq[parent], self._pq[pos] = self._pq[pos], self._pq[parent]
            pos = parent

    def sink(self, pos):
        """Move the key at pos down while it is smaller than its larger child."""
        last = len(self._pq) - 1
        while 2 * pos + 1 <= last:
            index = 2 * pos + 1
            # pick the larger of the two children
            if index < last and self._pq[index] < self._pq[index + 1]:
                index += 1
            if self._pq[pos] >= self._pq[index]:
                break
            self._pq[index], self._pq[pos] = self._pq[pos], self._pq[index]
            pos = index

    def insert(self, val):
        """Add val to the queue."""
        self._pq.append(val)
        self.swim(len(self._pq) - 1)

    def del_max(self):
        """Remove and return the largest key; raises IndexError when empty."""
        max_val = self._pq[0]
        last = self._pq.pop()
        if self._pq:
            # the former last key replaces the root and sinks into place
            self._pq[0] = last
            self.sink(0)
        return max_val

    def max_val(self):
        """Return the largest key without removing it; raises IndexError when empty."""
        return self._pq[0]


class MinPQDynamic(object):

    """
    Unbounded minimum priority queue, the mirror image of MaxPQDynamic.

    >>> mpq = MinPQDynamic()
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> for i in lst:
    ...     mpq.insert(i)
    ...
    >>> print_lst = []
    >>> while not mpq.is_empty():
    ...     print_lst.append(str(mpq.del_min()))
    ...
    >>> ' '.join(print_lst)
    '0 1 2 3 4 5 6 7 8 9'
    """

    def __init__(self):
        self._pq = []

    def is_empty(self):
        """Return True when the queue holds no keys."""
        return len(self._pq) == 0

    def size(self):
        """Return the number of keys currently stored."""
        return len(self._pq)

    def swim(self, pos):
        """Move the key at pos up while it is smaller than its parent."""
        while pos > 0 and self._pq[(pos - 1) // 2] > self._pq[pos]:
            parent = (pos - 1) // 2
            self._pq[parent], self._pq[pos] = self._pq[pos], self._pq[parent]
            pos = parent

    def binary_swim(self, pos):
        """
        Swim the key at pos using binary search on the path to the root.

        The keys on the path from the root down to the parent of pos are in
        non-decreasing order (heap property), so the final slot of the key can
        be located with a binary search instead of comparing level by level.
        The keys that are larger than the target are then shifted one level
        down and the target is written into the freed slot.

        >>> mpq = MinPQDynamic()
        >>> for i in [5, 3, 8, 1, 9, 2]:
        ...     mpq._pq.append(i)
        ...     mpq.binary_swim(len(mpq._pq) - 1)
        ...
        >>> [mpq.del_min() for _ in range(6)]
        [1, 2, 3, 5, 8, 9]
        """
        target = self._pq[pos]
        # path[0] is pos itself, path[-1] is the root (position 0)
        path = [pos]
        while path[-1] > 0:
            path.append((path[-1] - 1) // 2)

        # ancestor keys listed root first, which is ascending order
        ancestor_keys = [self._pq[i] for i in path[:0:-1]]
        # ancestors with a key <= target stay where they are
        keep = bisect.bisect_right(ancestor_keys, target)
        shift = len(ancestor_keys) - keep

        for i in range(shift):
            self._pq[path[i]] = self._pq[path[i + 1]]
        self._pq[path[shift]] = target

    def sink(self, pos):
        """Move the key at pos down while it is larger than its smaller child."""
        last = len(self._pq) - 1
        while 2 * pos + 1 <= last:
            index = 2 * pos + 1
            # pick the smaller of the two children
            if index < last and self._pq[index] > self._pq[index + 1]:
                index += 1
            if self._pq[pos] <= self._pq[index]:
                break
            self._pq[index], self._pq[pos] = self._pq[pos], self._pq[index]
            pos = index

    def insert(self, val):
        """Add val to the queue."""
        self._pq.append(val)
        self.swim(len(self._pq) - 1)

    def del_min(self):
        """Remove and return the smallest key; raises IndexError when empty."""
        min_val = self._pq[0]
        last = self._pq.pop()
        if self._pq:
            # the former last key replaces the root and sinks into place
            self._pq[0] = last
            self.sink(0)
        return min_val

    def min_val(self):
        """Return the smallest key without removing it; raises IndexError when empty."""
        return self._pq[0]


# 2.4.30 practice
class MeanHeap(object):

    """
    Dynamic median finding with two heaps: a max-heap holding the lower half
    of the values and a min-heap holding the upper half.  The two heaps never
    differ in size by more than one, so the median is always at one of the
    two heap tops.

    >>> mh = MeanHeap()
    >>> for i in range(9):
    ...     mh.insert(i)
    ...
    >>> mh.median()
    4
    >>> mh.insert(9)
    >>> mh.median()
    4.5
    >>> mh.insert(10)
    >>> mh.median()
    5
    >>> mh.size()
    11
    """

    def __init__(self):
        self._min_heap = MinPQDynamic()
        self._max_heap = MaxPQDynamic()

    def is_empty(self):
        """Return True when no value has been inserted."""
        return self._min_heap.is_empty() and self._max_heap.is_empty()

    def size(self):
        """Return the total number of values stored in both halves."""
        return self._min_heap.size() + self._max_heap.size()

    def median(self):
        """
        Return the median of the inserted values, or None when empty.

        With an odd number of values the median is the top of the larger
        heap; with an even number it is the mean of the two tops.
        """
        if self.is_empty():
            return None
        if self._min_heap.size() < self._max_heap.size():
            return self._max_heap.max_val()

        if self._max_heap.size() < self._min_heap.size():
            return self._min_heap.min_val()

        return (self._min_heap.min_val() + self._max_heap.max_val()) / 2

    def insert(self, val):
        """
        Insert val exactly once and rebalance the two halves.

        Values not larger than the top of the lower half go to the lower
        half, everything else goes to the upper half; afterwards one key is
        moved across if the sizes differ by more than one.
        """
        if self._max_heap.is_empty() or val <= self._max_heap.max_val():
            self._max_heap.insert(val)
        else:
            self._min_heap.insert(val)

        if self._max_heap.size() > self._min_heap.size() + 1:
            self._min_heap.insert(self._max_heap.del_max())
        elif self._min_heap.size() > self._max_heap.size() + 1:
            self._max_heap.insert(self._min_heap.del_min())
# 2.4.33, 2.4.34 index minimum priority queue.
class IndexMinPQ(object):

    """
    Minimum priority queue whose keys are addressed by an integer index in
    the range [0, max_size).  Three parallel lists are kept:

    _keys[i]           key associated with index i (None when absent)
    _index[pos]        index stored at heap position pos (1-based heap)
    _reverse_index[i]  heap position of index i, -1 when i is not in the queue

    >>> test_data = 'testexmaple'
    >>> imp = IndexMinPQ(len(test_data))
    >>> imp.is_empty()
    True
    >>> for index, s in enumerate(test_data):
    ...     imp.insert(index, s)
    ...
    >>> imp.is_empty()
    False
    >>> imp.size()
    11
    >>> [imp.contains(i) for i in (12, -1, 1, 4, 10)]
    [False, False, True, True, True]
    >>> imp.min_index()
    7
    """

    def __init__(self, max_size):
        assert max_size > 0
        self._max_size = max_size
        self._index = [-1] * (max_size + 1)
        self._reverse_index = [-1] * (max_size + 1)
        self._keys = [None] * (max_size + 1)
        self._keys_size = 0

    def is_empty(self):
        """Return True when the queue holds no keys."""
        return self._keys_size == 0

    def size(self):
        """Return the number of keys currently stored."""
        return self._keys_size

    def contains(self, index):
        """Return True when index is in range and currently has a key."""
        if index < 0 or index >= self._max_size:
            return False
        return self._reverse_index[index] != -1

    def insert(self, index, element):
        """
        Associate element with index.

        The call is silently ignored when index is out of range or already
        present in the queue.
        """
        if index < 0 or index >= self._max_size or self.contains(index):
            return

        self._keys_size += 1
        self._index[self._keys_size] = index
        self._reverse_index[index] = self._keys_size
        self._keys[index] = element
        self.swim(self._keys_size)

    def min_index(self):
        """Return the index of the smallest key, or None when empty."""
        return None if self._keys_size == 0 else self._index[1]

    def min_key(self):
        """Return the smallest key, or None when empty."""
        return None if self._keys_size == 0 else self._keys[self._index[1]]

    def exchange(self, pos_a, pos_b):
        """Swap two heap positions and keep the reverse lookup in sync."""
        self._index[pos_a], self._index[pos_b] = self._index[pos_b], self._index[pos_a]
        self._reverse_index[self._index[pos_a]] = pos_a
        self._reverse_index[self._index[pos_b]] = pos_b

    def swim(self, pos):
        """Move the entry at heap position pos up while its key is smaller than its parent's."""
        while pos > 1 and self._keys[self._index[pos // 2]] > self._keys[self._index[pos]]:
            self.exchange(pos // 2, pos)
            pos //= 2

    def sink(self, pos):
        """Move the entry at heap position pos down while a child has a smaller key."""
        length = self._keys_size
        while 2 * pos <= length:
            tmp = 2 * pos
            # pick the child with the smaller key
            if tmp < length and self._keys[self._index[tmp]] > self._keys[self._index[tmp + 1]]:
                tmp += 1
            if not self._keys[self._index[tmp]] < self._keys[self._index[pos]]:
                break
            self.exchange(tmp, pos)
            pos = tmp

    def change_key(self, i, key):
        """Replace the key of index i; ignored when i is not in the queue."""
        if not self.contains(i):
            return
        self._keys[i] = key
        # the new key may be smaller or larger, so try both directions
        self.swim(self._reverse_index[i])
        self.sink(self._reverse_index[i])

    def delete_min(self):
        """Remove the smallest key and return its index (None when empty)."""
        if self._keys_size == 0:
            return
        min_index = self._index[1]
        last = self._keys_size
        self.exchange(1, last)
        self._keys_size -= 1
        self.sink(1)
        # the removed entry now sits at the old last position: clear it
        self._reverse_index[min_index] = -1
        self._keys[self._index[last]] = None
        self._index[last] = -1
        return min_index


class Node(object):

    """
    A pair (i, j) ordered by the cube sum i^3 + j^3.

    __cmp__ is kept for python 2; python 3 ignores it, so the rich
    comparison methods are defined explicitly to let the priority queues
    compare nodes.
    """

    def __init__(self, i, j):
        self._sum = i ** 3 + j ** 3
        self.i = i
        self.j = j

    def __cmp__(self, other):
        if self._sum < other._sum:
            return -1
        elif self._sum > other._sum:
            return 1
        return 0

    def __lt__(self, other):
        return self._sum < other._sum

    def __le__(self, other):
        return self._sum <= other._sum

    def __gt__(self, other):
        return self._sum > other._sum

    def __ge__(self, other):
        return self._sum >= other._sum

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum == other._sum

    def __ne__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum != other._sum

    def __hash__(self):
        # equal nodes have equal sums, so hashing the sum keeps hash and == consistent
        return hash(self._sum)

    def __str__(self):
        return '{} = {}^3 + {}^3'.format(self._sum, self.i, self.j)


# 2.4.25 practice, cube sum implementation.
def cubesum(n=10 ** 6):
    """
    Find the integers that can be written as a sum of two cubes
    i^3 + j^3 (0 <= i <= j <= n) in more than one way.

    A minimum priority queue is seeded with (i, i) for every i; each time
    (i, j) is removed, (i, j + 1) is inserted, so the pairs come out in
    non-decreasing order of their cube sum and equal sums are adjacent.

    Returns a list of (Node, Node) tuples, one for every two consecutive
    pairs sharing the same sum.

    >>> [first._sum for first, second in cubesum(12)]
    [1729]
    """
    min_pq = MinPQDynamic()
    for i in range(n + 1):
        min_pq.insert(Node(i, i))

    matches = []
    previous = None
    while not min_pq.is_empty():
        node = min_pq.del_min()
        if previous is not None and previous == node:
            matches.append((previous, node))
        previous = node
        if node.j < n:
            min_pq.insert(Node(node.i, node.j + 1))
    return matches
def heap_sort(lst):
    """
    Sort lst in place in ascending order with heap sort.

    The list is first turned into a maximum heap using the priority queue
    sink() operation.  Then lst[0] (the maximum) is exchanged with
    lst[size], size is decreased by one and lst[0] is sunk again, until
    size reaches zero.  Empty and single-element lists are left untouched.

    >>> lst = []
    >>> heap_sort(lst)
    >>> lst
    []
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> heap_sort(lst)
    >>> lst
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    """
    def sink(lst, pos, size):
        # size is the index of the last element that still belongs to the heap
        while 2 * pos + 1 <= size:
            index = 2 * pos + 1
            if index < size and lst[index + 1] > lst[index]:
                index += 1
            if lst[pos] >= lst[index]:
                break
            lst[pos], lst[index] = lst[index], lst[pos]
            pos = index

    size = len(lst) - 1
    for i in range(size // 2, -1, -1):
        sink(lst, i, size)

    # "> 0" rather than truthiness: size is -1 for an empty list
    while size > 0:
        lst[0], lst[size] = lst[size], lst[0]
        size -= 1
        sink(lst, 0, size)


if __name__ == '__main__':
    doctest.testmod()
    # cubesum()
# --------------------------------------------------------------------------------
# /chapter_4/module_4_2.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- encoding:UTF-8 -*-
import copy
import doctest
from collections import defaultdict
from basic_data_struct import Bag, Stack, Queue


class Digragh(object):

    """
    Directed graph implementation. Every edge is directed, so if v is
    reachable from w, w might not be reachable from v. There is an
    assisting data structure that records all available vertices, because
    self._adj.keys() only holds the vertices whose outdegree is not 0.
    A directed graph is almost the same as an undirected graph, much code
    from Gragh is reusable.
    >>> # 4.2.6 practice
    >>> graph = Digragh()
    >>> test_data = [(4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (8, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (7, 8), (8, 7), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6)]
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> graph.vertices_size()
    13
    >>> graph.edges_size()
    22
    >>> [i for i in graph.get_adjacent_vertices(2)]
    [0, 3]
    >>> [j for j in graph.get_adjacent_vertices(6)]
    [9, 4, 0]
    >>> [v for v in graph.vertices()]
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    >>> graph
    13 vertices, 22 edges
    0: 5 1
    2: 0 3
    3: 5 2
    4: 3 2
    5: 4
    6: 9 4 0
    7: 6 8
    8: 7 9
    9: 11 10
    10: 12
    11: 4 12
    12: 9
    <BLANKLINE>
    """

    def __init__(self, graph=None):
        self._edges_size = 0
        self._adj = defaultdict(Bag)
        self._vertices = set()

        # 4.2.3 practice, generate graph from another graph.
        if graph is not None:
            self._adj = copy.deepcopy(graph._adj)
            self._edges_size = graph.edges_size()
            self._vertices = copy.copy(graph.vertices())

    def vertices_size(self):
        """Return the number of vertices."""
        return len(self._vertices)

    def edges_size(self):
        """Return the number of edges."""
        return self._edges_size

    def add_edge(self, start, end):
        """Add the edge start->end; parallel edges and self loops are ignored."""
        # 4.2.5 practice, parallel edge and self cycle are not allowed
        if self.has_edge(start, end) or start == end:
            return
        self._vertices.add(start)
        self._vertices.add(end)
        self._adj[start].add(end)
        self._edges_size += 1

    def get_adjacent_vertices(self, vertex):
        """
        Return the Bag of vertices that vertex points to.

        A vertex without outgoing edges yields a fresh empty Bag that is not
        stored, so a read never changes the graph.
        """
        if vertex in self._adj:
            return self._adj[vertex]
        return Bag()

    def vertices(self):
        """Return the set of all vertices."""
        return self._vertices

    def reverse(self):
        """Return a new graph with every edge reversed."""
        reverse_graph = Digragh()
        for vertex in self.vertices():
            for adjacent_vertex in self.get_adjacent_vertices(vertex):
                reverse_graph.add_edge(adjacent_vertex, vertex)
        return reverse_graph

    # 4.2.4 practice, add has_edge method for Digraph
    def has_edge(self, start, end):
        """Return True when the edge start->end exists (never mutates the graph)."""
        if start not in self._adj:
            return False
        return any(vertex == end for vertex in self._adj[start])

    def __repr__(self):
        lines = ['{} vertices, {} edges'.format(len(self._vertices), self._edges_size)]
        try:
            # sorted output does not depend on insertion order
            keys = sorted(self._adj)
        except TypeError:
            # vertices that cannot be compared keep their insertion order
            keys = list(self._adj)
        for k in keys:
            lines.append('{}: {}'.format(k, ' '.join(str(vertex) for vertex in self._adj[k])))
        return '\n'.join(lines) + '\n'


class DirectedDFS(object):

    """
    Depth-First-Search algorithm with directed graph, which can solve the
    directed graph reachability problem for one or several source vertices.
    >>> graph = Digragh()
    >>> test_data = [(4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (8, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (7, 8), (8, 7), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6)]
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> dfs = DirectedDFS(graph, 1)
    >>> [i for i in graph.vertices() if dfs.marked(i)]
    [1]
    >>> dfs1 = DirectedDFS(graph, 2)
    >>> [i for i in graph.vertices() if dfs1.marked(i)]
    [0, 1, 2, 3, 4, 5]
    >>> dfs2 = DirectedDFS(graph, 1, 2, 6)
    >>> [i for i in graph.vertices() if dfs2.marked(i)]
    [0, 1, 2, 3, 4, 5, 6, 9, 10, 11, 12]
    """

    def __init__(self, graph, *sources):
        self._marked = defaultdict(bool)
        for vertex in sources:
            if not self._marked[vertex]:
                self.dfs(graph, vertex)

    def dfs(self, graph, vertex):
        """Mark vertex and, recursively, everything reachable from it."""
        self._marked[vertex] = True
        for adjacent_vertex in graph.get_adjacent_vertices(vertex):
            if not self._marked[adjacent_vertex]:
                self.dfs(graph, adjacent_vertex)

    def marked(self, vertex):
        """Return True when vertex is reachable from one of the sources."""
        return self._marked[vertex]
class DirectedCycle(object):

    """
    Detects a directed cycle with Depth-First-Search.
    While a vertex is on the recursion stack it is flagged in
    _on_stack; meeting an adjacent vertex that is still flagged
    means the current path closes a cycle.
    >>> graph = Digragh()
    >>> test_data = [(4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (8, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (7, 8), (8, 7), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6)]
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> dc = DirectedCycle(graph)
    >>> dc.has_cycle()
    True
    >>> [i for i in dc.cycle()]
    [3, 5, 4, 3]
    """

    def __init__(self, graph):
        self._marked = defaultdict(bool)
        self._edge_to = {}
        self._on_stack = defaultdict(bool)
        self._cycle = Stack()
        for start in graph.vertices():
            if self._marked[start]:
                continue
            self.dfs(graph, start)

    def dfs(self, graph, vertex):
        """Explore from vertex, recording the first cycle that is found."""
        self._on_stack[vertex] = True
        self._marked[vertex] = True

        for neighbour in graph.get_adjacent_vertices(vertex):
            if self.has_cycle():
                # a cycle is already recorded, unwind without further work
                return
            if not self._marked[neighbour]:
                self._edge_to[neighbour] = vertex
                self.dfs(graph, neighbour)
            elif self._on_stack[neighbour]:
                # walk the tree edges back from vertex to neighbour
                current = vertex
                while current != neighbour:
                    self._cycle.push(current)
                    current = self._edge_to[current]
                self._cycle.push(neighbour)
                self._cycle.push(vertex)
        self._on_stack[vertex] = False

    def has_cycle(self):
        """Return True once a cycle has been recorded."""
        return not self._cycle.is_empty()

    def cycle(self):
        """Return the stack holding the vertices of the recorded cycle."""
        return self._cycle


class DepthFirstOrder(object):

    """
    Runs Depth-First-Search over the whole graph and records the
    vertices in preorder, postorder and reverse postorder.
    """

    def __init__(self, graph):
        self._pre = Queue()
        self._post = Queue()
        self._reverse_post = Stack()
        self._marked = defaultdict(bool)

        for start in graph.vertices():
            if self._marked[start]:
                continue
            self.dfs(graph, start)

    def dfs(self, graph, vertex):
        """Visit vertex: preorder on entry, postorder on exit."""
        self._pre.enqueue(vertex)
        self._marked[vertex] = True
        for neighbour in graph.get_adjacent_vertices(vertex):
            if self._marked[neighbour]:
                continue
            self.dfs(graph, neighbour)

        self._post.enqueue(vertex)
        self._reverse_post.push(vertex)

    def prefix(self):
        """Return the vertices in preorder."""
        return self._pre

    def postfix(self):
        """Return the vertices in postorder."""
        return self._post

    def reverse_postfix(self):
        """Return the vertices in reverse postorder."""
        return self._reverse_post


class Topological(object):

    """
    Topological-Sorting implementation. Topological-Sorting
    has to be applied on a directed acyclic graph. If there is
    an edge u->w, then u is before w. This implementation uses the
    Depth-First-Search algorithm: for any edge v->w, dfs(w)
    returns before dfs(v), because the input graph does
    not contain any cycle.
    Another Topological-Sorting implementation uses a queue to
    enqueue the vertices whose indegree is 0, then dequeues and marks
    them and enqueues their adjacent vertices until all the vertices in
    the graph are marked. That implementation is not given here.
    >>> test_data = [(2, 3), (0, 6), (0, 1), (2, 0), (11, 12),
    ...              (9, 12), (9, 10), (9, 11), (3, 5), (8, 7),
    ...              (5, 4), (0, 5), (6, 4), (6, 9), (7, 6)]
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> topo = Topological(graph)
    >>> topo.is_DAG()
    True
    >>> [i for i in topo.order()]
    [8, 7, 2, 3, 0, 6, 9, 10, 11, 12, 1, 5, 4]
    """

    def __init__(self, graph):
        self._order = None
        if DirectedCycle(graph).has_cycle():
            # no topological order exists for a cyclic graph
            return
        self._order = DepthFirstOrder(graph).reverse_postfix()

    def order(self):
        """Return the topological order, or None for a cyclic graph."""
        return self._order

    def is_DAG(self):
        """Return True when the graph is a directed acyclic graph."""
        return self._order is not None
class KosarajuSCC(object):

    """
    Kosaraju algorithm for strongly connected components: take the
    reverse postorder of the reversed graph, then run Depth-First-Search
    on the graph in that order; every search started from an unmarked
    vertex labels one component.
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> scc = KosarajuSCC(graph)
    >>> count = scc.count()
    >>> output = defaultdict(Queue)
    >>> for v in graph.vertices():
    ...     output[scc.vertex_id(v)].enqueue(v)
    ...
    >>> ['{}: {}'.format(k, ', '.join(map(str, v))) for k, v in output.items()]
    ['0: 1', '1: 0, 2, 3, 4, 5', '2: 9, 10, 11, 12', '3: 6, 8', '4: 7']
    """

    def __init__(self, graph):
        self._marked = defaultdict(bool)
        self._id = {}
        self._count = 0
        reversed_order = DepthFirstOrder(graph.reverse()).reverse_postfix()
        for start in reversed_order:
            if self._marked[start]:
                continue
            self.dfs(graph, start)
            # everything marked by this search forms one component
            self._count += 1

    def dfs(self, graph, vertex):
        """Label vertex and everything reachable from it with the current component id."""
        self._marked[vertex] = True
        self._id[vertex] = self._count
        for neighbour in graph.get_adjacent_vertices(vertex):
            if self._marked[neighbour]:
                continue
            self.dfs(graph, neighbour)

    def strongly_connected(self, vertex_1, vertex_2):
        """Return True when both vertices belong to the same component."""
        return self._id[vertex_1] == self._id[vertex_2]

    def vertex_id(self, vertex):
        """Return the component id of vertex."""
        return self._id[vertex]

    def count(self):
        """Return the number of strongly connected components."""
        return self._count
class TransitiveClosure(object):

    """
    This class can check if v is reachable
    from w in a directed graph using DirectedDFS.
    The cost of running time is proportional to
    O(V(V + E)), and the cost of space is proportional
    to O(V*V), so this is not a good solution for
    large scale graphs.
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> tc = TransitiveClosure(graph)
    >>> tc.reachable(1, 5)
    False
    >>> tc.reachable(1, 0)
    False
    >>> tc.reachable(0, 1)
    True
    >>> tc.reachable(0, 9)
    False
    >>> tc.reachable(8, 12)
    True
    """

    def __init__(self, graph):
        # one full search per vertex
        self._all = {}
        for vertex in graph.vertices():
            self._all[vertex] = DirectedDFS(graph, vertex)

    def reachable(self, start, end):
        """Return True when end can be reached from start."""
        return self._all[start].marked(end)


# 4.2.7 practice, implement Degrees class
# which compute degrees of vertices in a directed graph.
class Degrees(object):

    """
    Indegree and outdegree of every vertex, plus the derived sets of
    sources (indegree 0) and sinks (outdegree 0).
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> degree = Degrees(graph)
    >>> degree.indegree(0)
    2
    >>> degree.outdegree(0)
    2
    >>> degree.indegree(1)
    1
    >>> degree.outdegree(1)
    0
    >>> degree.indegree(9)
    3
    >>> degree.outdegree(9)
    2
    >>> degree.is_map()
    False
    >>> [i for i in degree.sources()]
    [7]
    >>> [i for i in degree.sinks()]
    [1]
    """

    def __init__(self, graph):
        self._indegree = defaultdict(int)
        self._outdegree = defaultdict(int)
        vertices = list(graph.vertices())
        for v in vertices:
            for adj in graph.get_adjacent_vertices(v):
                self._indegree[adj] += 1
                self._outdegree[v] += 1

        # Walk the vertex list, not the counters: a vertex with degree 0 was
        # never incremented and therefore has no entry in the counter dicts.
        self._sources = [v for v in vertices if self._indegree.get(v, 0) == 0]
        self._sinks = [v for v in vertices if self._outdegree.get(v, 0) == 0]
        self._is_map = all(self._outdegree.get(v, 0) == 1 for v in vertices)

    def indegree(self, vertex):
        """Return the number of edges pointing to vertex."""
        return self._indegree[vertex]

    def outdegree(self, vertex):
        """Return the number of edges leaving vertex."""
        return self._outdegree[vertex]

    def sources(self):
        """Return a fresh iterator over the vertices with indegree 0."""
        return iter(self._sources)

    def sinks(self):
        """Return a fresh iterator over the vertices with outdegree 0."""
        return iter(self._sinks)

    def is_map(self):
        """Return True when every vertex has outdegree exactly 1."""
        return self._is_map


# 4.2.20 practice, check if euler cycle exists.
class Euler(object):

    """
    A directed graph has an Euler cycle when every vertex has the same
    indegree and outdegree and all edges belong to one connected
    component.  A graph without any edge is reported as having one.
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> euler = Euler(graph)
    >>> euler.is_euler_cycle_exists()
    False
    """

    def __init__(self, graph):
        self._indegree = defaultdict(int)
        self._outdegree = defaultdict(int)
        vertices = list(graph.vertices())
        for v in vertices:
            for adj in graph.get_adjacent_vertices(v):
                self._indegree[adj] += 1
                self._outdegree[v] += 1

        balanced = all(self._indegree.get(v, 0) == self._outdegree.get(v, 0)
                       for v in vertices)
        self._euler_cycle_exists = balanced and self._edges_connected(graph, vertices)

    def _edges_connected(self, graph, vertices):
        # True when every vertex that has an outgoing edge is reachable from
        # one of them; with balanced degrees this means all edges are in a
        # single strongly connected component.
        with_edges = [v for v in vertices if self._outdegree.get(v, 0) > 0]
        if not with_edges:
            return True
        seen = {with_edges[0]}
        stack = [with_edges[0]]
        while stack:
            vertex = stack.pop()
            for adj in graph.get_adjacent_vertices(vertex):
                if adj not in seen:
                    seen.add(adj)
                    stack.append(adj)
        return all(v in seen for v in with_edges)

    def is_euler_cycle_exists(self):
        """Return True when the graph has an Euler cycle."""
        return self._euler_cycle_exists
# 4.2.24 practice, check if a graph contains hamilton path,
# the following step is very simple and is given in the book.
def hamilton_path_exists(graph):
    """
    Check whether a directed acyclic graph has a Hamiltonian path: it has
    one exactly when every two consecutive vertices of its topological
    order are joined by an edge.

    Raises ValueError when graph contains a cycle, because the topological
    order this test relies on does not exist in that case.

    >>> test_data = [(2, 3), (0, 6), (0, 1), (2, 0), (11, 12),
    ...              (9, 12), (9, 10), (9, 11), (3, 5), (8, 7),
    ...              (5, 4), (0, 5), (6, 4), (6, 9), (7, 6)]
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> hamilton_path_exists(graph)
    False
    >>> graph_2 = Digragh(graph)
    >>> graph_2.add_edge(7, 2)
    >>> graph_2.add_edge(3, 0)
    >>> graph_2.add_edge(12, 1)
    >>> graph_2.add_edge(1, 5)
    >>> graph_2.add_edge(10, 11)
    >>> hamilton_path_exists(graph_2)
    True
    """

    ts = Topological(graph)
    if not ts.is_DAG():
        raise ValueError('hamilton_path_exists requires a directed acyclic graph')
    vertices = [v for v in ts.order()]
    return all(graph.has_edge(start, end)
               for start, end in zip(vertices, vertices[1:]))


# 4.2.25 practice
def unique_topologial_sort_order(graph):
    """
    Return True when graph has exactly one topological order.

    The order is unique exactly when the graph is acyclic and has a
    Hamiltonian path; a cyclic graph has no topological order at all.
    """
    if not Topological(graph).is_DAG():
        return False
    return hamilton_path_exists(graph)
# 4.2.30 practice, see http://algs4.cs.princeton.edu/42digraph/TopologicalX.java.html.
class TopologicalWithDegree(object):

    """
    Queue based topological sort: repeatedly remove a vertex whose
    indegree is 0 and decrease the indegree of its adjacent vertices.
    When some vertices are never removed the graph has a cycle and
    no order exists.
    >>> test_data = [(2, 3), (0, 6), (0, 1), (2, 0), (11, 12),
    ...              (9, 12), (9, 10), (9, 11), (3, 5), (8, 7),
    ...              (5, 4), (0, 5), (6, 4), (6, 9), (7, 6)]
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> twd = TopologicalWithDegree(graph)
    >>> twd.has_order()
    True
    >>> [v for v in twd.order()]
    [2, 8, 0, 3, 7, 1, 5, 6, 9, 4, 11, 10, 12]
    >>> twd.rank(8)
    1
    >>> twd.rank(10)
    11
    """

    def __init__(self, graph):
        indegree = defaultdict(int)
        self._order = Queue()
        self._rank = {}
        count = 0
        for v in graph.vertices():
            for adj in graph.get_adjacent_vertices(v):
                indegree[adj] += 1
        queue = Queue()
        for v in graph.vertices():
            if indegree[v] == 0:
                queue.enqueue(v)

        while not queue.is_empty():
            vertex = queue.dequeue()
            self._order.enqueue(vertex)
            self._rank[vertex] = count
            count += 1
            for v in graph.get_adjacent_vertices(vertex):
                indegree[v] -= 1
                if indegree[v] == 0:
                    queue.enqueue(v)

        # vertices on a cycle never reach indegree 0
        if count != graph.vertices_size():
            self._order = None

        assert self.check(graph)

    def has_order(self):
        """Return True when the graph is acyclic and an order was found."""
        return self._order is not None

    def order(self):
        """Return the topological order, or None for a cyclic graph."""
        return self._order

    def rank(self, vertex):
        """Return the position of vertex in the order, -1 when there is none."""
        if not self.has_order() or vertex not in self._rank:
            return -1
        return self._rank[vertex]

    def check(self, graph):
        """
        Verify the computed order and return a real boolean.

        Returns True when the ranks form a valid topological order, and also
        when the graph is cyclic (there is no order to verify in that case).
        """
        # digraph is cyclic: nothing to verify
        if not self.has_order():
            return True
        # check that ranks provide a valid topological order
        for vertex in graph.vertices():
            # check that vertex has a rank number
            if vertex not in self._rank:
                return False
            for adj in graph.get_adjacent_vertices(vertex):
                if adj not in self._rank or self._rank[vertex] > self._rank[adj]:
                    return False
        # check that the order agrees with the ranks
        for index, v in enumerate(self._order):
            if index != self._rank[v]:
                return False
        return True


if __name__ == '__main__':
    doctest.testmod()
# --------------------------------------------------------------------------------