├── __init__.py
├── chapter_1
├── __init__.py
├── common.py
├── module_1_1.py
├── module_1_4.py
├── module_1_5.py
├── module_1_3_double_node_linked_list.py
└── module_1_3_linked_list.py
├── chapter_2
├── __init__.py
├── module_2_1.py
├── module_2_3.py
├── module_2_5.py
├── module_2_2.py
└── module_2_4.py
├── chapter_3
├── __init__.py
├── module_3_5.py
├── module_3_4.py
├── module_3_2.py
└── module_3_3.py
├── chapter_4
├── __init__.py
├── basic_data_struct.py
├── module_4_1.py
└── module_4_2.py
├── chapter_5
├── __init__.py
├── module_5_5.py
├── module_5_4.py
├── basic_data_struct.py
├── module_5_1.py
├── module_5_2.py
└── module_5_3.py
├── chapter_6
├── __init__.py
├── basic_data_struct.py
└── module_6_1.py
└── README.md
/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
--------------------------------------------------------------------------------
/chapter_1/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
--------------------------------------------------------------------------------
/chapter_2/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
--------------------------------------------------------------------------------
/chapter_3/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
--------------------------------------------------------------------------------
/chapter_4/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
--------------------------------------------------------------------------------
/chapter_5/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
--------------------------------------------------------------------------------
/chapter_6/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # algorithms-sedgewick-python #
2 |
3 | Algorithms (4th edition) by Robert Sedgewick and Kevin Wayne: exercises in Python. All the code can be run with doctest. The Python version is 3.4; Python 2.7 might be compatible. Non-programming exercises are excluded, and some exercises will take more time to work out.
4 |
5 | More Python algorithms can be found here.
6 |
--------------------------------------------------------------------------------
/chapter_6/basic_data_struct.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
4 |
class Node(object):
    """Singly linked list cell holding a value and a link to its successor."""

    def __init__(self, val):
        self._val = val
        # Goes through the property setter, exactly like external callers do.
        self.next_node = None

    @property
    def next_node(self):
        """The following node in the chain, or None at the tail."""
        return self._next_node

    @next_node.setter
    def next_node(self, successor):
        self._next_node = successor

    @property
    def val(self):
        """The payload stored in this node."""
        return self._val

    @val.setter
    def val(self, new_val):
        self._val = new_val
26 |
27 |
class Bag(object):
    """Unordered collection backed by a singly linked list of ``Node`` cells.

    Items are prepended, so iteration yields them most-recently-added first.
    """

    def __init__(self):
        self._first = None
        self._size = 0

    def __iter__(self):
        current = self._first
        while current is not None:
            yield current.val
            current = current.next_node

    def add(self, val):
        """Insert ``val`` at the head of the list."""
        fresh = Node(val)
        fresh.next_node = self._first
        self._first = fresh
        self._size += 1

    def is_empty(self):
        """Return True when nothing has been added yet."""
        return self._first is None

    def size(self):
        """Return the number of items added so far."""
        return self._size
52 |
--------------------------------------------------------------------------------
/chapter_1/common.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
4 |
class Node(object):
    """One cell of a singly linked list: a value plus a reference to the next cell."""

    def __init__(self, val):
        self._val = val
        # Assigned via the property so the setter stays the single write path.
        self.next_node = None

    @property
    def next_node(self):
        """Successor node, or None when this is the last cell."""
        return self._next_node

    @next_node.setter
    def next_node(self, successor):
        self._next_node = successor

    @property
    def val(self):
        """Value carried by this cell."""
        return self._val

    @val.setter
    def val(self, new_val):
        self._val = new_val
26 |
27 |
class DoubleNode(object):
    """Doubly linked list cell: a value with links to both neighbours."""

    def __init__(self, val):
        self._val = val
        self._prev = None
        self._next = None

    @property
    def val(self):
        """Value carried by this cell."""
        return self._val

    @val.setter
    def val(self, new_val):
        self._val = new_val

    @property
    def prev(self):
        """Preceding node, or None at the head."""
        return self._prev

    @prev.setter
    def prev(self, predecessor):
        self._prev = predecessor

    @property
    def next(self):
        """Following node, or None at the tail."""
        return self._next

    @next.setter
    def next(self, successor):
        self._next = successor
57 |
--------------------------------------------------------------------------------
/chapter_2/module_2_1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 |
5 |
def selection_sort(lst):
    """
    Sort ``lst`` in place in ascending order using selection sort.

    For every position, locate the smallest element of the not-yet-sorted
    tail and swap it into that position. One exchange is performed per
    position, and the number of comparisons does not depend on the initial
    order of the elements.
    >>> lst = [9, 4, 5, 1, 0, 3, 6]
    >>> selection_sort(lst)
    >>> lst
    [0, 1, 3, 4, 5, 6, 9]
    """
    size = len(lst)
    for target in range(size):
        # min() returns the first minimal index, matching a strict '<' scan.
        smallest = min(range(target, size), key=lambda k: lst[k])
        lst[target], lst[smallest] = lst[smallest], lst[target]
25 |
26 |
def insertion_sort(lst):
    """
    Sort ``lst`` in place in ascending order using insertion sort.

    Each element is moved left by adjacent exchanges until it is no longer
    smaller than its left neighbour. For a random list of size N this takes
    about N**2/4 comparisons and N**2/4 exchanges on average, about N**2/2
    of each in the worst case, and N-1 comparisons with no exchanges in the
    best case.
    >>> lst = [9, 4, 5, 1, 0, 3, 6]
    >>> insertion_sort(lst)
    >>> lst
    [0, 1, 3, 4, 5, 6, 9]
    """
    for start in range(1, len(lst)):
        for pos in range(start, 0, -1):
            if not lst[pos] < lst[pos - 1]:
                # Element has reached its place within the sorted prefix.
                break
            lst[pos - 1], lst[pos] = lst[pos], lst[pos - 1]
47 |
48 |
def shell_sort(lst):
    """
    Sort ``lst`` in place in ascending order using shell sort.

    The list is h-sorted for a decreasing sequence of gaps h (1, 4, 13, ...
    generated by h = 3h + 1), finishing with a plain insertion sort at h = 1.
    The performance of the algorithm depends on the gap sequence.
    >>> lst = [9, 4, 5, 1, 0, 3, 6]
    >>> shell_sort(lst)
    >>> lst
    [0, 1, 3, 4, 5, 6, 9]
    """
    size = len(lst)

    # Build the increasing gap sequence; stop once the next gap would be >= size / 3.
    gaps = [1]
    while 3 * gaps[-1] < size:
        gaps.append(3 * gaps[-1] + 1)

    for gap in reversed(gaps):
        for start in range(gap, size):
            pos = start
            while pos >= gap and lst[pos] < lst[pos - gap]:
                lst[pos - gap], lst[pos] = lst[pos], lst[pos - gap]
                pos -= gap
72 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
75 |
--------------------------------------------------------------------------------
/chapter_3/module_3_5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import collections.abc
5 | import random
6 |
7 |
8 | # 3.5.8 practice, implement a LinearProbingHashTable which supports multiple values.
class LinearProbingHT(object):

    """
    Open-addressing hash table (linear probing) that keeps a list of values
    per key. ``put`` appends to the key's value list, ``get`` returns one of
    the stored values chosen at random, ``delete`` removes the key together
    with all of its values. ``None`` marks an empty slot and therefore cannot
    be used as a key.
    >>> ht = LinearProbingHT()
    >>> for index, s in enumerate('SEARCHEXAMPLE'):
    ...     ht.put(s, index)
    ...
    >>> val = ht.get('E')
    >>> val in [1, 6, 12]
    True
    >>> val2 = ht.get('A')
    >>> val2 in [2, 8]
    True
    >>> ht.delete('E')
    >>> ht.get('E')
    >>>
    """

    # Initial (and minimum) number of slots.
    _INIT_CAPACITY = 16

    def __init__(self):
        self._len = self._INIT_CAPACITY
        self._size = 0
        self._keys = [None] * self._len
        self._vals = [None] * self._len

    def __hash(self, key):
        # Parenthesised on purpose: '%' binds tighter than '&'.
        return (hash(key) & 0x7fffffff) % self._len

    def __find(self, key):
        """Return ``(index, found)``: the slot holding ``key``, or the first
        empty slot of its probe sequence when the key is absent."""
        index = self.__hash(key)
        while self._keys[index] is not None:
            if self._keys[index] == key:
                return index, True
            index = (index + 1) % self._len
        return index, False

    def __insert(self, key, values):
        """Store an existing value list under ``key`` (key must be absent)."""
        index, _ = self.__find(key)
        self._keys[index], self._vals[index] = key, values
        self._size += 1

    def __resize(self, size):
        """Rebuild the table with ``size`` slots, re-hashing every entry."""
        old_keys, old_vals = self._keys, self._vals
        self._len = size
        self._size = 0
        self._keys = [None] * size
        self._vals = [None] * size
        for key, values in zip(old_keys, old_vals):
            if key is not None:
                self.__insert(key, values)

    def __contains(self, key):
        return self.__find(key)[1]

    def put(self, key, value):
        """Append ``value`` to the values stored under ``key``.

        Raises AssertionError for unhashable keys and ValueError for ``None``.
        """
        assert isinstance(key, collections.abc.Hashable)
        if key is None:
            raise ValueError('None cannot be used as a key.')

        # Keep the load factor at or below 1/2 so probing always terminates.
        if 2 * self._size >= self._len:
            self.__resize(self._len * 2)

        index, found = self.__find(key)
        if found:
            self._vals[index].append(value)
            return
        self._keys[index], self._vals[index] = key, [value]
        self._size += 1

    def get(self, key):
        """Return a randomly chosen value stored under ``key``, or None."""
        index, found = self.__find(key)
        if not found:
            return None
        return random.choice(self._vals[index])

    def delete(self, key):
        """Remove ``key`` and all of its values; do nothing if it is absent."""
        index, found = self.__find(key)
        if not found:
            return

        self._keys[index] = self._vals[index] = None
        self._size -= 1

        # Re-insert the rest of the cluster so no probe sequence is broken
        # by the hole that was just created.
        index = (index + 1) % self._len
        while self._keys[index] is not None:
            k, values = self._keys[index], self._vals[index]
            self._keys[index] = self._vals[index] = None
            self._size -= 1
            self.__insert(k, values)
            index = (index + 1) % self._len

        # Shrink when at most 1/8 full, but never below the initial capacity.
        if (self._size > 0 and self._len > self._INIT_CAPACITY
                and self._size <= self._len // 8):
            self.__resize(self._len // 2)

    def keys(self):
        """Yield every key currently stored, in slot order."""
        for k in self._keys:
            if k is not None:
                yield k
101 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
104 |
--------------------------------------------------------------------------------
/chapter_5/module_5_5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import sys
5 | from basic_data_struct import MinPQ
6 |
7 |
class Node(object):
    """Huffman trie node: a character, its frequency and two children.

    Nodes are ordered by frequency so they can be kept in a priority queue.
    """

    def __init__(self, char, freq, left, right):
        self._char = char
        self._freq = freq
        self._left = left
        self._right = right

    def is_leaf(self):
        """Return True when the node has no children."""
        return self._left is None and self._right is None

    def __cmp__(self, other):
        # Only consulted by Python 2; Python 3 uses the rich comparisons below.
        return self._freq - other._freq

    def __lt__(self, other):
        return self._freq < other._freq

    def __le__(self, other):
        return self._freq <= other._freq

    def __gt__(self, other):
        return self._freq > other._freq

    def __ge__(self, other):
        return self._freq >= other._freq

    @property
    def char(self):
        return self._char

    @property
    def freq(self):
        return self._freq

    @property
    def left(self):
        return self._left

    @property
    def right(self):
        return self._right


class Huffman(object):
    """Huffman compression between standard input and standard output.

    Stream layout produced by ``compress`` and consumed by ``expand``:
    the trie in preorder (``b'1'`` + one character byte for a leaf,
    ``b'0'`` for an internal node), the number of encoded characters as a
    4-byte big-endian integer, then the code of every character written as
    the ASCII bytes ``b'0'`` / ``b'1'`` (one byte per bit, not bit-packed).
    Characters are restricted to code points 0-255.
    """

    @staticmethod
    def compress():
        """Read all of stdin, write the trie, the length and the codes to stdout."""
        # NOTE(review): empty input is not handled here; what happens then
        # depends on MinPQ.del_min() on an empty queue - confirm.
        input_string = ''.join(sys.stdin.readlines())
        frequency = [0] * 256
        for ch in input_string:
            frequency[ord(ch)] += 1

        root = Huffman.build_trie(frequency)

        Huffman.write_trie(root)

        st = [None] * 256
        Huffman.build_code(st, root, '')
        # Fixed-width length header so expand() knows how many characters to decode.
        sys.stdout.buffer.write(len(input_string).to_bytes(4, 'big'))

        for ch in input_string:
            code = st[ord(ch)]
            for c in code:
                if c == '0':
                    sys.stdout.buffer.write(b'0')
                elif c == '1':
                    sys.stdout.buffer.write(b'1')
                else:
                    raise Exception('Illegal state.')

        sys.stdout.close()

    @staticmethod
    def build_trie(freq):
        """Build the Huffman trie for the 256-entry frequency table ``freq``
        and return its root node."""
        min_pq = MinPQ()
        for i in range(256):
            if freq[i]:
                min_pq.insert(Node(chr(i), freq[i], None, None))

        # Repeatedly merge the two least frequent subtrees.
        while min_pq.size() > 1:
            left = min_pq.del_min()
            right = min_pq.del_min()
            parent = Node('\0', left.freq + right.freq, left, right)
            min_pq.insert(parent)
        return min_pq.del_min()

    @staticmethod
    def write_trie(node):
        """Write the trie rooted at ``node`` to stdout in preorder."""
        if node.is_leaf():
            sys.stdout.buffer.write(b'1')
            # latin-1 maps code points 0-255 one-to-one onto single bytes.
            sys.stdout.buffer.write(node.char.encode('latin-1'))
            return
        sys.stdout.buffer.write(b'0')
        Huffman.write_trie(node.left)
        Huffman.write_trie(node.right)

    @staticmethod
    def build_code(st, node, code):
        """Fill ``st`` (indexed by character code) with the bit string of
        every leaf below ``node``; ``code`` is the path walked so far."""
        if not node.is_leaf():
            Huffman.build_code(st, node.left, code + '0')
            Huffman.build_code(st, node.right, code + '1')
        else:
            st[ord(node.char)] = code

    @staticmethod
    def expand():
        """Read a stream written by ``compress`` from stdin and write the
        decoded text to stdout. Raises ValueError on a malformed stream."""
        root = Huffman.read_trie()
        length = int.from_bytes(sys.stdin.buffer.read(4), 'big')
        for _ in range(length):
            node = root
            while not node.is_leaf():
                bit = sys.stdin.buffer.read(1)
                if bit == b'1':
                    node = node.right
                elif bit == b'0':
                    node = node.left
                else:
                    raise ValueError('Truncated or invalid code stream.')
            sys.stdout.write(node.char)

    @staticmethod
    def read_trie():
        """Read a preorder-encoded trie from stdin and return its root."""
        marker = sys.stdin.buffer.read(1)
        if marker == b'1':
            return Node(sys.stdin.buffer.read(1).decode('latin-1'), -1, None, None)
        if marker == b'0':
            left = Huffman.read_trie()
            right = Huffman.read_trie()
            return Node('\0', -1, left, right)
        raise ValueError('Truncated or invalid trie encoding.')
117 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
120 |
--------------------------------------------------------------------------------
/chapter_2/module_2_3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import random
5 |
6 |
# Sub-lists of at most this many elements are finished with insertion sort.
INSERTION_SORT_LENGTH = 8


class QuickSort(object):

    """
    In-place quicksort with a median-of-five-samples pivot and an
    insertion-sort cutoff for small sub-lists. The list is shuffled first,
    so equal elements may end up in any relative order.
    >>> qs = QuickSort()
    >>> lst = [3, 2, 4, 7, 8, 9, 1, 0, 14, 11, 23, 50, 26]
    >>> qs.sort(lst)
    >>> lst
    [0, 1, 2, 3, 4, 7, 8, 9, 11, 14, 23, 26, 50]
    >>> lst2 = ['E', 'A', 'S', 'Y', 'Q', 'U', 'E', 'S', 'T', 'I', 'O', 'N']
    >>> qs.sort(lst2)
    >>> lst2
    ['A', 'E', 'E', 'I', 'N', 'O', 'Q', 'S', 'S', 'T', 'U', 'Y']
    """

    def sort(self, lst):
        """Sort ``lst`` in place in ascending order."""
        random.shuffle(lst)
        self.__sort(lst, 0, len(lst) - 1)

    def __sort(self, lst, low, high):
        length = high - low + 1
        if length <= INSERTION_SORT_LENGTH:
            self.insertion_sort(lst, low, high)
            return
        index = self.partition(lst, low, high)
        # The pivot at ``index`` is already in its final position, so it is
        # excluded from both halves; this also guarantees that each
        # recursive call works on a strictly smaller range.
        self.__sort(lst, low, index - 1)
        self.__sort(lst, index + 1, high)

    def insertion_sort(self, lst, low, high):
        """Insertion-sort the slice ``lst[low..high]`` (inclusive) in place."""
        for i in range(low + 1, high + 1):
            j = i
            while j > low and lst[j] < lst[j - 1]:
                lst[j], lst[j - 1] = lst[j - 1], lst[j]
                j -= 1

    # 2.3.18 practice
    def three_sample(self, lst, low, mid, high):
        """Return whichever of the indices ``low``, ``mid``, ``high`` holds
        the median of the three values."""
        if lst[low] <= lst[mid] <= lst[high] or lst[high] <= lst[mid] <= lst[low]:
            return mid
        elif lst[mid] <= lst[low] <= lst[high] or lst[high] <= lst[low] <= lst[mid]:
            return low
        else:
            return high

    # 2.3.19 practice
    def five_sample(self, lst, low, high):
        """Return the index of the median of five elements sampled at random
        (with replacement) from ``lst[low..high]``."""
        values = []
        for _ in range(5):
            index = random.randint(low, high)
            values.append((index, lst[index]))
        values.sort(key=lambda item: item[1])
        return values[2][0]

    def partition(self, lst, low, high):
        """Partition ``lst[low..high]`` around a sampled pivot and return the
        pivot's final index: everything to its left is <= the pivot and
        everything to its right is >= the pivot."""
        index = self.five_sample(lst, low, high)
        lst[low], lst[index] = lst[index], lst[low]
        i, j = low + 1, high
        val = lst[low]
        while 1:
            while i < high and lst[i] <= val:
                i += 1
            while j > low and lst[j] >= val:
                j -= 1
            if i >= j:
                break
            lst[i], lst[j] = lst[j], lst[i]

        lst[low], lst[j] = lst[j], lst[low]
        return j
80 |
81 |
class QuickThreeWay(object):

    """
    Three-way partitioning quicksort: every pass splits the range into
    elements smaller than, equal to and greater than the pivot, and only
    the outer two parts are sorted recursively.
    >>> qtw = QuickThreeWay()
    >>> lst = [3, 2, 4, 7, 8, 9, 1, 0]
    >>> qtw.sort(lst)
    >>> lst
    [0, 1, 2, 3, 4, 7, 8, 9]
    """

    def sort(self, lst):
        random.shuffle(lst)
        self.__sort(lst, 0, len(lst) - 1)

    def __sort(self, lst, low, high):
        if low >= high:
            return

        pivot = lst[low]
        below, cursor, above = low, low + 1, high
        # Invariant: lst[low:below] < pivot, lst[below:cursor] == pivot,
        # lst[above + 1:high + 1] > pivot.
        while cursor <= above:
            item = lst[cursor]
            if item < pivot:
                lst[below], lst[cursor] = item, lst[below]
                below += 1
                cursor += 1
            elif item > pivot:
                lst[above], lst[cursor] = item, lst[above]
                above -= 1
            else:
                cursor += 1

        self.__sort(lst, low, below - 1)
        self.__sort(lst, above + 1, high)
113 |
114 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
117 |
--------------------------------------------------------------------------------
/chapter_1/module_1_1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 |
5 |
def gcd(p, q):
    '''
    Compute the greatest common divisor of p and q with Euclid's algorithm.
    >>> gcd(6, 4)
    2
    >>> gcd(7, 5)
    1
    >>> gcd(10, 5)
    5
    '''
    # Iterative form of: gcd(p, q) = p if q == 0 else gcd(q, p % q)
    while not q == 0:
        p, q = q, p % q
    return p
17 |
18 |
def is_prime(number):
    '''
    Tell whether number is prime, by trial division up to its square root.
    >>> is_prime(1)
    False
    >>> is_prime(2)
    True
    >>> is_prime(3)
    True
    >>> is_prime(4)
    False
    >>> is_prime(101)
    True
    >>> is_prime(65535)
    False
    '''
    if number < 2:
        return False
    divisor = 2
    while divisor * divisor <= number:
        if not number % divisor:
            # Found a proper divisor, so the number is composite.
            return False
        divisor += 1
    return True
43 |
44 |
def sqrt(number):
    '''
    return the square root of the number(Newton's method).
    Raises ValueError when the number is negative.
    >>> sqrt(4)
    2.0
    >>> sqrt(9)
    3.0
    >>> sqrt(1)
    1
    >>> sqrt(256)
    16.0
    >>> sqrt(0)
    0.0
    '''
    if number < 0:
        raise ValueError('input number must be positive.')
    if number == 0:
        # The iteration below divides by the current estimate, which would
        # be zero here.
        return 0.0
    err = 1e-15
    t = number
    # Stop once t and number / t agree to a relative error of err.
    while abs(t - number / t) > err * t:
        t = float(number / t + t) / 2
    return t
64 |
65 |
def harmonic(number):
    '''
    Compute the harmonic number H(number) = 1 + 1/2 + ... + 1/number.
    >>> harmonic(2)
    1.5
    >>> harmonic(3)
    1.8333333333333333
    '''
    total = 0
    for denominator in range(1, number + 1):
        total += 1 / denominator
    return total
75 |
76 |
def binary_search(key, lst):
    '''
    Look up an integer key in an ascending list or tuple by binary search.
    Returns the index at which the key was found, or -1 when the key is
    not in the list.
    >>> binary_search(3, [1, 2, 3, 4, 5])
    2
    >>> binary_search(1, [1, 2, 3, 4, 5, 6, 7, 9])
    0
    >>> binary_search(9, [1, 2, 3, 4, 5, 6, 7, 9])
    7
    >>> binary_search(999, [1, 2, 3, 4, 5, 6, 7, 9])
    -1
    '''

    assert isinstance(key, int)
    assert isinstance(lst, (list, tuple))

    left = 0
    right = len(lst) - 1
    while left <= right:
        middle = (left + right) // 2
        candidate = lst[middle]
        if candidate == key:
            return middle
        if candidate > key:
            right = middle - 1
        else:
            left = middle + 1
    return -1
104 |
105 |
def sort3num(a, b, c):
    '''
    Return the three arguments as a tuple in ascending order.
    >>> sort3num(3, 2, 1)
    (1, 2, 3)
    '''
    low, mid, high = a, b, c
    # Three compare-and-swap steps: after the first two ``low`` is the
    # minimum, the last one orders the remaining pair.
    if low > mid:
        low, mid = mid, low
    if low > high:
        low, high = high, low
    if mid > high:
        mid, high = high, mid
    return low, mid, high
119 |
120 |
121 | # 1.1.16 practice
def exR1(number):
    # Recursive string builder from exercise 1.1.16: non-positive input
    # yields '', otherwise exR1(n - 3) + n + exR1(n - 2) + n.
    if number > 0:
        label = str(number)
        return ''.join((exR1(number - 3), label, exR1(number - 2), label))
    return ''
126 |
127 |
128 | # 1.1.29 practice
def rank(key, lst):
    '''
    return the rank of the key in the given ascending list, i.e. the index
    of its first occurrence (there may be duplicate keys). If the key is
    not in the list, return -1.
    >>> rank(3, [1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10])
    2
    >>> rank(4, [1, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5])
    4
    >>> rank(1, [1, 1, 1])
    0
    '''

    assert isinstance(key, int)
    assert isinstance(lst, (list, tuple))

    # Leftmost binary search: narrow [low, high) down to the first position
    # whose element is >= key. This stays O(log n) on long runs of
    # duplicates and never steps left of index 0.
    low, high = 0, len(lst)
    while low < high:
        mid = (low + high) // 2
        if lst[mid] < key:
            low = mid + 1
        else:
            high = mid
    if low < len(lst) and lst[low] == key:
        return low
    return -1
154 |
155 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
158 |
--------------------------------------------------------------------------------
/chapter_5/module_5_4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | from basic_data_struct import Digragh, Stack, Bag
5 | from collections import defaultdict
6 |
7 |
class DirectedDFS(object):
    """Depth-first reachability from a collection of source vertices.

    After construction, ``marked(v)`` is true for every vertex that can be
    reached from any of ``sources`` by following
    ``graph.get_adjacent_vertices``.
    """

    def __init__(self, graph, sources):
        self._marked = defaultdict(bool)
        for source in sources:
            if self._marked[source]:
                continue
            self.dfs(graph, source)

    def dfs(self, graph, vertex):
        """Mark ``vertex`` and, recursively, everything reachable from it."""
        self._marked[vertex] = True
        for neighbour in graph.get_adjacent_vertices(vertex):
            if self._marked[neighbour]:
                continue
            self.dfs(graph, neighbour)

    def marked(self, vertex):
        """Return whether ``vertex`` was reached from one of the sources."""
        return self._marked[vertex]
24 |
25 |
class NFA(object):

    '''
    NFA(nondeterministic finite state automaton) algorithm for regular expression.
    First a NFA is constructed from the given regular expression: a directed graph of
    epsilon transitions with one state per pattern character plus an accept state.
    Then the input text is scanned character by character. For each character, every
    current state whose pattern character matches moves on to the next state, and then
    all states reachable through epsilon transitions are added. When all characters are
    consumed, the text matches if the accept state is among the current states.
    The worst case running time is proportional to O(MN), M is the length of the regular
    expression, N is the length of the input text.
    Supported metacharacters: '(', ')', '|' (including multiway or), '*' and '.'.
    >>> nfa = NFA('(A*B|AC)D')
    >>> nfa.recognizes('AAAABD')
    True
    >>> nfa2 = NFA('(A*B|AC)D')
    >>> nfa2.recognizes('AAAAC')
    False
    >>> nfa3 = NFA('(a|(bc)*d)*')
    >>> nfa3.recognizes('abcbcd')
    True
    >>> nfa4 = NFA('(a|(bc)*d)*')
    >>> nfa4.recognizes('abcbcbcdaaaabcbcdaaaddd')
    True
    >>> nfa5 = NFA('(.*AB((C|D|E)F)*G)')
    >>> nfa5.recognizes('dfawefdABCQQQG')
    False
    >>> nfa5.recognizes('dfawefdABCFEFG')
    True
    >>> NFA('AB').recognizes('')
    False
    '''

    def __init__(self, regexp):
        self._regexp = regexp
        self._ops = Stack()
        self._reg_len = len(self._regexp)
        self._graph = Digragh(self._reg_len + 1)

        for i in range(self._reg_len):
            char = self._regexp[i]
            lp = i
            if char == '(' or char == '|':
                self._ops.push(i)
            elif char == ')':
                # Pop every '|' of this group (multiway or) until the
                # matching '(' is found.
                or_ops = []
                op = self._ops.pop()
                while self._regexp[op] == '|':
                    or_ops.append(op)
                    op = self._ops.pop()
                lp = op
                for or_op in or_ops:
                    # Enter each alternative from '(' and leave it at ')'.
                    self._graph.add_edge(lp, or_op + 1)
                    self._graph.add_edge(or_op, i)
            # Closure: look one character ahead for '*'.
            if i < self._reg_len - 1 and self._regexp[i + 1] == '*':
                self._graph.add_edge(lp, i + 1)
                self._graph.add_edge(i + 1, lp)
            # Only metacharacters get an epsilon edge to the next state;
            # literal characters must be consumed by a match transition in
            # recognizes(), otherwise they could be skipped.
            if char in ('(', '*', ')'):
                self._graph.add_edge(i, i + 1)

    def recognizes(self, txt):
        """Return True when the whole of ``txt`` matches the regular expression."""
        # States reachable from the start state before reading any character.
        pc = Bag()
        dfs = DirectedDFS(self._graph, (0,))
        for v in self._graph.vertices():
            if dfs.marked(v):
                pc.add(v)

        length = len(txt)
        for i in range(length):
            # Match transitions on txt[i].
            match = Bag()
            for v in pc:
                if v < self._reg_len:
                    if self._regexp[v] == txt[i] or self._regexp[v] == '.':
                        match.add(v + 1)

            # Epsilon closure of the matched states.
            pc = Bag()
            dfs = DirectedDFS(self._graph, match)
            for v in self._graph.vertices():
                if dfs.marked(v):
                    pc.add(v)

        for v in pc:
            if v == self._reg_len:
                return True
        return False
104 |
105 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
108 |
--------------------------------------------------------------------------------
/chapter_1/module_1_4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | from module_1_1 import binary_search
4 | import doctest
5 |
6 |
def two_sum_fast(lst):
    """
    Count the pairs of numbers in the list that add up to zero.

    The list is sorted in place; then, for every element, binary_search
    looks for its negation, and the pair is counted only when the negation
    is found at a later index so that no pair is counted twice.
    >>> lst = [-1, 1, -2, 3, 5, -5, 0, 4]
    >>> two_sum_fast(lst)
    2
    """
    lst.sort()
    return sum(
        1 for position, value in enumerate(lst)
        if binary_search(-value, lst) > position
    )
22 |
23 |
def three_sum_fast(lst):
    """
    Count the triples of numbers in the list that add up to zero.

    The list is sorted in place; then, for every pair (i, j) with i < j,
    binary_search looks for the number that completes the sum, and the
    triple is counted only when that number sits at an index after j.
    >>> lst = [-1, 2, 1, 3, 0, 4, -4, 5, 9, -5]
    >>> three_sum_fast(lst)
    8
    """
    lst.sort()
    size = len(lst)
    return sum(
        1
        for first in range(size)
        for second in range(first + 1, size)
        if binary_search(-lst[first] - lst[second], lst) > second
    )
39 |
40 |
41 | # 1.4.14 practice
def four_sum_fast(lst):
    """
    Count the quadruples of positions i < j < k < l whose values add up to
    zero. The list is sorted in place first, like the other *_sum_fast
    functions. Runs in O(N**2) time using a table of pair sums.
    >>> four_sum_fast([1, -1, 2, -2])
    1
    >>> four_sum_fast([0, 0, 0, 0, 0])
    5
    """
    lst.sort()
    size = len(lst)
    # pair_sums[s] = number of pairs (i, j), i < j < k, with lst[i] + lst[j] == s
    pair_sums = {}
    cnt = 0
    for k in range(size):
        # Treat (k, l) as the later pair and look up matching earlier pairs.
        for l in range(k + 1, size):
            cnt += pair_sums.get(-(lst[k] + lst[l]), 0)
        # Now make k available as the second element of an earlier pair.
        for i in range(k):
            total = lst[i] + lst[k]
            pair_sums[total] = pair_sums.get(total, 0) + 1
    return cnt
48 |
49 |
50 | # 1.4.16 practice
def closest_pair(lst):
    """
    Get the two closest numbers in a list. The list is sorted in place
    first, then the difference of every two adjacent numbers is compared
    and the pair with the smallest difference is returned (the first such
    pair on ties). Returns (None, None) when the list has fewer than two
    elements.
    >>> lst = [1, 0, 3, 4, 5, 9, 1]
    >>> closest_pair(lst)
    (1, 1)
    >>> lst
    [0, 1, 1, 3, 4, 5, 9]
    """
    lst.sort()
    # None means "no pair seen yet", so arbitrarily large gaps are handled.
    best_gap = None
    a, b = None, None
    for left, right in zip(lst, lst[1:]):
        gap = abs(left - right)
        if best_gap is None or gap < best_gap:
            best_gap = gap
            a, b = left, right
    return a, b
71 |
72 |
73 | # 1.4.17 practice
def farthest_pair(lst):
    # The two values farthest apart are simply the extremes of the list.
    smallest = min(lst)
    largest = max(lst)
    return smallest, largest
76 |
77 |
78 | # 1.4.18 practice
def partial_minimum(lst):
    """
    Find a partial (local) minimum in the list: an element that is not
    larger than either of its neighbours, where the ends of the list count
    as having no neighbour on the missing side.
    The whole process is similar to binary search algorithm and takes
    O(log N) comparisons. Raises IndexError for an empty list.
    >>> lst = [5, 2, 3, 4, 3, 5, 6, 8, 7, 1, 9]
    >>> partial_minimum(lst)
    2
    >>> partial_minimum([3, 2, 1])
    1
    >>> partial_minimum([5])
    5
    """
    start, end = 0, len(lst) - 1
    # Invariant: lst[start] is smaller than its left neighbour (or start is
    # the first index) and lst[end] is not larger than its right neighbour
    # (or end is the last index), so a local minimum lies in [start, end].
    while start < end:
        mid = (start + end) // 2
        if lst[mid] > lst[mid + 1]:
            # Going downhill to the right: a minimum exists right of mid.
            start = mid + 1
        else:
            end = mid
    return lst[start]
99 |
100 |
101 | # 1.4.20 practice
def bitonic_list_search(key, lst):
    """
    Search a bitonic list (strictly increasing, then strictly decreasing)
    for key and return its index, or -1 when the key is absent.
    The peak is located first; then the ascending part and, if needed, the
    descending part are binary-searched.
    >>> lst = [1, 2, 3, 9, 8, 7, 6, 5, 4, -1]
    >>> bitonic_list_search(2, lst)
    1
    >>> bitonic_list_search(9, lst)
    3
    >>> bitonic_list_search(7, lst)
    5
    """
    def peak_index(seq):
        # Binary search for the position of the maximum element.
        lo, hi = 0, len(seq) - 1
        while lo < hi:
            middle = (lo + hi) // 2
            if seq[middle] < seq[middle + 1]:
                lo = middle + 1
            elif seq[middle] > seq[middle + 1]:
                hi = middle
        return hi

    def search(lo, hi, ascending):
        # Plain binary search on lst[lo..hi]; ``ascending`` gives the
        # direction in which that slice is ordered.
        while lo <= hi:
            middle = (lo + hi) // 2
            current = lst[middle]
            if current == key:
                return middle
            go_right = current < key if ascending else current > key
            if go_right:
                lo = middle + 1
            else:
                hi = middle - 1
        return -1

    peak = peak_index(lst)
    if key == lst[peak]:
        return peak
    found = search(0, peak, True)
    if found > -1:
        return found
    return search(peak, len(lst) - 1, False)
150 |
151 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
154 |
--------------------------------------------------------------------------------
/chapter_1/module_1_5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import random
5 |
6 |
class UnionFind(object):

    """
    Quick-union with path compression. Every site stores a link to a
    parent site; a site that links to itself is the root of its component.
    >>> uf = UnionFind(10)
    >>> connections = [(4, 3), (3, 8), (6, 5), (9, 4), (2, 1),
    ...                (8, 9), (5, 0), (7, 2), (6, 1), (1, 0), (6, 7)]
    >>> for i, j in connections:
    ...     uf.union(i, j)
    ...
    >>> uf.connected(1, 4)
    False
    >>> uf.connected(8, 4)
    True
    >>> uf.connected(1, 5)
    True
    >>> uf.connected(1, 7)
    True
    >>> uf.find(4)
    8
    >>> uf.find(8)
    8
    """

    def __init__(self, size):
        self._id = list(range(size))
        self._count = size

    def count(self):
        """Return the number of components."""
        return self._count

    def find(self, node):
        """Return the root of the component containing ``node``."""
        parents = self._id
        top = node
        while parents[top] != top:
            top = parents[top]
        # 1.5.12 practice
        # Second pass: link every site on the path directly to the root.
        while node != top:
            parents[node], node = top, parents[node]
        return top

    def connected(self, p, q):
        """Return True when ``p`` and ``q`` are in the same component."""
        return self.find(p) == self.find(q)

    def union(self, p, q):
        """Merge the components of ``p`` and ``q`` (p's root links to q's root)."""
        root_of_p = self.find(p)
        root_of_q = self.find(q)
        if root_of_p != root_of_q:
            self._id[root_of_p] = root_of_q
            self._count -= 1
59 |
60 |
class WeightedUnionFind(object):

    """
    Weighted quick-union with path compression: on union, the root of the
    smaller tree is linked below the root of the larger one, which keeps
    the trees shallow.
    >>> wuf = WeightedUnionFind(10)
    >>> connections = [(4, 3), (3, 8), (6, 5), (9, 4),
    ...                (2, 1), (8, 9), (5, 0), (7, 2), (6, 1), (1, 0), (6, 7)]
    >>> for i, j in connections:
    ...     wuf.union(i, j)
    ...
    >>> wuf.connected(1, 4)
    False
    >>> wuf.connected(8, 4)
    True
    >>> wuf.connected(1, 5)
    True
    >>> wuf.connected(1, 7)
    True
    """

    def __init__(self, size):
        self._count = size
        self._id = list(range(size))
        self._size = [1 for _ in range(size)]

    def count(self):
        """Return the number of components."""
        return self._count

    def connected(self, p, q):
        """Return True when ``p`` and ``q`` are in the same component."""
        return self.find(p) == self.find(q)

    def find(self, node):
        """Return the root of the component containing ``node``."""
        parents = self._id
        top = node
        while parents[top] != top:
            top = parents[top]
        # 1.5.13 practice
        # Second pass: link every site on the path directly to the root.
        while node != top:
            parents[node], node = top, parents[node]
        return top

    def union(self, p, q):
        """Merge the components of ``p`` and ``q``, smaller tree under larger."""
        root_of_p = self.find(p)
        root_of_q = self.find(q)
        if root_of_p == root_of_q:
            return
        if self._size[root_of_p] < self._size[root_of_q]:
            child, parent = root_of_p, root_of_q
        else:
            child, parent = root_of_q, root_of_p
        self._id[child] = parent
        self._size[parent] += self._size[child]
        self._count -= 1
115 |
116 |
117 | # 1.5.14 practice
class HeightedUnionFind(object):

    """
    Union-find weighted by height: on union, the root of the shorter tree
    is linked below the root of the taller one, so a tree's height
    won't be taller than log(n).
    >>> huf = HeightedUnionFind(10)
    >>> connections = [(9, 0), (3, 4), (5, 8), (7, 2), (2, 1), (5, 7), (0, 3), (4, 2)]
    >>> for i, j in connections:
    ...     huf.union(i, j)
    ...
    >>> huf.connected(9, 3)
    True
    >>> huf.connected(0, 1)
    True
    >>> huf.connected(9, 8)
    True
    """

    def __init__(self, size):
        self._id = list(range(size))
        self._height = [1 for _ in range(size)]
        self._count = size

    def count(self):
        """Return the number of components."""
        return self._count

    def find(self, node):
        """Return the root of the component containing ``node``."""
        current = node
        while self._id[current] != current:
            current = self._id[current]
        return current

    def connected(self, p, q):
        """Return True when ``p`` and ``q`` are in the same component."""
        return self.find(p) == self.find(q)

    def union(self, p, q):
        """Merge the components of ``p`` and ``q``, shorter tree under taller."""
        root_of_p = self.find(p)
        root_of_q = self.find(q)
        if root_of_p == root_of_q:
            return
        height_p = self._height[root_of_p]
        height_q = self._height[root_of_q]
        if height_p < height_q:
            self._id[root_of_p] = root_of_q
        else:
            self._id[root_of_q] = root_of_p
            if height_p == height_q:
                # Equal heights: the merged tree is one level taller.
                self._height[root_of_p] += 1
        self._count -= 1
166 |
167 |
168 | # 1.5.17 practice
def erdos_renyi(size):
    """
    Draw random pairs of sites in ``0 .. size - 1`` and union every pair
    that is not yet connected, until a single component remains
    (exercise 1.5.17). Nothing is returned.
    >>> erdos_renyi(1000)
    """
    forest = UnionFind(size)
    upper = size - 1
    while forest.count() > 1:
        a = random.randint(0, upper)
        b = random.randint(0, upper)
        # Identical sites and already-connected pairs are simply redrawn.
        if a != b and not forest.connected(a, b):
            forest.union(a, b)
181 |
# When run as a script, execute every doctest embedded in this module.
if __name__ == '__main__':
    doctest.testmod()
184 |
--------------------------------------------------------------------------------
/chapter_1/module_1_3_double_node_linked_list.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | from __future__ import print_function
4 | from common import DoubleNode
5 | import doctest
6 |
7 |
8 | # 1.3.31 practice
class LinkedList(object):

    """
    Doubly linked list whose nodes carry ``prev`` and ``next`` links
    (exercise 1.3.31). Values are wrapped in ``DoubleNode`` objects; the
    list tracks both ends (``_first`` / ``_last``) and the node count.
    >>> lst = LinkedList()
    >>> lst.push_back(1)
    >>> lst.push_front(2)
    >>> for i in lst:
    ...     print(i)
    ...
    2
    1
    >>> lst.size()
    2
    >>> lst.is_empty()
    False
    >>> lst.pop_front()
    2
    >>> lst.pop_front()
    1
    >>> lst.is_empty()
    True
    >>> lst.pop_front()
    >>> lst.push_back(1)
    >>> lst.push_back(2)
    >>> lst.pop_back()
    2
    >>> lst.pop_back()
    1
    >>> lst.pop_back()
    >>> lst.is_empty()
    True
    >>> lst.push_back(1)
    >>> lst.insert_after(1, DoubleNode(2))
    >>> lst.insert_before(2, DoubleNode(3))
    >>> for i in lst:
    ...     print(i)
    ...
    1
    3
    2
    >>> for i in range(10):
    ...     lst.push_back(i)
    ...
    >>> lst.remove(1)
    >>> lst.remove(3)
    >>> [i for i in lst]
    [2, 0, 2, 4, 5, 6, 7, 8, 9]
    >>> lst.remove(2)
    >>> [i for i in lst]
    [0, 4, 5, 6, 7, 8, 9]
    >>> lst.remove(9)
    >>> lst.pop_back()
    8
    >>> lst.size()
    5
    >>> single = LinkedList()
    >>> single.push_back(7)
    >>> single.remove(7)
    >>> single.push_back(8)
    >>> [i for i in single]
    [8]
    """

    def __init__(self):
        self._first = self._last = None
        self._size = 0

    def __iter__(self):
        """Yield the stored values from front to back."""
        node = self._first
        while node is not None:
            yield node.val
            node = node.next

    def is_empty(self):
        """Return True when the list holds no nodes."""
        return self._first is None

    def size(self):
        """Return the number of nodes in the list."""
        return self._size

    def push_front(self, item):
        """Wrap ``item`` in a new node and make it the first node."""
        node = DoubleNode(item)
        node.next = self._first
        if self._first is None:
            # First node of an empty list is also the last one.
            self._last = node
        else:
            self._first.prev = node
        self._first = node
        self._size += 1

    def push_back(self, item):
        """Wrap ``item`` in a new node and make it the last node."""
        node = DoubleNode(item)
        node.prev = self._last
        if self._last is None:
            self._first = node
        else:
            self._last.next = node
        self._last = node
        self._size += 1

    def pop_front(self):
        """Remove the first node and return its value, or None if empty."""
        node = self._first
        if node is None:
            return None
        self._unlink(node)
        return node.val

    def pop_back(self):
        """Remove the last node and return its value, or None if empty."""
        node = self._last
        if node is None:
            return None
        self._unlink(node)
        return node.val

    def insert_before(self, target_value, new_node):
        """Link ``new_node`` in front of the first node holding ``target_value``.

        Does nothing when no node holds ``target_value``.
        """
        target = self._find(target_value)
        if target is None:
            return
        before = target.prev
        new_node.prev, new_node.next = before, target
        target.prev = new_node
        if before is None:
            self._first = new_node
        else:
            before.next = new_node
        self._size += 1

    def insert_after(self, target_value, new_node):
        """Link ``new_node`` behind the first node holding ``target_value``.

        Does nothing when no node holds ``target_value``.
        """
        target = self._find(target_value)
        if target is None:
            return
        after = target.next
        new_node.prev, new_node.next = target, after
        target.next = new_node
        if after is None:
            self._last = new_node
        else:
            after.prev = new_node
        self._size += 1

    def remove(self, item):
        """Remove every node whose value equals ``item``.

        Works on an empty list, when the matches include the head or the
        tail, and when every node matches.
        """
        node = self._first
        while node is not None:
            # Remember the successor first: _unlink clears the node's links.
            following = node.next
            if node.val == item:
                self._unlink(node)
            node = following

    def _find(self, value):
        """Return the first node whose value equals ``value``, or None."""
        node = self._first
        while node is not None and node.val != value:
            node = node.next
        return node

    def _unlink(self, node):
        """Detach ``node`` from the chain, updating both ends and the size."""
        before, after = node.prev, node.next
        if before is None:
            self._first = after
        else:
            before.next = after
        if after is None:
            self._last = before
        else:
            after.prev = before
        node.prev = node.next = None
        self._size -= 1
197 |
# When run as a script, execute every doctest embedded in this module.
if __name__ == '__main__':
    doctest.testmod()
200 |
--------------------------------------------------------------------------------
/chapter_5/basic_data_struct.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 |
4 |
class Node(object):

    """Singly linked node: a value plus a reference to the following node."""

    def __init__(self, val):
        self._val = val
        self._next_node = None

    def _read_val(self):
        return self._val

    def _write_val(self, value):
        self._val = value

    def _read_next(self):
        return self._next_node

    def _write_next(self, node):
        self._next_node = node

    # Both fields are exposed as plain read/write properties.
    val = property(_read_val, _write_val)
    next_node = property(_read_next, _write_next)
26 |
27 |
class Queue(object):

    """FIFO queue stored as a singly linked chain of ``Node`` objects."""

    def __init__(self, q=None):
        """Create a queue, optionally pre-filled from the iterable ``q``."""
        self._size = 0
        self._first = self._last = None
        if q:
            for element in q:
                self.enqueue(element)

    def __iter__(self):
        """Yield the queued values from oldest to newest."""
        current = self._first
        while current:
            yield current.val
            current = current.next_node

    def is_empty(self):
        """Return True when no node is queued."""
        return self._first is None

    def size(self):
        """Return the number of queued values."""
        return self._size

    def enqueue(self, val):
        """Append ``val`` at the back of the queue."""
        node = Node(val)
        node.next_node = None
        tail, self._last = self._last, node
        if self.is_empty():
            self._first = node
        else:
            tail.next_node = node
        self._size += 1

    def dequeue(self):
        """Remove and return the oldest value, or None when the queue is empty."""
        if self.is_empty():
            return None
        head = self._first
        oldest = head.val
        self._first = head.next_node
        if self.is_empty():
            # The last node just left, so there is no tail any more.
            self._last = None
        self._size -= 1
        return oldest
69 |
70 |
class Stack(object):

    """LIFO stack stored as a singly linked chain of ``Node`` objects."""

    def __init__(self):
        self._size = 0
        self._first = None

    def __iter__(self):
        """Yield the stacked values from the top downwards."""
        current = self._first
        while current:
            yield current.val
            current = current.next_node

    def is_empty(self):
        """Return True when the stack holds no node."""
        return self._first is None

    def size(self):
        """Return the number of stacked values."""
        return self._size

    def push(self, val):
        """Place ``val`` on top of the stack."""
        top = Node(val)
        top.next_node = self._first
        self._first = top
        self._size += 1

    def pop(self):
        """Remove and return the top value, or None when the stack is empty."""
        top = self._first
        if not top:
            return None
        self._first = top.next_node
        self._size -= 1
        return top.val

    def peek(self):
        """Return the top value without removing it, or None when empty."""
        top = self._first
        return top.val if top else None
108 |
109 |
class Bag(object):

    """Add-only collection stored as a singly linked chain of ``Node`` objects."""

    def __init__(self):
        self._size = 0
        self._first = None

    def __iter__(self):
        """Yield the stored values, most recently added first."""
        current = self._first
        while current is not None:
            yield current.val
            current = current.next_node

    def add(self, val):
        """Store ``val`` at the head of the chain."""
        head = Node(val)
        head.next_node = self._first
        self._first = head
        self._size += 1

    def is_empty(self):
        """Return True when nothing has been added."""
        return self._first is None

    def size(self):
        """Return the number of stored values."""
        return self._size
134 |
135 |
class Digragh(object):

    """
    Directed graph whose adjacency table is pre-allocated for the
    vertices ``0 .. steps - 1``. Each vertex maps to a ``Bag`` of its
    successors (or to ``None`` while it has no outgoing edge).
    """

    def __init__(self, steps):
        """Create a graph with adjacency slots for ``steps`` vertices and no edges."""
        self._edges_size = 0
        self._adj = {i: None for i in range(steps)}
        # Only vertices that take part in at least one edge are recorded here.
        self._vertices = set()

    def vertices_size(self):
        """Return the number of vertices that appear in at least one edge."""
        return len(self._vertices)

    def edges_size(self):
        """Return the number of edges added so far."""
        return self._edges_size

    def add_edge(self, start, end):
        """Add the directed edge ``start -> end``.

        Raises KeyError when ``start`` has no slot in the adjacency table.
        """
        self._vertices.add(start)
        self._vertices.add(end)
        if not self._adj[start]:
            self._adj[start] = Bag()
        self._adj[start].add(end)
        self._edges_size += 1

    def get_adjacent_vertices(self, vertex):
        """Return the successors of ``vertex`` (an empty list when it has none)."""
        neighbours = self._adj[vertex]
        return neighbours if neighbours is not None else []

    def vertices(self):
        """Return the set of vertices that appear in at least one edge."""
        return self._vertices

    def reverse(self):
        """Return a new graph with every edge flipped.

        The copy is allocated with the same number of adjacency slots as
        this graph, so every vertex valid here is valid there as well.
        """
        reverse_graph = Digragh(len(self._adj))
        for vertex in self.vertices():
            for adjacent_vertex in self.get_adjacent_vertices(vertex):
                reverse_graph.add_edge(adjacent_vertex, vertex)
        return reverse_graph

    def has_edge(self, start, end):
        """Return True when the edge ``start -> end`` exists."""
        return any(vertex == end for vertex in self.get_adjacent_vertices(start))

    def __repr__(self):
        """Return a header line followed by one ``vertex: successors`` line per slot."""
        lines = ['{} vertices, {} edges\n'.format(len(self._vertices), self._edges_size)]
        for vertex in self._adj:
            neighbours = ' '.join(str(v) for v in self.get_adjacent_vertices(vertex))
            lines.append('{}: {}\n'.format(vertex, neighbours))
        return ''.join(lines)
188 |
189 |
class MinPQ(object):

    """Minimum-oriented priority queue kept as a binary heap in a list."""

    def __init__(self):
        self._pq = []

    def is_empty(self):
        """Return True when the queue holds no value."""
        return len(self._pq) == 0

    def size(self):
        """Return the number of queued values."""
        return len(self._pq)

    def swim(self, pos):
        """Move the value at ``pos`` up while it is smaller than its parent."""
        heap = self._pq
        while pos > 0:
            parent = (pos - 1) // 2
            if not heap[parent] > heap[pos]:
                break
            heap[parent], heap[pos] = heap[pos], heap[parent]
            pos = parent

    def sink(self, pos):
        """Move the value at ``pos`` down while a child is smaller than it."""
        heap = self._pq
        last = len(heap) - 1
        child = 2 * pos + 1
        while child <= last:
            # Prefer the smaller of the two children when both exist.
            if child < last and heap[child] > heap[child + 1]:
                child += 1
            if heap[pos] <= heap[child]:
                break
            heap[pos], heap[child] = heap[child], heap[pos]
            pos, child = child, 2 * child + 1

    def insert(self, val):
        """Add ``val`` to the queue."""
        heap = self._pq
        heap.append(val)
        self.swim(len(heap) - 1)

    def del_min(self):
        """Remove and return the smallest value; IndexError when empty."""
        heap = self._pq
        smallest = heap[0]
        # Move the last leaf to the root, then restore heap order from there.
        tail = heap.pop()
        if heap:
            heap[0] = tail
        self.sink(0)
        return smallest

    def min_val(self):
        """Return the smallest value without removing it; IndexError when empty."""
        return self._pq[0]
231 |
--------------------------------------------------------------------------------
/chapter_5/module_5_1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import pprint
5 | from collections import defaultdict
6 |
7 |
def lsd_sort(string_list, width):
    """
    LSD (least significant digit) string sort. Sorts ``string_list`` in
    place on its first ``width`` characters by running one stable
    key-indexed counting pass per character position, from the last
    position to the first. Every string must be at least ``width``
    characters long (shorter strings raise IndexError).

    The counting table covers at least 256 code points and is widened
    automatically when the keys contain larger code points, so any
    text can be sorted, not just extended ASCII.
    >>> test_data = ['bed', 'bug', 'dad', 'yes', 'zoo', 'now', 'for', 'tip', 'ilk',
    ...              'dim', 'tag', 'jot', 'sob', 'nob', 'sky', 'hut', 'men', 'egg',
    ...              'few', 'jay', 'owl', 'joy', 'rap', 'gig', 'wee', 'was', 'wad',
    ...              'fee', 'tap', 'tar', 'dug', 'jam', 'all', 'bad', 'yet']
    >>> lsd_sort(test_data, 3)
    >>> pp = pprint.PrettyPrinter(width=41, compact=True)
    >>> pp.pprint(test_data)
    ['all', 'bad', 'bed', 'bug', 'dad',
     'dim', 'dug', 'egg', 'fee', 'few',
     'for', 'gig', 'hut', 'ilk', 'jam',
     'jay', 'jot', 'joy', 'men', 'nob',
     'now', 'owl', 'rap', 'sky', 'sob',
     'tag', 'tap', 'tar', 'tip', 'wad',
     'was', 'wee', 'yes', 'yet', 'zoo']
    """
    if not string_list or width <= 0:
        return

    length = len(string_list)
    # The table must have a slot for every code point used in the key columns.
    largest = max((ord(ch) for s in string_list for ch in s[:width]), default=0)
    radix = max(256, largest + 1)
    aux = [None] * length

    for pos in range(width - 1, -1, -1):
        count = [0] * (radix + 1)

        # Frequency of each character, shifted by one slot.
        for s in string_list:
            count[ord(s[pos]) + 1] += 1

        # Turn frequencies into the starting index of each character's run.
        for r in range(radix):
            count[r + 1] += count[r]

        # Distribute in input order, which keeps the pass stable.
        for s in string_list:
            key = ord(s[pos])
            aux[count[key]] = s
            count[key] += 1

        string_list[:] = aux
49 |
50 |
class MSD(object):

    """
    MSD (most significant digit) string sort. Handles strings of
    different lengths: the end of a string is treated as a character
    smaller than any real one. Sub-arrays of at most
    ``_switch_2_insertion_length + 1`` strings are finished with
    insertion sort instead of another counting pass, because every
    counting pass allocates a table with one slot per possible character.

    The counting table covers at least ``_radix`` (256) code points and
    is widened per ``sort`` call when the input holds larger code
    points, so the counting path accepts the same text as the
    insertion-sort path.
    >>> test_data = ['she', 'sells', 'seashells', 'by', 'the', 'sea', 'shore',
    ...              'the', 'shells', 'she', 'sells', 'are', 'surely', 'seashells']
    >>> msd = MSD()
    >>> msd.sort(test_data)
    >>> pp = pprint.PrettyPrinter(width=41, compact=True)
    >>> pp.pprint(test_data)
    ['are', 'by', 'sea', 'seashells',
     'seashells', 'sells', 'sells', 'she',
     'she', 'shells', 'shore', 'surely',
     'the', 'the']
    """

    def __init__(self):
        self._radix = 256
        self._switch_2_insertion_length = 20

    def char_at(self, s, index):
        """Return the code point of ``s[index]``, or -1 past the end of ``s``."""
        return ord(s[index]) if index < len(s) else -1

    def _insertion_sort(self, lst, start, end, index):
        """Insertion-sort ``lst[start..end]`` comparing suffixes from ``index``."""
        for i in range(start + 1, end + 1):
            j = i
            while j > start and lst[j][index:] < lst[j - 1][index:]:
                lst[j - 1], lst[j] = lst[j], lst[j - 1]
                j -= 1

    def sort(self, string_list):
        """Sort ``string_list`` in place in code-point order."""
        length = len(string_list)
        # Size the counting table for the largest code point actually present.
        largest = max((ord(ch) for s in string_list for ch in s), default=-1)
        radix = max(self._radix, largest + 1)
        aux = [None] * length
        self._sort(string_list, 0, length - 1, 0, aux, radix)

    def _sort(self, string_list, start, end, index, aux, radix=None):
        """Sort ``string_list[start..end]`` on the characters from ``index`` on."""
        if radix is None:
            radix = self._radix

        if end <= start + self._switch_2_insertion_length:
            self._insertion_sort(string_list, start, end, index)
            return

        # Slot 1 counts "end of string" (-1), slot c + 2 counts character c.
        count = [0] * (radix + 2)

        for i in range(start, end + 1):
            count[self.char_at(string_list[i], index) + 2] += 1

        # Turn frequencies into the starting offset of each character's run.
        for r in range(radix + 1):
            count[r + 1] += count[r]

        for j in range(start, end + 1):
            v = self.char_at(string_list[j], index) + 1
            aux[count[v]] = string_list[j]
            count[v] += 1

        for n in range(start, end + 1):
            string_list[n] = aux[n - start]

        # After distribution count[r] is the end of run r - 1, i.e. the start
        # of the run for character r; recurse into each run on the next index.
        for r in range(radix):
            self._sort(string_list, start + count[r], start + count[r + 1] - 1,
                       index + 1, aux, radix)
114 |
115 |
class Quick3String(object):

    """
    Three-way string quicksort. Works like three-way quicksort, but
    partitions on a single character position at a time: strings whose
    character at that position is smaller than, equal to or greater
    than the pivot's form three groups, and only the "equal" group moves
    on to the next position. Helpful when the input contains many
    strings with long common prefixes or many duplicates.
    >>> test_data = ['she', 'sells', 'seashells', 'by', 'the', 'sea', 'shore',
    ...              'the', 'shells', 'she', 'sells', 'are', 'surely', 'seashells']
    >>> q3s = Quick3String()
    >>> q3s.sort(test_data)
    >>> pp = pprint.PrettyPrinter(width=41, compact=True)
    >>> pp.pprint(test_data)
    ['are', 'by', 'sea', 'seashells',
     'seashells', 'sells', 'sells', 'she',
     'she', 'shells', 'shore', 'surely',
     'the', 'the']
    """

    def char_at(self, s, index):
        """Return the code point of ``s[index]``, or -1 past the end of ``s``."""
        return ord(s[index]) if index < len(s) else -1

    def sort(self, string_list):
        """Sort ``string_list`` in place in code-point order."""
        self._sort(string_list, 0, len(string_list) - 1, 0)

    def _sort(self, string_list, start, end, index):
        """Sort ``string_list[start..end]``, whose strings agree before ``index``."""
        if start >= end:
            return

        lt, gt = start, end
        val = self.char_at(string_list[start], index)
        i = start + 1

        while i <= gt:
            tmp = self.char_at(string_list[i], index)
            if tmp < val:
                string_list[i], string_list[lt] = string_list[lt], string_list[i]
                lt += 1
            elif tmp > val:
                string_list[i], string_list[gt] = string_list[gt], string_list[i]
                gt -= 1
                # The string swapped in from the right is still unexamined.
                continue
            i += 1

        self._sort(string_list, start, lt - 1, index)

        # -1 means the strings of the middle group ended here and are all
        # equal; any real character, including NUL (0), needs another pass.
        if val >= 0:
            self._sort(string_list, lt, gt, index + 1)
        self._sort(string_list, gt + 1, end, index)
162 |
163 |
164 | # 5.1.1 practice
def simple_radix_sort(strings):
    """
    Tally how often each key occurs in ``strings`` (exercise 5.1.1).

    Only the counting step is present: the tally is local and discarded,
    ``strings`` is left untouched and the function returns ``None``.
    """
    tally = defaultdict(int)
    for key in strings:
        tally[key] = tally[key] + 1
169 |
170 |
# When run as a script, execute every doctest embedded in this module.
if __name__ == '__main__':
    doctest.testmod()
173 |
--------------------------------------------------------------------------------
/chapter_2/module_2_5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 |
5 |
def rank(lst, k):
    """
    Quick-select: return the element that would sit at 0-based index
    ``k`` if ``lst`` were sorted, i.e. the element with exactly ``k``
    smaller-or-equal predecessors. ``lst`` is partially reordered in
    place. Raises IndexError when ``k`` is not a valid index of ``lst``.
    >>> rank([i for i in range(10)], 5)
    5
    >>> rank([3, 1, 2, 1], 1)
    1
    >>> rank([7, 3, 9, 1, 5], 0)
    1
    """
    if not 0 <= k < len(lst):
        raise IndexError('rank index out of range')

    def partition(low, high):
        """Put lst[low] at its sorted position within lst[low..high]; return it."""
        pivot = lst[low]
        left, right = low, high + 1
        while True:
            # Scan right for an element that is not smaller than the pivot,
            # stopping at the end of the range.
            left += 1
            while left < high and lst[left] < pivot:
                left += 1
            # Scan left for an element that is not larger than the pivot;
            # lst[low] itself stops this scan.
            right -= 1
            while lst[right] > pivot:
                right -= 1
            if left >= right:
                break
            lst[left], lst[right] = lst[right], lst[left]
        # ``right`` is the last position holding a value <= pivot.
        lst[low], lst[right] = lst[right], lst[low]
        return right

    low, high = 0, len(lst) - 1
    while high > low:
        j = partition(low, high)
        if j == k:
            break
        if j > k:
            high = j - 1
        else:
            low = j + 1
    return lst[k]
34 |
35 |
36 | # 2.5.4 practice, return a sorted and non-duplicated-item list
def dedup(lst):
    """
    Generate the distinct items of ``lst`` in ascending order
    (exercise 2.5.4). The input is not modified; empty and one-element
    inputs are accepted.
    >>> lst = [i for i in dedup([2, 1, 3, 1, 1, 3, 2, 3, 4, 7])]
    >>> lst
    [1, 2, 3, 4, 7]
    >>> lst2 = [i for i in dedup([1, 1])]
    >>> lst2
    [1]
    >>> lst3 = [i for i in dedup([2, 1, 1, 4, 3, 5])]
    >>> lst3
    [1, 2, 3, 4, 5]
    >>> [i for i in dedup([5])]
    [5]
    >>> [i for i in dedup([])]
    []
    """
    # A private marker lets None (or any other value) be a legitimate item.
    marker = object()
    previous = marker
    # After sorting, equal items are adjacent, so comparing neighbours suffices.
    for item in sorted(lst):
        if previous is marker or not item == previous:
            yield item
            previous = item
64 |
65 |
66 | # 2.5.10 practice, implement a version class with __cmp__
class Version(object):

    """
    Dotted version string that orders component by component
    (exercise 2.5.10). Purely numeric components compare as integers,
    so ``115.2.1`` sorts before ``115.10.1``; any non-numeric component
    compares as text and sorts after every numeric one.
    >>> lst = [Version(i) for i in ['115.1.1', '115.10.1', '115.10.2']]
    >>> lst.sort()
    >>> lst
    [Version(115.1.1), Version(115.10.1), Version(115.10.2)]
    >>> sorted([Version('115.10.1'), Version('115.2.1')])
    [Version(115.2.1), Version(115.10.1)]
    """

    def __init__(self, version):
        self._version = version

    def _key(self):
        """Return the tuple used for comparisons and hashing."""
        # Each component becomes a same-shaped triple so that numeric and
        # textual components can be compared without a TypeError.
        return tuple(
            (0, int(part), '') if part.isdecimal() else (1, 0, part)
            for part in self._version.split('.')
        )

    def __eq__(self, other):
        if not isinstance(other, Version):
            return NotImplemented
        return self._key() == other._key()

    def __lt__(self, other):
        if not isinstance(other, Version):
            return NotImplemented
        return self._key() < other._key()

    def __hash__(self):
        # Defined alongside __eq__ so instances stay usable in sets and dicts.
        return hash(self._key())

    def __repr__(self):
        return 'Version({})'.format(self._version)

    @property
    def version(self):
        return self._version

    @version.setter
    def version(self, val):
        assert all(i.isdigit() for i in val.split('.'))
        self._version = val
96 |
97 |
98 | # 2.5.14 practice, implement a domain class with __cmp__, compare the reversed order domain.
class Domain(object):

    """
    Domain name that orders by its labels read right to left
    (exercise 2.5.14): ``cs.mit.edu`` is compared as ``edu.mit.cs``.
    >>> test_list = ['cs.princeton.edu', 'cs.harvard.edu', 'mail.python.org', 'cs.mit.edu']
    >>> lst = [Domain(i) for i in test_list]
    >>> lst.sort()
    >>> lst
    [Domain(cs.harvard.edu), Domain(cs.mit.edu), Domain(cs.princeton.edu), Domain(mail.python.org)]
    """

    def __init__(self, domain):
        # Route through the property so the comparison key is built in one place.
        self.domain = domain

    def __eq__(self, other):
        mine, theirs = self._cmp_domain, other._cmp_domain
        return mine == theirs

    def __lt__(self, other):
        mine, theirs = self._cmp_domain, other._cmp_domain
        return mine < theirs

    def __repr__(self):
        return 'Domain({})'.format(self._domain)

    @property
    def domain(self):
        return self._domain

    @domain.setter
    def domain(self, val):
        self._domain = val
        # Comparison key: the same labels, joined in reverse order.
        labels = val.split('.')
        self._cmp_domain = '.'.join(labels[::-1])
130 |
131 |
132 | # 2.5.16 practice, construct object which order by the name with a new alphabet order
class California(object):

    """
    Name that orders according to a custom alphabet (exercise 2.5.16).
    Each character is ranked by its position in ``alphabet``; a
    character that is not in ``alphabet`` raises ValueError.
    >>> lst = [California(name) for name in ('RISBY', 'PATRICK', 'DAMIEN', 'GEORGE')]
    >>> lst.sort()
    >>> lst
    [California(RISBY), California(GEORGE), California(PATRICK), California(DAMIEN)]
    """
    alphabet = tuple('RWQOJMVAHBSGZXNTCIEKUPDYFL')

    def __init__(self, name):
        # Route through the property so the comparison key is built in one place.
        self.name = name

    def __eq__(self, other):
        mine, theirs = self._cmp_tuple, other._cmp_tuple
        return mine == theirs

    def __lt__(self, other):
        mine, theirs = self._cmp_tuple, other._cmp_tuple
        return mine < theirs

    def __repr__(self):
        return 'California({})'.format(self._name)

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, val):
        self._name = val
        # Comparison key: the rank of every character in the custom alphabet.
        self._cmp_tuple = tuple(map(California.alphabet.index, val))
165 |
166 |
167 | # 2.5.19 practice, kendall tau algorithm implementation
class KendallTau(object):

    """
    Kendall tau distance between two rankings (exercise 2.5.19): the
    number of pairs that appear in a different order in the two
    sequences, counted as inversions during a merge sort.
    >>> klt = KendallTau()
    >>> klt.kendall_tau_count((0, 3, 1, 6, 2, 5, 4), (1, 0, 3, 6, 4, 2, 5))
    4
    """

    def kendall_tau_count(self, origin_list, count_list):
        """Return the number of pairs ordered differently in the two sequences."""
        # Express count_list as positions within origin_list; the distance is
        # then the number of inversions in that position list.
        positions = [origin_list.index(item) for item in count_list]
        scratch = list(positions)
        return self.count(positions, scratch, 0, len(positions) - 1)

    def count(self, lst, aux, low, high):
        """Sort ``lst[low..high]`` in place and return its inversion count."""
        if high <= low:
            return 0
        mid = (low + high) // 2
        return (self.count(lst, aux, low, mid)
                + self.count(lst, aux, mid + 1, high)
                + self.merge_count(lst, aux, low, mid, high))

    def merge_count(self, lst, aux, low, mid, high):
        """Merge the sorted halves of ``lst[low..high]``; return the cross inversions."""
        aux[low:high + 1] = lst[low:high + 1]
        inversions = 0
        left, right, dest = low, mid + 1, low
        while left <= mid and right <= high:
            if aux[left] < aux[right]:
                lst[dest] = aux[left]
                left += 1
            else:
                lst[dest] = aux[right]
                right += 1
                # Everything still waiting in the left half is out of order
                # with the item just taken from the right half.
                inversions += mid - left + 1
            dest += 1
        # Exactly one half can have leftovers; copy them over unchanged.
        if left <= mid:
            lst[dest:high + 1] = aux[left:mid + 1]
        else:
            lst[dest:high + 1] = aux[right:high + 1]
        return inversions
208 |
209 |
# When run as a script, execute every doctest embedded in this module.
if __name__ == '__main__':
    doctest.testmod()
212 |
--------------------------------------------------------------------------------
/chapter_3/module_3_4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import string
5 | import collections.abc
6 |
7 |
class Pair(object):

    """Key/value record stored in the hash-table slots."""

    def __init__(self, key, value):
        self._key, self._value = key, value

    def _read_key(self):
        return self._key

    def _write_key(self, key):
        self._key = key

    def _read_value(self):
        return self._value

    def _write_value(self, val):
        self._value = val

    # Both fields are exposed as plain read/write properties.
    key = property(_read_key, _write_key)
    value = property(_read_value, _write_value)
29 |
30 |
class SeperateChainingHT(object):

    """
    Hash table with separate chaining: every slot owns its own list of
    ``Pair`` entries, and a key that hashes to an occupied slot is
    simply appended to that slot's list.
    >>> test_str = 'SEARCHEXAMPLE'
    >>> ht = SeperateChainingHT()
    >>> for index, s in enumerate(test_str):
    ...     ht.put(s, index)
    ...
    >>> ht.put(['a', 'b'], 999)
    Traceback (most recent call last):
        ...
    AssertionError
    >>> ht.get('L')
    11
    >>> ht.get('S')
    0
    >>> ht.get('E')
    12
    >>> sorted(ht.keys())
    ['A', 'C', 'E', 'H', 'L', 'M', 'P', 'R', 'S', 'X']
    >>> ht.delete('H')
    >>> ht.get('H')
    """

    def __init__(self):
        self.__init(997)

    def __init(self, size):
        self._len = size    # number of slots
        self._size = 0      # number of stored keys
        # One independent list per slot; ``[[]] * size`` would alias a
        # single list across every slot.
        self._st = [[] for _ in range(size)]

    def __hash(self, key):
        # Parenthesised on purpose: '%' binds tighter than '&', so the sign
        # bit must be masked off before reducing modulo the table length.
        return (hash(key) & 0x7fffffff) % self._len

    def put(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value.

        Raises AssertionError when ``key`` is not hashable.
        """
        assert isinstance(key, collections.abc.Hashable)

        slot = self._st[self.__hash(key)]
        for item in slot:
            if item.key == key:
                item.value = value
                return
        slot.append(Pair(key, value))
        self._size += 1

    def get(self, key):
        """Return the value stored under ``key``, or None when absent."""
        for item in self._st[self.__hash(key)]:
            if item.key == key:
                return item.value
        return None

    # 3.4.9 practice, implement a delete function for Seperate-Chaining hash table
    def delete(self, key):
        """Remove ``key`` and its value; do nothing when ``key`` is absent."""
        slot = self._st[self.__hash(key)]
        for position, item in enumerate(slot):
            if item.key == key:
                del slot[position]
                self._size -= 1
                return

    def keys(self):
        """Return a list with every stored key (in slot order)."""
        return [item.key for slot in self._st for item in slot]
97 |
98 |
class LinearProbingHT(object):

    """
    Hash table with linear probing: when the slot a key hashes to is
    taken by another key, the following slots are tried in turn until
    the key or an empty slot is found. The table doubles before it
    becomes more than half full and halves when it is at most one
    eighth full, so probe sequences stay short and an empty slot always
    exists. ``None`` marks an empty slot and therefore cannot be a key.
    >>> test_str = 'SEARCHEXAMPLE'
    >>> ht = LinearProbingHT()
    >>> for index, s in enumerate(test_str):
    ...     ht.put(s, index)
    ...
    >>> ht.put(['a', 'b'], 999)
    Traceback (most recent call last):
        ...
    AssertionError
    >>> ht.get('L')
    11
    >>> ht.get('S')
    0
    >>> ht.get('E')
    12
    >>> ht.delete('H')
    >>> ht.get('H')
    >>> big = LinearProbingHT()
    >>> for n in range(100):
    ...     big.put(n, n * n)
    ...
    >>> big.get(0), big.get(99)
    (0, 9801)
    """

    def __init__(self, capacity=16):
        """Create an empty table with ``capacity`` slots (at least 2)."""
        if capacity < 2:
            raise ValueError('capacity must be at least 2')
        self._len = capacity  # the length of the list
        self._size = 0  # the amount of the variables
        self._keys = [None] * self._len
        self._vals = [None] * self._len

    def __hash(self, key):
        # Parenthesised on purpose: '%' binds tighter than '&', so the sign
        # bit must be masked off before reducing modulo the table length.
        return (hash(key) & 0x7fffffff) % self._len

    def __resize(self, size):
        """Rebuild the table with ``size`` slots, re-inserting every key."""
        tmp = LinearProbingHT(size)
        for k, v in zip(self._keys, self._vals):
            if k is not None:
                tmp.put(k, v)
        self._len = tmp._len
        self._keys = tmp._keys
        self._vals = tmp._vals
        self._size = tmp._size

    def __find(self, key):
        """Return the slot index holding ``key``, or None when it is absent."""
        index = self.__hash(key)
        # The probe ends at the first empty slot, which always exists.
        while self._keys[index] is not None:
            if self._keys[index] == key:
                return index
            index = (index + 1) % self._len
        return None

    def __contains(self, key):
        return self.__find(key) is not None

    def put(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value.

        Raises AssertionError when ``key`` is not hashable and ValueError
        when ``key`` is None.
        """
        assert isinstance(key, collections.abc.Hashable)
        if key is None:
            raise ValueError('None cannot be used as a key')

        if 2 * self._size >= self._len:
            self.__resize(self._len * 2)

        index = self.__hash(key)
        while self._keys[index] is not None:
            if self._keys[index] == key:
                self._vals[index] = value
                return
            index = (index + 1) % self._len

        self._keys[index], self._vals[index] = key, value
        self._size += 1

    def get(self, key):
        """Return the value stored under ``key``, or None when absent."""
        index = self.__find(key)
        return None if index is None else self._vals[index]

    def delete(self, key):
        """Remove ``key`` and its value; do nothing when ``key`` is absent."""
        index = self.__find(key)
        if index is None:
            return

        self._keys[index] = self._vals[index] = None
        self._size -= 1

        # Re-insert the rest of the cluster: those keys may have been pushed
        # past the slot that has just been emptied.
        index = (index + 1) % self._len
        while self._keys[index] is not None:
            k, v = self._keys[index], self._vals[index]
            self._keys[index] = self._vals[index] = None
            self._size -= 1
            self.put(k, v)
            index = (index + 1) % self._len

        if 0 < self._size <= self._len // 8:
            self.__resize(self._len // 2)

    # 3.4.19 practice
    def keys(self):
        """Yield every stored key in slot order."""
        for k in self._keys:
            if k is not None:
                yield k
201 |
202 |
203 | # 3.4.4 practice
def find_complete_hash_number(hash_string):
    """
    Search for ``(a, m)`` such that ``(a * k) % m`` is different for
    every character of ``hash_string``, where ``k`` is the character's
    position in the uppercase ASCII alphabet (exercise 3.4.4).

    ``m`` is tried from ``len(hash_string)`` up to 99999 and, for each
    ``m``, ``a`` from 1 to 1000; the first pair that works is returned,
    or ``(None, None)`` when none does.
    """
    def collision_free(a, m):
        """Return True when no two characters share an index for this (a, m)."""
        taken = set()
        for ch in hash_string:
            slot = (a * string.ascii_uppercase.index(ch)) % m
            if slot in taken:
                return False
            taken.add(slot)
        return True

    candidates = (
        (a, m)
        for m in range(len(hash_string), 100000)
        for a in range(1, 1001)
        if collision_free(a, m)
    )
    return next(candidates, (None, None))
220 |
221 |
# When run as a script, execute every doctest embedded in this module.
if __name__ == '__main__':
    doctest.testmod()
    # Disabled manual run of exercise 3.4.4:
    # print(find_complete_hash_number('SEARCHXMPL'))
225 |
--------------------------------------------------------------------------------
/chapter_1/module_1_3_linked_list.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | from __future__ import print_function
4 | import doctest
5 | from common import Node
6 |
7 |
8 | class LinkedList(object):
9 |
10 | '''
11 | Linked list practice.
12 | >>> ######### init linked list test case.
13 | >>> lst = LinkedList()
14 | >>> for i in range(1, 5):
15 | ... lst.append(i)
16 | ...
17 | >>> lst.print_list()
18 | 1 2 3 4
19 | >>> ######## test delete_last() function, remove all nodes from linked list.
20 | >>> while lst.size():
21 | ... lst.delete_last()
22 | ...
23 | >>> lst.print_list()
24 | >>> ######## re-init linked list.
25 | >>> lst = LinkedList()
26 | >>> for i in range(1, 5):
27 | ... lst.append(i)
28 | ...
29 | >>> ######## test find() function.
30 | >>> lst.find(5)
31 | False
32 | >>> lst.find(4)
33 | True
34 | >>> lst.find(1)
35 | True
36 | >>> ######### test delete() function.
37 | >>> lst.delete(1)
38 | >>> lst.print_list()
39 | 2 3 4
40 | >>> lst.delete(4)
41 | >>> lst.print_list()
42 | 2 3 4
43 | >>> lst.delete(2)
44 | >>> lst.print_list()
45 | 2 4
46 | >>> ######## new test list.
47 | >>> lst2 = LinkedList()
48 | >>> for i in range(10):
49 | ... lst2.append(i)
50 | ...
51 | >>> ######## test remove_after function.
52 | >>> lst2.remove_after(8)
53 | >>> lst2.remove_after(0)
54 | >>> lst2.print_list()
55 | 0 2 3 4 5 6 7 8
56 | >>> ####### test insert_after function.
57 | >>> lst2.insert_after(0, 1)
58 | >>> lst2.print_list()
59 | 0 1 2 3 4 5 6 7 8
60 | >>> lst2.insert_after(8, 9)
61 | >>> lst2.print_list()
62 | 0 1 2 3 4 5 6 7 8 9
63 | >>> ###### test max_value function.
64 | >>> lst2.max_value()
65 | 9
66 | >>> ##### test remove function, cannot delete all-same-value list yet.
67 | >>> lst2.append(8)
68 | >>> lst2.append(1)
69 | >>> lst2.remove(1)
70 | >>> lst2.remove(8)
71 | >>> lst2.print_list()
72 | 0 2 3 4 5 6 7 9
73 | >>> lst3 = LinkedList()
74 | >>> for i in range(5):
75 | ... lst3.append(3)
76 | ...
77 | >>> lst3.remove(3)
78 | >>> lst3.remove(3)
79 | >>> lst3.print_list()
80 | >>>
81 | >>> for i in range(1, 10):
82 | ... lst3.append(i)
83 | ...
84 | >>> node = lst3.reverse()
85 | >>> lst = []
86 | >>> while node:
87 | ... lst.append(node.val)
88 | ... node = node.next_node
89 | ...
90 | >>> lst
91 | [9, 8, 7, 6, 5, 4, 3, 2, 1]
92 | '''
93 |
94 | def __init__(self):
95 | self._first = None
96 | self._size = 0
97 |
def print_list(self):
    """Print all values on one line separated by spaces; print nothing when empty."""
    node = self._first
    while node:
        following = node.next_node
        # A space follows every value except the last, which ends the line.
        print(node.val, end=' ' if following else '\n')
        node = following
106 |
def append(self, val):
    """Wrap ``val`` in a new ``Node`` and link it after the current last node."""
    node = Node(val)
    if not self._first:
        self._first = node
    else:
        # Walk to the node that has no successor yet.
        tail = self._first
        while tail.next_node:
            tail = tail.next_node
        tail.next_node = node
    self._size += 1
117 |
118 | # 1.3.19 practice
def delete_last(self):
    """Remove the tail node; an empty list is left untouched."""
    head = self._first
    if not head:
        return
    if head.next_node:
        # stop on the node just before the tail and cut the tail off
        before_tail = head
        while before_tail.next_node.next_node:
            before_tail = before_tail.next_node
        before_tail.next_node = None
    else:
        # a single node: the list becomes empty
        self._first = None
    self._size -= 1
131 |
132 | # 1.3.21 practice
def find(self, val):
    """Return True when some node holds ``val``, otherwise False."""
    node = self._first
    while node:
        if node.val == val:
            break
        node = node.next_node
    # the walk stops on the matching node, or runs off the end of the list
    return bool(node)
140 |
def size(self):
    """Return the node count tracked by the list."""
    count = self._size
    return count
143 |
144 | # 1.3.20 practice
def delete(self, pos):
    """Remove the node at 1-based position ``pos``.

    Positions outside ``1..size`` are ignored, so the call is a no-op for
    an empty list, for ``pos`` greater than the size and for zero or
    negative positions (which previously walked off the end of the list
    and raised AttributeError).
    """
    if pos < 1 or pos > self._size:
        return
    if pos == 1:
        self._first = self._first.next_node
        self._size -= 1
        return
    # stop on the node just before the one to unlink
    prev = self._first
    for _ in range(pos - 2):
        prev = prev.next_node
    target = prev.next_node
    prev.next_node = target.next_node
    # detach the removed node from the list
    target.next_node = None
    self._size -= 1
160 |
161 | # 1.3.24 practice, accept val as parameter instead of node as parameter
def remove_after(self, item):
    """Remove the node that follows the first node whose value is ``item``.

    Nothing happens when the list is empty, when ``item`` is not found or
    when the matching node is the tail. The size counter is decremented
    exactly once, and only when a node is actually unlinked.
    """
    node = self._first
    while node and node.next_node:
        if node.val == item:
            node.next_node = node.next_node.next_node
            self._size -= 1
            break
        node = node.next_node
170 |
171 | # 1.3.25 practice, accept val as parameter instead of node as parameter
def insert_after(self, current_node_item, new_node_item):
    """Insert a node holding ``new_node_item`` right after the first node
    whose value equals ``current_node_item``.

    The list is left unchanged when no node matches.
    """
    cursor = self._first
    while cursor:
        if cursor.val == current_node_item:
            fresh = Node(new_node_item)
            # splice the new node between the match and its old successor
            fresh.next_node = cursor.next_node
            cursor.next_node = fresh
            self._size += 1
            return
        cursor = cursor.next_node
183 |
184 | # 1.3.26 practice
def remove(self, key):
    """Remove every node whose value equals ``key``.

    Handles an empty list, matches at the head (the head reference is
    moved forward) and runs of consecutive matches; the size counter is
    decremented once per removed node.
    """
    # drop matching nodes from the front so the head is a keeper
    while self._first and self._first.val == key:
        self._first = self._first.next_node
        self._size -= 1

    prev = self._first
    while prev and prev.next_node:
        if prev.next_node.val == key:
            # unlink the match; stay on prev because its new successor
            # may match as well
            prev.next_node = prev.next_node.next_node
            self._size -= 1
        else:
            prev = prev.next_node
204 |
205 | # 1.3.27 practice
def max_value(self):
    """Return the largest value stored in the list, or None when empty."""
    node, largest = self._first, None
    while node:
        candidate = node.val
        # the first value seen seeds the running maximum
        if largest is None:
            largest = candidate
        largest = candidate if candidate > largest else largest
        node = node.next_node
    return largest
216 |
217 | # 1.3.30 practice
def reverse(self):
    """Reverse the list in place and return the new head node.

    The links are flipped one node at a time. The list's own head
    reference is updated as well, so the list object stays consistent
    with the returned node (previously it kept pointing at the old head,
    which had become the tail).
    """
    current = self._first
    reversed_head = None
    while current:
        following = current.next_node
        current.next_node = reversed_head
        reversed_head = current
        current = following
    self._first = reversed_head
    return reversed_head
227 |
228 |
229 | if __name__ == '__main__':
230 | doctest.testmod()
231 |
--------------------------------------------------------------------------------
/chapter_2/module_2_2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import random
5 |
6 |
class MergeSort(object):

    """
    Top-down merge sort implementation: sort the two halves of the list
    recursively, then merge them. Small sub arrays are handed to insertion
    sort (2.2.11 practice) and the auxiliary array swaps roles with the list
    on every recursion level, so no copy is needed before a merge
    (2.2.9 practice). The sort is stable: equal elements keep their order.
    For a N-size array, top-down merge sort needs 1/2NlgN to NlgN comparisons,
    and needs to access the array 6NlgN times at most.
    >>> ms = MergeSort()
    >>> lst = [4, 3, 2, 5, 7, 9, 0, 1, 8, 7, -1, 11, 13, 31, 24]
    >>> ms.sort(lst)
    >>> lst
    [-1, 0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 11, 13, 24, 31]
    """

    # ranges spanning at most this many index steps go to insertion sort
    _CUTOFF = 7

    def merge(self, aux, lst, low, mid, high):
        """
        Merge the sorted runs aux[low..mid] and aux[mid+1..high] into
        lst[low..high]. On ties the element of the left run goes first,
        which keeps the sort stable.
        """
        left, right = low, mid + 1

        for j in range(low, high + 1):
            if left > mid:
                lst[j] = aux[right]
                right += 1
            elif right > high:
                lst[j] = aux[left]
                left += 1
            elif aux[right] < aux[left]:
                lst[j] = aux[right]
                right += 1
            else:
                lst[j] = aux[left]
                left += 1

    # 2.2.11 practice, sort the small sub array with insertion sort
    def insertion_sort(self, lst, low, high):
        """Sort lst[low..high] in place with insertion sort."""
        for i in range(low + 1, high + 1):
            j = i
            while j > low and lst[j] < lst[j - 1]:
                lst[j], lst[j - 1] = lst[j - 1], lst[j]
                j -= 1

    def sort(self, lst):
        """Sort ``lst`` in place in ascending order."""
        # 2.2.9 practice, make aux as a function parameter.
        aux = lst[:]
        self.__sort(aux, lst, 0, len(lst) - 1)

    def __sort(self, aux, lst, low, high):
        """
        Sort the range low..high so that the result ends up in ``lst``,
        using ``aux`` (holding the same elements in that range) as source.
        """
        if high <= low:
            return
        if high - low <= self._CUTOFF:
            self.insertion_sort(lst, low, high)
            return
        mid = (low + high) // 2
        # the roles are swapped: both halves end up sorted inside aux
        self.__sort(lst, aux, low, mid)
        self.__sort(lst, aux, mid + 1, high)
        # 2.2.11 practice, if the two sorted halves are already in order
        # (aux[mid] <= aux[mid+1]) copy them into the list and skip the merge.
        if not aux[mid + 1] < aux[mid]:
            lst[low:high + 1] = aux[low:high + 1]
            return
        self.merge(aux, lst, low, mid, high)
69 |
70 |
class MergeSortBU(object):

    """
    Bottom-up merge sort algorithm implementation: treat the N-size array as
    N/sz sorted runs of length sz, merge each adjacent pair of runs,
    then double sz and repeat until sz is at least N.
    The sort is stable: equal elements keep their original order.

    >>> ms = MergeSortBU()
    >>> lst = [4, 3, 2, 5, 7, 9, 0, 1, 8, 7, -1]
    >>> ms.sort(lst)
    >>> lst
    [-1, 0, 1, 2, 3, 4, 5, 7, 7, 8, 9]
    """

    def sort(self, lst):
        """Sort ``lst`` in place in ascending order."""
        length = len(lst)
        aux = [None] * length
        size = 1
        while size < length:
            # merge the runs [low, low+size) and [low+size, low+2*size);
            # the last run may be shorter than size
            for low in range(0, length - size, size * 2):
                mid = low + size - 1
                high = min(low + size * 2 - 1, length - 1)
                self.merge(aux, lst, low, mid, high)
            size *= 2

    def merge(self, aux, lst, low, mid, high):
        """
        Merge the sorted runs lst[low..mid] and lst[mid+1..high] in place,
        using ``aux`` as scratch space. On ties the element of the left
        run goes first.
        """
        aux[low:high + 1] = lst[low:high + 1]
        left, right = low, mid + 1

        for j in range(low, high + 1):
            if left > mid:
                lst[j] = aux[right]
                right += 1
            elif right > high:
                lst[j] = aux[left]
                left += 1
            elif aux[right] < aux[left]:
                lst[j] = aux[right]
                right += 1
            else:
                lst[j] = aux[left]
                left += 1
113 |
114 |
115 | # 2.2.14 practice merge two sorted list
def merge_list(lst1, lst2):
    """
    Merge two sorted lists into a new sorted list; the inputs are not
    modified. On ties the element of ``lst1`` comes first. Two empty
    lists give an empty list.
    >>> merge_list([1, 2, 3, 4], [])
    [1, 2, 3, 4]
    >>> merge_list([], [1, 2, 3, 4])
    [1, 2, 3, 4]
    >>> merge_list([1, 2, 3, 4], [4, 5, 6])
    [1, 2, 3, 4, 4, 5, 6]
    >>> merge_list([1, 2, 3, 4], [1, 2, 3, 4])
    [1, 1, 2, 2, 3, 3, 4, 4]
    >>> merge_list([1, 2], [5, 6, 7, 8])
    [1, 2, 5, 6, 7, 8]
    >>> merge_list([2, 3, 5, 9], [2, 7, 11])
    [2, 2, 3, 5, 7, 9, 11]
    >>> merge_list([], [])
    []
    """
    merged = []
    i1 = i2 = 0
    len1, len2 = len(lst1), len(lst2)

    while i1 < len1 and i2 < len2:
        if lst2[i2] < lst1[i1]:
            merged.append(lst2[i2])
            i2 += 1
        else:
            merged.append(lst1[i1])
            i1 += 1

    # at most one of the two lists still has elements left
    merged.extend(lst1[i1:])
    merged.extend(lst2[i2:])
    return merged
152 |
153 |
154 | # 2.2.15 practice: bottom-up merge sort using a queue. Make each element a sub queue,
155 | # then repeatedly merge the first two sub queues and enqueue the result until
156 | # there is only one sub queue left.
def bu_merge_sort_q(lst):
    """
    Sort ``lst`` in place with a queue-based bottom-up merge sort and
    return it. Every element starts as a one-element sorted run; the two
    runs at the front of the queue are merged with merge_list() and the
    result is appended at the back until a single run is left.
    An empty list is returned unchanged.
    >>> bu_merge_sort_q([3, 2, 4, 7, 8, 9, 1, 0])
    [0, 1, 2, 3, 4, 7, 8, 9]
    >>> test_lst = [i for i in range(10)]
    >>> random.shuffle(test_lst)
    >>> bu_merge_sort_q(test_lst)
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> bu_merge_sort_q([])
    []
    """
    # deque gives O(1) pops from the front, list.pop(0) is O(n)
    from collections import deque

    if not lst:
        return lst
    queue = deque([item] for item in lst)
    while len(queue) > 1:
        first = queue.popleft()
        second = queue.popleft()
        queue.append(merge_list(first, second))
    # write the result back so the caller's list is sorted in place
    lst[:] = queue[0]
    return lst
174 |
175 |
176 | # 2.2.17 practice linked-list sort using merge sort
def linked_list_merge_sort(head):
    """
    Sort a singly linked list with merge sort and return the new head.

    Nodes are objects with ``val`` and ``next_node`` attributes. The list
    is re-linked in place: it is cut in the middle (slow/fast pointers),
    both halves are sorted recursively and then merged. Equal values keep
    their original order. ``None`` and single-node lists are returned
    unchanged.
    """
    def merge(node1, node2):
        # merge two sorted chains, taking from node1 on ties
        if node1 is None or node2 is None:
            return node1 or node2
        if node1.val <= node2.val:
            pt = res = node1
            node1 = node1.next_node
        else:
            pt = res = node2
            node2 = node2.next_node

        while node1 and node2:
            if node1.val <= node2.val:
                pt.next_node = node1
                node1 = node1.next_node
            else:
                pt.next_node = node2
                node2 = node2.next_node
            pt = pt.next_node
        # append whatever is left of the chain that is not exhausted
        pt.next_node = node1 or node2
        return res

    if head is None or head.next_node is None:
        return head
    # slow_pt stops on the last node of the first half
    fast_pt = slow_pt = head
    while fast_pt.next_node and fast_pt.next_node.next_node:
        fast_pt = fast_pt.next_node.next_node
        slow_pt = slow_pt.next_node

    # cut the list in two so each recursive call sees a shorter list
    second_half = slow_pt.next_node
    slow_pt.next_node = None
    return merge(linked_list_merge_sort(head),
                 linked_list_merge_sort(second_half))
213 |
214 |
215 | # 2.2.19 practice, using merge function from merge-sort to count the reverse number
class ReverseCount(object):

    """
    Count the inversions of a list (pairs i < j with lst[i] > lst[j]) with
    the merge step of merge sort. Equal elements are not inversions.
    >>> rc = ReverseCount()
    >>> rc.reverse_count([1, 7, 2, 9, 6, 4, 5, 3])
    14
    >>> rc.reverse_count([1, 1, 1])
    0
    """

    def reverse_count(self, lst):
        """Return the number of inversions in ``lst`` without modifying it."""
        sort_lst, aux_lst = lst[:], lst[:]
        return self.count(sort_lst, aux_lst, 0, len(lst) - 1)

    def count(self, lst, assist, low, high):
        """
        Sort lst[low..high] and return how many inversions that range held:
        those inside each half plus those crossing the middle.
        """
        if low >= high:
            return 0
        mid = (high + low) // 2
        lc = self.count(lst, assist, low, mid)
        rc = self.count(lst, assist, mid + 1, high)
        mc = self.merge_count(lst, assist, low, mid, high)
        return lc + rc + mc

    def merge_count(self, lst, assist, low, mid, high):
        """
        Merge the sorted halves lst[low..mid] and lst[mid+1..high] and
        return the number of inversions between them.
        """
        assist[low:high + 1] = lst[low:high + 1]
        count, left, right = 0, low, mid + 1
        for j in range(low, high + 1):
            if left > mid:
                lst[j] = assist[right]
                right += 1
            elif right > high:
                lst[j] = assist[left]
                left += 1
            elif assist[left] <= assist[right]:
                # ties go to the left half: equal values are not inverted
                lst[j] = assist[left]
                left += 1
            else:
                lst[j] = assist[right]
                right += 1
                # every element still waiting in the left half is larger
                count += mid - left + 1
        return count
255 |
256 |
257 | if __name__ == '__main__':
258 | doctest.testmod()
259 |
--------------------------------------------------------------------------------
/chapter_6/module_6_1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | from basic_data_struct import Bag
4 | from collections import defaultdict
5 | import random
6 | import doctest
7 |
8 |
9 | M_SIZE = 6
10 |
11 |
class Entry(object):

    """
    One slot of a B-tree node: a key with either a value (external nodes)
    or a link to a child node (internal nodes).

    All three attributes are writable, because the tree rewrites the key
    and the link of an entry when a child node is split.
    """

    def __init__(self, key, value, node):
        self._key = key
        self._value = value
        self._next_node = node

    @property
    def key(self):
        return self._key

    @key.setter
    def key(self, new_key):
        self._key = new_key

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value):
        self._value = new_value

    @property
    def next_node(self):
        return self._next_node

    @next_node.setter
    def next_node(self, node):
        self._next_node = node
30 |
31 |
class Node(object):

    """
    A B-tree node: an array of M_SIZE child slots plus the number of
    slots that are currently in use.
    """

    def __init__(self, k):
        # every node owns M_SIZE slots, only the first k count as used
        self._children = [None] * M_SIZE
        self._m_size = k

    @property
    def m_size(self):
        return self._m_size

    @m_size.setter
    def m_size(self, count):
        self._m_size = count

    @property
    def children(self):
        return self._children

    @children.setter
    def children(self, replacement):
        self._children = replacement
53 |
54 |
class BTree(object):

    """
    B-tree symbol table. Every node holds fewer than M_SIZE entries; a node
    that reaches M_SIZE entries is split into two nodes of M_SIZE / 2 entries.
    External nodes (height 0) hold the key/value pairs, internal nodes hold,
    for every child, the child's first key and a link to the child.
    M_SIZE has to be even. Putting an existing key adds a second entry,
    get() returns the value of the first entry found.
    >>> tree = BTree()
    >>> for k in range(20):
    ...     tree.put(k, k * k)
    ...
    >>> tree.size()
    20
    >>> tree.height()
    2
    >>> tree.get(13)
    169
    >>> tree.get(0)
    0
    >>> tree.get(99) is None
    True
    """

    def __init__(self):
        self._root = Node(0)
        self._size = 0
        self._height = 0

    def size(self):
        """Return the number of key/value pairs that were put."""
        return self._size

    def height(self):
        """Return the height of the tree, 0 for a single external node."""
        return self._height

    def put(self, key, value):
        """Insert a key/value pair, growing the tree when the root splits."""
        split_node = self._insert(self._root, key, value, self._height)
        self._size += 1
        if split_node is None:
            return
        # the root was split: a new root links the two halves
        new_root = Node(2)
        new_root.children[0] = Entry(
            self._root.children[0].key, None, self._root)
        new_root.children[1] = Entry(
            split_node.children[0].key, None, split_node)
        self._root = new_root
        self._height += 1

    def _insert(self, node, key, value, height):
        """
        Insert into the subtree rooted at ``node``. Return the new right
        sibling when ``node`` had to be split, otherwise None.
        """
        pos = 0
        new_entry = Entry(key, value, None)
        if height == 0:
            # external node: find the first entry with a larger key
            while pos < node.m_size and not key < node.children[pos].key:
                pos += 1
        else:
            # internal node: descend into the last child whose first key
            # is not larger than key
            while pos < node.m_size:
                if (pos + 1 == node.m_size or
                        key < node.children[pos + 1].key):
                    child = node.children[pos].next_node
                    split_child = self._insert(child, key, value, height - 1)
                    # a split child is linked right after the child itself
                    pos += 1
                    if split_child is None:
                        return None
                    new_entry = Entry(
                        split_child.children[0].key, None, split_child)
                    break
                pos += 1

        # shift the entries to open the slot at pos
        for i in range(node.m_size, pos, -1):
            node.children[i] = node.children[i - 1]
        node.children[pos] = new_entry
        node.m_size += 1
        if node.m_size < M_SIZE:
            return None
        return self._split(node)

    def _split(self, node):
        """
        Move the upper half of the entries of a full node into a new node
        and return that new node.
        """
        new_size = M_SIZE // 2
        split_node = Node(new_size)
        node.m_size = new_size
        for i in range(new_size):
            split_node.children[i] = node.children[new_size + i]
        return split_node

    def get(self, key):
        """Return the value stored for ``key``, or None when it is absent."""
        return self._search(self._root, key, self._height)

    def _search(self, node, key, height):
        """Look ``key`` up in the subtree of the given height."""
        if height == 0:
            for i in range(node.m_size):
                if node.children[i].key == key:
                    return node.children[i].value
        else:
            for i in range(node.m_size):
                if i + 1 == node.m_size or key < node.children[i + 1].key:
                    return self._search(
                        node.children[i].next_node, key, height - 1)
        return None
130 |
131 |
class QuickThreeWay(object):

    """
    Quick sort with 3-way partitioning: the elements equal to the pivot are
    gathered in the middle and left out of the recursive calls.
    """

    def sort(self, lst):
        """Shuffle ``lst``, then sort it in place in ascending order."""
        random.shuffle(lst)
        self.__sort(lst, 0, len(lst) - 1)

    def __sort(self, lst, low, high):
        """Sort lst[low..high] in place."""
        if low >= high:
            return

        pivot = lst[low]
        smaller_end, cursor, larger_start = low, low + 1, high
        # lst[low..smaller_end-1] < pivot, lst[smaller_end..cursor-1] == pivot,
        # lst[larger_start+1..high] > pivot, the rest is not examined yet
        while cursor <= larger_start:
            item = lst[cursor]
            if item < pivot:
                lst[smaller_end], lst[cursor] = item, lst[smaller_end]
                smaller_end += 1
                cursor += 1
            elif item > pivot:
                # the element swapped in is unexamined, so cursor stays put
                lst[larger_start], lst[cursor] = item, lst[larger_start]
                larger_start -= 1
            else:
                cursor += 1
        self.__sort(lst, low, smaller_end - 1)
        self.__sort(lst, larger_start + 1, high)
155 |
156 |
class SuffixArray(object):

    """
    Sorted array of all the suffixes of a string.
    >>> sa = SuffixArray('banana')
    >>> [sa.select(i) for i in range(sa.length())]
    ['a', 'ana', 'anana', 'banana', 'na', 'nana']
    >>> sa.lcp(2)
    3
    >>> sa.rank('banana')
    3
    >>> sa.rank('zebra')
    6
    """

    def __init__(self, s):
        self._length = len(s)
        # the built-in sort replaces the hand-written 3-way quick sort
        self._suffixes = sorted(s[i:] for i in range(self._length))

    def length(self):
        """Return the length of the text, which is the number of suffixes."""
        return self._length

    def select(self, index):
        """Return the suffix at position ``index`` of the sorted order."""
        return self._suffixes[index]

    def lcp(self, index):
        """
        Return the length of the longest common prefix of the suffix at
        ``index`` and the one before it. Meant for 1 <= index < length();
        index 0 is compared with the last suffix because of negative
        indexing.
        """
        return self._lcp(self._suffixes[index], self._suffixes[index - 1])

    def _lcp(self, s1, s2):
        """Return the length of the longest common prefix of two strings."""
        min_len = min(len(s1), len(s2))
        for i in range(min_len):
            if s1[i] != s2[i]:
                return i
        return min_len

    def rank(self, key):
        """
        Return the number of suffixes that are smaller than ``key`` (binary
        search). When ``key`` is a suffix this is its position in the array.
        """
        low, high = 0, self._length - 1
        while low <= high:
            mid = (low + high) // 2
            if self._suffixes[mid] > key:
                high = mid - 1
            elif self._suffixes[mid] < key:
                low = mid + 1
            else:
                return mid
        # key is not a suffix: low is where it would be inserted
        return low
193 |
194 |
class LRS(object):

    '''
    Longest repeated substring, found by comparing each suffix of the
    sorted suffix array with the suffix before it.
    >>> test_string = ('it was the best of times it was the worst of times '
    ...                'it was the age of wisdom it was the age of foolishness '
    ...                'it was the epoch of belief it was the epoch of incredulity '
    ...                'it was the season of light it was the season of darkness '
    ...                'it was the spring of hope it was the winter of despair')
    >>> LRS.run(test_string)
    'st of times it was the '
    '''

    @staticmethod
    def run(input_string):
        """
        Return the longest substring that occurs at least twice in
        ``input_string``; the empty string when nothing is repeated.
        """
        suffixes = SuffixArray(input_string)
        best = ''
        for idx in range(1, len(input_string)):
            shared = suffixes.lcp(idx)
            # only a strictly longer prefix replaces the current best
            if shared > len(best):
                best = suffixes.select(idx)[:shared]
        return best
217 |
218 |
class FlowEdge(object):

    '''
    Directed edge of a flow network, with a capacity and a current flow.
    >>> edge = FlowEdge(1, 2, 2.0, 1)
    >>> edge
    1->2 1/2.0
    >>> edge.add_residual_flow_to(2, 0.5)
    >>> edge
    1->2 1.5/2.0
    '''

    def __init__(self, start, end, capacity,
                 flow=None, edge=None):
        # when edge is given, the new edge is a copy of it and the other
        # arguments are ignored
        if edge:
            self._start = edge.start
            self._end = edge.end
            self._capacity = edge.capacity
            self._flow = edge.flow
            return
        self._start = start
        self._end = end
        self._capacity = capacity
        self._flow = flow

    @property
    def start(self):
        return self._start

    @property
    def end(self):
        return self._end

    @property
    def capacity(self):
        return self._capacity

    @property
    def flow(self):
        return self._flow

    def other(self, vertex):
        """
        Return the endpoint opposite to ``vertex``; raise RuntimeError when
        ``vertex`` is not an endpoint of this edge.
        """
        if vertex == self.start:
            return self._end
        elif vertex == self._end:
            return self._start
        raise RuntimeError('Illegal endpoint')

    def add_residual_flow_to(self, vertex, delta):
        """
        Change the flow by ``delta`` in the direction of ``vertex``: the flow
        grows when ``vertex`` is the end of the edge and shrinks when it is
        the start. Raise RuntimeError for any other vertex.
        """
        if vertex == self._start:
            self._flow -= delta
        elif vertex == self._end:
            self._flow += delta
        else:
            raise RuntimeError('Illegal endpoint')

    def __repr__(self):
        return '{}->{} {}/{}'.format(
            self._start, self._end, self._flow, self._capacity)
273 |
274 |
class FlowNetwork(object):

    """
    Flow network: for every vertex, the bag of the edges that touch it.
    An edge is stored twice, once for its start and once for its end vertex.
    """

    def __init__(self):
        self._adj = defaultdict(Bag)
        # vertices that are an endpoint of at least one added edge
        self._vertices = set()
        self._edges_size = 0

    def vertices_size(self):
        """Return the number of distinct endpoints of the added edges."""
        return len(self._vertices)

    def edges_size(self):
        """Return the number of edges that were added."""
        return self._edges_size

    def add_edge(self, edge):
        """Add ``edge`` to the adjacency bags of both of its endpoints."""
        self._edges_size += 1
        for vertex in (edge.start, edge.end):
            self._vertices.add(vertex)
            self._adj[vertex].add(edge)

    def adj_edges(self, vertex):
        """
        Return the bag of the edges touching ``vertex``. Asking for an
        unknown vertex creates an empty bag for it in the adjacency table.
        """
        return self._adj[vertex]

    def edges(self):
        """Yield the edges, each from the bag of its start vertex."""
        for v in self._adj:
            for edge in self._adj[v]:
                # skip the copy stored for the end vertex of the edge
                if edge.end != v:
                    yield edge

    def __repr__(self):
        lines = ['{} vertices, {} edges'.format(
            self.vertices_size(), self._edges_size)]
        for v in self._adj:
            outgoing = ', '.join(
                str(e) for e in self._adj[v] if e.end != v)
            lines.append('{}: {}'.format(v, outgoing))
        return '\n'.join(lines)
307 |
308 |
309 | if __name__ == '__main__':
310 | doctest.testmod()
311 |
--------------------------------------------------------------------------------
/chapter_3/module_3_2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import random
5 |
6 |
class Node(object):

    """
    Node of a binary search tree: a key/value pair, the links to the left
    and right subtrees and the number of nodes in the subtree rooted here.
    """

    def __init__(self, key, val, size):
        self._key = key
        self._val = val
        self._size = size
        self._left = None
        self._right = None

    @property
    def key(self):
        return self._key

    @key.setter
    def key(self, new_key):
        self._key = new_key

    @property
    def val(self):
        return self._val

    @val.setter
    def val(self, new_val):
        self._val = new_val

    @property
    def size(self):
        return self._size

    @size.setter
    def size(self, count):
        # a subtree size is a non-negative integer
        assert isinstance(count, int) and count >= 0
        self._size = count

    @property
    def left(self):
        return self._left

    @left.setter
    def left(self, child):
        # a link is either another node or None
        assert child is None or isinstance(child, Node)
        self._left = child

    @property
    def right(self):
        return self._right

    @right.setter
    def right(self, child):
        assert child is None or isinstance(child, Node)
        self._right = child
57 |
58 |
class BST(object):

    """
    Binary search tree implementation.
    >>> bst = BST()
    >>> bst.is_empty()
    True
    >>> test_str = 'EASYQUESTION'
    >>> for (index, element) in enumerate(test_str):
    ...     bst.put(element, index)
    ...
    >>> bst.is_binary_tree()
    True
    >>> bst.get('Q')
    4
    >>> bst.get('E')
    6
    >>> bst.get('N')
    11
    >>> bst.size()
    10
    >>> bst.max_val().key
    'Y'
    >>> bst.min_val().key
    'A'
    >>> bst.select(0).key
    'A'
    >>> bst.select(3).key
    'N'
    >>> bst.select(4).key
    'O'
    >>> bst.select(9).key
    'Y'
    >>> bst.rank('A')
    0
    >>> bst.rank('E')
    1
    >>> bst.rank('Y')
    9
    >>> bst.rank('T')
    7
    >>> bst.rank('U')
    8
    >>> bst.is_empty()
    False
    >>> node = bst.select(0)
    >>> node.key
    'A'
    >>> node2 = bst.select(2)
    >>> node2.key
    'I'
    >>> node3 = bst.select(9)
    >>> node3.key
    'Y'
    >>> bst.keys()
    ['A', 'E', 'I', 'N', 'O', 'Q', 'S', 'T', 'U', 'Y']
    >>> bst.height()
    5
    >>> random_key = bst.random_key()
    >>> random_key in test_str
    True
    >>> fn = bst.floor('B')
    >>> fn.key
    'A'
    >>> fn2 = bst.floor('Z')
    >>> fn2.key
    'Y'
    >>> fn3 = bst.floor('E')
    >>> fn3.key
    'E'
    >>> cn = bst.ceiling('B')
    >>> cn.key
    'E'
    >>> cn2 = bst.ceiling('R')
    >>> cn2.key
    'S'
    >>> cn3 = bst.ceiling('S')
    >>> cn3.key
    'S'
    >>> bst.delete_min()
    >>> bst.min_val().key
    'E'
    >>> bst.delete_max()
    >>> bst.max_val().key
    'U'
    >>> bst.delete('O')
    >>> bst.delete('S')
    >>> bst.keys()
    ['E', 'I', 'N', 'Q', 'T', 'U']
    >>> bst.is_binary_tree()
    True
    >>> bst.is_ordered()
    True
    >>> bst.is_rank_consistent()
    True
    >>> bst.check()
    True
    >>> bst.get('A') is None
    True
    >>> bst.put('O', 12)
    >>> bst.size()
    7
    >>> bst.check()
    True
    >>> BST().keys()
    []
    """

    def __init__(self):
        self._root = None
        # keys currently stored, lets put() tell an insert from an update
        self._exist_keys = set()
        # 3.2.28 practice, the most recently accessed node
        self._last_visited_node = None

    def size(self):
        """
        Return the node's amount of the binary search tree.
        """
        if not self._root:
            return 0
        return self._root.size

    def is_empty(self):
        return self._root is None

    def node_size(self, node):
        """
        Return the size of the subtree rooted at node, 0 for a missing node.
        """
        return 0 if not node else node.size

    # 3.2.13 practice, implement get method with iteration.
    def get(self, key):
        """
        Return the corresponding value with the given key, or None when the
        key is not in the tree. Walk down from the root: if the current
        node's key is equal to the given key, return the node's value;
        if it is larger than the given key, continue in the left subtree,
        else continue in the right subtree.
        """

        # 3.2.28 practice add cache for bst.
        if self._last_visited_node and self._last_visited_node.key == key:
            return self._last_visited_node.val

        temp = self._root

        while temp:
            if temp.key == key:
                self._last_visited_node = temp
                return temp.val
            elif temp.key > key:
                temp = temp.left
            else:
                temp = temp.right
        return None

    # 3.2.13 practice, implement put method with iteration,
    # use set data structure for recording exist keys, if new key exists, stop
    # increment the node's size counter.
    def put(self, key, val):
        """
        Insert a new node into the binary search tree, iterate the whole tree,
        find the appropriate location for the new node and add the new node
        as the tree leaf. An existing key only gets its value replaced.
        """
        key_exists = key in self._exist_keys
        if not key_exists:
            self._exist_keys.add(key)
        temp = self._root
        inserted_node = None
        new_node = Node(key, val, 1)

        while temp:
            inserted_node = temp
            # every node on the path gains one descendant
            if not key_exists:
                temp.size += 1

            if temp.key > key:
                temp = temp.left
            elif temp.key < key:
                temp = temp.right
            elif temp.key == key:
                temp.val = val
                return

        if not inserted_node:
            self._root = new_node
            return
        else:
            if inserted_node.key < key:
                inserted_node.right = new_node
            else:
                inserted_node.left = new_node

        inserted_node.size = self.node_size(
            inserted_node.left) + self.node_size(inserted_node.right) + 1

        self._last_visited_node = new_node

    # 3.2.14 practice
    def max_val(self):
        """
        Find the node with the maximum key, None for an empty tree.
        """
        if not self._root:
            return None
        tmp = self._root
        while tmp.right:
            tmp = tmp.right
        return tmp

    def __min_val(self, node):
        """
        Find the node with the minimum key in the subtree which starts
        with the specific node.
        """
        assert isinstance(node, Node)

        tmp = node
        while tmp.left:
            tmp = tmp.left
        return tmp

    # 3.2.14 practice
    def min_val(self):
        """
        Find the node with the minimum key, None for an empty tree.
        """
        if not self._root:
            return None
        return self.__min_val(self._root)

    # 3.2.14 practice
    def select(self, k):
        """
        Find the node with k smaller keys (the kth node, counted from 0),
        the solution is similar with get() or put() function.
        Return None when there is no such node.
        """
        assert isinstance(k, int) and k <= self.size()

        tmp = self._root
        while tmp:
            tmp_size = self.node_size(tmp.left)
            if tmp_size > k:
                tmp = tmp.left
            elif tmp_size < k:
                # skip the left subtree and the current node
                tmp = tmp.right
                k = k - tmp_size - 1
            else:
                return tmp
        return None

    # 3.2.14 practice
    def rank(self, key):
        """
        Return the number of keys smaller than the given key,
        -1 for an empty tree.
        """
        result = 0
        if not self._root:
            return -1
        tmp = self._root

        while tmp:
            if tmp.key > key:
                tmp = tmp.left
            elif tmp.key < key:
                result += self.node_size(tmp.left) + 1
                tmp = tmp.right
            elif tmp.key == key:
                result += self.node_size(tmp.left)
                break
        return result

    def delete_min(self):
        """
        Remove the node with the minimum key, do nothing for an empty tree.
        """
        if not self._root:
            return
        self._exist_keys.discard(self.__min_val(self._root).key)
        # the cached node may be the one that goes away
        self._last_visited_node = None
        self._root = self.__delete_min(self._root)

    def __delete_min(self, node):
        # find the minimum-value node.
        if not node.left:
            return node.right
        node.left = self.__delete_min(node.left)
        node.size = self.node_size(node.left) + self.node_size(node.right) + 1
        return node

    def delete_max(self):
        """
        Remove the node with the maximum key, do nothing for an empty tree.
        """
        if not self._root:
            return
        self._exist_keys.discard(self.max_val().key)
        self._last_visited_node = None
        self._root = self.__delete_max(self._root)

    def __delete_max(self, node):
        # find the maximum-value node.
        if not node.right:
            return node.left
        node.right = self.__delete_max(node.right)
        node.size = self.node_size(node.left) + self.node_size(node.right) + 1
        return node

    def delete(self, key):
        """
        Remove the node with the given key, do nothing when it is missing.
        """
        self._root = self.__delete(self._root, key)
        # forget the key, so that a later put() counts it as a new node
        self._exist_keys.discard(key)
        self._last_visited_node = None

    def __delete(self, node, key):
        if not node:
            return None
        if key < node.key:
            node.left = self.__delete(node.left, key)
        elif key > node.key:
            node.right = self.__delete(node.right, key)
        else:
            # node's left or right side is None.
            if not node.left or not node.right:
                return (node.left or node.right)
            # node's both side is not None, the smallest node of the
            # right subtree takes the place of the deleted node.
            tmp = node
            node = self.__min_val(tmp.right)
            node.right = self.__delete_min(tmp.right)
            node.left = tmp.left
        node.size = self.node_size(node.left) + self.node_size(node.right) + 1
        return node

    def keys(self):
        """
        Return all the keys in ascending order, [] for an empty tree.
        """
        if not self._root:
            return []
        return self.keys_range(self.min_val().key, self.max_val().key)

    def keys_range(self, low, high):
        """
        Return the keys between low and high (both included), ascending.
        """
        queue = []
        self.__keys(self._root, queue, low, high)
        return queue

    def __keys(self, node, queue, low, high):
        # in-order traversal that skips the subtrees outside [low, high]
        if not node:
            return
        if low < node.key:
            self.__keys(node.left, queue, low, high)
        if low <= node.key and high >= node.key:
            queue.append(node.key)
        if high > node.key:
            self.__keys(node.right, queue, low, high)

    # 3.2.6 practice, add height function for binary tree.
    def height(self):
        """
        Return the number of links on the longest path from the root
        to a leaf, -1 for an empty tree.
        """
        return self.__height(self._root)

    def __height(self, node):
        if not node:
            return -1
        return 1 + max(self.__height(node.left), self.__height(node.right))

    # 3.2.21 randomly choose a node from binary search tree.
    def random_key(self):
        """
        Return a randomly chosen key, None for an empty tree.
        """
        if not self._root:
            return None
        total_size = self._root.size
        rank = random.randint(0, total_size - 1)
        random_node = self.select(rank)
        return random_node.key

    # 3.2.29 practice, check if each node's size is
    # equals to the summation of left node's size and right node's size.
    def is_binary_tree(self):
        return self.__is_binary_tree(self._root)

    def __is_binary_tree(self, node):
        if not node:
            return True
        if node.size != self.node_size(node.left) + self.node_size(node.right) + 1:
            return False
        return self.__is_binary_tree(node.left) and self.__is_binary_tree(node.right)

    # 3.2.30 practice, check if each node in binary search tree is ordered
    # (less than right node and greater than left node)
    def is_ordered(self):
        return self.__is_ordered(self._root, None, None)

    def __is_ordered(self, node, min_key, max_key):
        # None means "no bound"; falsy keys such as 0 or '' are real bounds
        if not node:
            return True
        if min_key is not None and node.key <= min_key:
            return False
        if max_key is not None and node.key >= max_key:
            return False
        return (self.__is_ordered(node.left, min_key, node.key) and
                self.__is_ordered(node.right, node.key, max_key))

    # 3.2.24 practice, check if each node's rank is correct.
    def is_rank_consistent(self):
        for i in range(self.size()):
            if i != self.rank(self.select(i).key):
                return False

        for key in self.keys():
            if key != self.select(self.rank(key)).key:
                return False

        return True

    # 3.2.32 practice, check if a data structure is binary search tree.
    def check(self):
        if not self.is_binary_tree():
            return False
        if not self.is_ordered():
            return False
        if not self.is_rank_consistent():
            return False
        return True

    def floor(self, key):
        """
        Return the node with the largest key that is not larger than
        the given key, None when there is no such node.
        """
        tmp = None
        node = self._root
        while node:
            if key < node.key:
                node = node.left
            elif key > node.key:
                # candidate, a closer one may exist in the right subtree
                tmp = node
                node = node.right
            else:
                return node
        return tmp

    def ceiling(self, key):
        """
        Return the node with the smallest key that is not smaller than
        the given key, None when there is no such node.
        """
        tmp = None
        node = self._root
        while node:
            if key < node.key:
                # candidate, a closer one may exist in the left subtree
                tmp = node
                node = node.left
            elif key > node.key:
                node = node.right
            else:
                return node
        return tmp
474 |
475 | if __name__ == '__main__':
476 | doctest.testmod()
477 |
--------------------------------------------------------------------------------
/chapter_5/module_5_2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | from basic_data_struct import Queue
5 |
6 |
class Node(object):

    '''
    Node of a trie: an optional value, a size counter that starts at 1 and
    a dict of links to the next nodes.
    '''

    def __init__(self):
        self.next_nodes = {}
        self._size = 1
        self._val = None

    # 5.2.10 practice
    @property
    def size(self):
        return self._size

    @size.setter
    def size(self, count):
        self._size = count

    @property
    def val(self):
        return self._val

    @val.setter
    def val(self, new_val):
        self._val = new_val
30 |
31 |
32 | class Trie(object):
33 |
34 | '''
35 | Trie is a special data structure for string querying. Trie is similar with other tree-like
36 | structures, But every node has R links, which R is alphabet's size, that means every link is
37 | corresponding to a character. A node with a value that means the node is the end of a string.
38 | For searching a string, we can keep getting the next node by the next character, until we
39 | reach a node with an actual value, otherwise the target string miss.
40 | Trie is as fast as symbol table and more flexible than binary tree, but it cost lots of
41 | extra space. The running time of searching and inserting be proportional to O(N) which N is
42 | the length of the searching key.
43 | Trie is suitable for those shorter keys and the alphabet is rather small. Because long key
44 | and large alphabet could take much more space.
45 | >>> trie = Trie()
46 | >>> trie.get('xxxx')
47 | >>> test_data = ['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore']
48 | >>> for index, d in enumerate(test_data):
49 | ... trie.put(d, index)
50 | >>> trie.size()
51 | 8
52 | >>> [trie.get(i).val for i in test_data]
53 | [0, 1, 6, 3, 4, 5, 6, 7]
54 | >>> [i for i in trie.keys()]
55 | ['by', 'sea', 'sells', 'she', 'shells', 'shore', 'the']
56 | >>> [trie.rank(i) for i in trie.keys()]
57 | [1, 2, 3, 4, 5, 6, 7]
58 | >>> [trie.select(i) for i in range(1, 8)]
59 | ['by', 'sea', 'sells', 'she', 'she', 'shore', 'the']
60 | >>> [i for i in trie.keys_with_prefix('sh')]
61 | ['she', 'shells', 'shore']
62 | >>> [i for i in trie.keys_that_match('.he')]
63 | ['she', 'the']
64 | >>> [i for i in trie.keys_that_match('s..')]
65 | ['sea', 'she']
66 | >>> trie.longest_prefix_of('shellsort')
67 | 6
68 | >>> trie.delete('she')
69 | >>> trie.size()
70 | 7
71 | >>> trie.get('she').val
72 | >>> [i for i in trie.keys()]
73 | ['by', 'sea', 'sells', 'shells', 'shore', 'the']
74 | >>> [trie.rank(i) for i in trie.keys()]
75 | [1, 2, 3, 4, 5, 6]
76 | '''
77 |
78 | def __init__(self):
79 | self._root = Node()
80 | self._size = 0
81 |
82 | def size(self):
83 | return self._size
84 |
85 | def get(self, key):
86 | tmp = self._root
87 | d = 0
88 |
89 | while tmp:
90 | if d == len(key):
91 | return tmp
92 | char = key[d]
93 | try:
94 | tmp = tmp.next_nodes[char]
95 | except KeyError:
96 | return None
97 | d += 1
98 | return tmp
99 |
100 | def put(self, key, value):
101 | exist_node = self.get(key)
102 | if exist_node:
103 | exist_node.val = value
104 | self._size += 1
105 | return
106 |
107 | tmp = self._root
108 | for i in key:
109 | if i not in tmp.next_nodes:
110 | tmp.next_nodes[i] = Node()
111 | else:
112 | tmp.next_nodes[i].size += 1
113 | tmp = tmp.next_nodes[i]
114 | tmp.val = value
115 | self._size += 1
116 |
117 | def keys(self):
118 | '''
119 | Return all the keys in trie tree.
120 | '''
121 | return self.keys_with_prefix('')
122 |
123 | def keys_with_prefix(self, prefix):
124 | '''
125 | Return all the keys starts with the given prefix in the trie tree.
126 | '''
127 | q = Queue()
128 | if prefix == '':
129 | self._collect(self._root, prefix, q)
130 | else:
131 | start_node = self.get(prefix)
132 | self._collect(start_node, prefix, q)
133 | return q
134 |
135 | def _collect(self, node, prefix, q):
136 | if not node:
137 | return
138 |
139 | if node.val is not None:
140 | q.enqueue(prefix)
141 |
142 | for i in range(256):
143 | if chr(i) in node.next_nodes:
144 | self._collect(node.next_nodes[chr(i)], prefix + chr(i), q)
145 |
146 | def keys_that_match(self, pattern):
147 | '''
148 | Return all the keys match the given pattern in the trie tree.
149 | '''
150 | q = Queue()
151 | self._keys_collect(self._root, '', pattern, q)
152 | return q
153 |
154 | def _keys_collect(self, node, prefix, pattern, q):
155 | length = len(prefix)
156 | if not node:
157 | return
158 |
159 | if length == len(pattern):
160 | if node.val is not None:
161 | q.enqueue(prefix)
162 | return
163 |
164 | char = pattern[length]
165 | for i in range(256):
166 | if (char == '.' or char == chr(i)) and chr(i) in node.next_nodes:
167 | self._keys_collect(node.next_nodes[chr(i)], prefix + chr(i), pattern, q)
168 |
169 | def longest_prefix_of(self, s):
170 | '''
171 | Return the longest prefix's length of the given string which the prefix is in the trie tree.
172 | '''
173 | tmp = self._root
174 | length = d = 0
175 |
176 | while tmp:
177 | if tmp.val:
178 | length = d
179 | if d == len(s):
180 | return length
181 | char = s[d]
182 | if char not in tmp.next_nodes:
183 | break
184 | tmp = tmp.next_nodes[char]
185 | d += 1
186 |
187 | return length
188 |
189 | def delete(self, key):
190 | self._root = self._delete(self._root, key, 0)
191 | self._size -= 1
192 |
193 | def _delete(self, node, key, d):
194 | if not node:
195 | return None
196 |
197 | if d == len(key):
198 | node.val = None
199 | node.size -= 1
200 | else:
201 | index = key[d]
202 | node.size -= 1
203 | node.next_nodes[index] = self._delete(node.next_nodes[index], key, d + 1)
204 |
205 | if node.val:
206 | return node
207 |
208 | for i in range(256):
209 | if chr(i) in node.next_nodes:
210 | return node
211 | return None
212 |
213 | # 5.2.8 practice
214 | def select(self, k):
215 | tmp = self._root
216 | result = ''
217 | while tmp and tmp.val is None:
218 | count = 0
219 | count_list = []
220 | sorted_keys = sorted(tmp.next_nodes.keys())
221 | for c in sorted_keys:
222 | count_list.append((c, tmp.next_nodes[c].size + count))
223 | count = tmp.next_nodes[c].size + count
224 |
225 | for index, elem in enumerate(count_list):
226 | key, count = elem
227 | if k <= count:
228 | tmp = tmp.next_nodes[key]
229 | result += key
230 | if index != 0:
231 | k -= count_list[index - 1][1]
232 | break
233 | return result
234 |
235 | # 5.2.8 practice
236 | def rank(self, key):
237 | tmp = self._root
238 | d = 0
239 | result = 0
240 |
241 | while d != len(key):
242 | char = key[d]
243 | if char not in tmp.next_nodes:
244 | return -1
245 | char_list = sorted(tmp.next_nodes.keys())
246 | for c in char_list:
247 | if c == char:
248 | break
249 | result += tmp.next_nodes[c].size
250 | if len(tmp.next_nodes) == 1 and tmp.size != 1:
251 | result += 1
252 | tmp = tmp.next_nodes[char]
253 |
254 | d += 1
255 | return result + 1
256 |
257 |
class TNode(object):

    '''
    One node of a ternary search trie: a character, the value of the key that
    ends at this node (None when no key ends here) and three links. left and
    right lead to nodes for smaller and larger characters at the same position,
    mid leads to the node for the next position of the key.
    '''

    def __init__(self):
        self._char = None
        self._left = None
        self._right = None
        self._mid = None
        self._val = None

    @property
    def char(self):
        return self._char

    @char.setter
    def char(self, new_char):
        self._char = new_char

    @property
    def left(self):
        return self._left

    @left.setter
    def left(self, node):
        self._left = node

    @property
    def right(self):
        return self._right

    @right.setter
    def right(self, node):
        self._right = node

    @property
    def mid(self):
        return self._mid

    @mid.setter
    def mid(self, node):
        self._mid = node

    @property
    def val(self):
        return self._val

    @val.setter
    def val(self, value):
        self._val = value


class TernarySearchTries(object):

    '''
    Ternary-Search-Trie is another tree-like data structure for string querying.
    Every node holds one character, a value and three links: the left and right
    links lead to smaller and larger characters at the same key position, the
    middle link leads to the next position of the key. It needs less space than
    a Trie, at the price of slightly more complicated operations.
    Empty keys are not stored, None marks "no key ends here", and size() counts
    distinct keys.
    >>> tst = TernarySearchTries()
    >>> tst.get('test')
    >>> test_data = ['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore']
    >>> for index, d in enumerate(test_data):
    ...     tst.put(d, index)
    >>> tst.size()
    7
    >>> [tst.get(i).val for i in test_data]
    [0, 1, 6, 3, 4, 5, 6, 7]
    >>> [i for i in tst.keys()]
    ['by', 'sea', 'sells', 'she', 'shells', 'shore', 'the']
    >>> [i for i in tst.keys_with_prefix('sh')]
    ['she', 'shells', 'shore']
    >>> [i for i in tst.keys_with_prefix('she')]
    ['she', 'shells']
    >>> tst.longest_prefix_of('shellsort')
    6
    >>> tst.longest_prefix_of('shelf')
    3
    >>> [i for i in tst.keys_that_match('.he')]
    ['she', 'the']
    '''

    def __init__(self):
        self._root = None
        self._size = 0

    def size(self):
        '''
        Return the number of distinct keys stored in the trie.
        '''
        return self._size

    def get(self, key):
        '''
        Return the node reached by the last character of key, or None when the
        path does not exist or key is empty. The node has val None when key is
        only a prefix of stored keys.
        '''
        if not key:
            return None

        node = self._root
        d = 0
        last = len(key) - 1
        while node:
            char = key[d]
            if char < node.char:
                node = node.left
            elif char > node.char:
                node = node.right
            elif d < last:
                node = node.mid
                d += 1
            else:
                return node
        return None

    def put(self, key, value):
        '''
        Store value under key, overwriting the value of an existing key.
        Empty keys are ignored. Putting None clears the value of the key.
        '''
        if not key:
            return

        if self._root is None:
            self._root = self._new_node(key[0])
        node = self._root
        d = 0
        last = len(key) - 1
        while True:
            char = key[d]
            if char < node.char:
                if node.left is None:
                    node.left = self._new_node(char)
                node = node.left
            elif char > node.char:
                if node.right is None:
                    node.right = self._new_node(char)
                node = node.right
            elif d < last:
                d += 1
                if node.mid is None:
                    node.mid = self._new_node(key[d])
                node = node.mid
            else:
                break

        # only a transition between "no value" and "some value" changes the key count
        if node.val is None and value is not None:
            self._size += 1
        elif node.val is not None and value is None:
            self._size -= 1
        node.val = value

    @staticmethod
    def _new_node(char):
        # build a fresh node for the given character
        node = TNode()
        node.char = char
        return node

    def keys(self):
        '''
        Return all the keys in trie tree.
        '''
        return self.keys_with_prefix('')

    # 5.2.9 practice, implementation is available on the official website
    def keys_with_prefix(self, prefix):
        '''
        Return all the keys starts with the given prefix in the trie tree.
        '''
        q = Queue()
        if not prefix:
            # every key matches the empty prefix: walk the whole tree from the root
            self._collect(self._root, '', q)
            return q

        node = self.get(prefix)
        if not node:
            return q
        if node.val is not None:
            q.enqueue(prefix)
        self._collect(node.mid, prefix, q)
        return q

    def _collect(self, node, prefix, q):
        # in-order walk, so keys are enqueued in ascending order
        if not node:
            return
        self._collect(node.left, prefix, q)
        if node.val is not None:
            q.enqueue(prefix + node.char)
        self._collect(node.mid, prefix + node.char, q)
        self._collect(node.right, prefix, q)

    # 5.2.9 practice, implementation is available on the official website
    def longest_prefix_of(self, key):
        '''
        Return the length of the longest stored key that is a prefix of key.
        '''
        if not key:
            return 0

        length = d = 0
        node = self._root
        while node and d < len(key):
            char = key[d]
            if char < node.char:
                node = node.left
            elif char > node.char:
                node = node.right
            else:
                d += 1
                # compare with None so that keys stored with a falsy value count
                if node.val is not None:
                    length = d
                node = node.mid
        return length

    # 5.2.9 practice, implementation is available on the official website
    def keys_that_match(self, pattern):
        '''
        Return all the keys that match pattern, where '.' matches any character.
        '''
        q = Queue()
        if pattern:
            self._keys_collect(self._root, '', 0, pattern, q)
        return q

    def _keys_collect(self, node, prefix, index, pattern, q):
        if not node:
            return

        char = pattern[index]
        if char == '.' or char < node.char:
            self._keys_collect(node.left, prefix, index, pattern, q)

        if char == '.' or char == node.char:
            if node.val is not None and index == len(pattern) - 1:
                q.enqueue(prefix + node.char)
            if index < len(pattern) - 1:
                self._keys_collect(node.mid, prefix + node.char, index + 1, pattern, q)

        if char == '.' or char > node.char:
            self._keys_collect(node.right, prefix, index, pattern, q)
454 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
457 |
--------------------------------------------------------------------------------
/chapter_4/basic_data_struct.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | from collections import defaultdict
5 |
6 | """
7 | copy from module_1_3.py, this is for avoiding package import problems.
8 | """
9 |
10 |
class Node(object):

    """
    Cell of a singly linked list: holds one value and a link to the next cell.
    """

    def __init__(self, val):
        self._val = val
        self.next_node = None

    def _read_val(self):
        return self._val

    def _write_val(self, value):
        self._val = value

    def _read_next(self):
        return self._next_node

    def _write_next(self, node):
        self._next_node = node

    # public attributes are exposed as read/write properties over the private slots
    val = property(_read_val, _write_val)
    next_node = property(_read_next, _write_next)
32 |
33 |
class Stack(object):

    """
    Last-in-first-out stack kept as a singly linked list of Node cells.
    Iteration yields the values from the top of the stack downwards.
    """

    def __init__(self):
        self._size = 0
        self._first = None

    def __iter__(self):
        cursor = self._first
        while cursor:
            yield cursor.val
            cursor = cursor.next_node

    def is_empty(self):
        return self._first is None

    def size(self):
        return self._size

    def push(self, val):
        # the new cell becomes the top and points at the previous top
        top = Node(val)
        top.next_node = self._first
        self._first = top
        self._size += 1

    def pop(self):
        # returns None instead of raising when the stack is empty
        top = self._first
        if not top:
            return None
        self._first = top.next_node
        self._size -= 1
        return top.val

    # 1.3.7 practice
    def peek(self):
        return self._first.val if self._first else None
72 |
73 |
class Queue(object):

    """
    First-in-first-out queue kept as a singly linked list of Node cells.
    An optional iterable passed to the constructor is enqueued item by item.
    """

    def __init__(self, q=None):
        self._first = None
        self._last = None
        self._size = 0
        for item in (q or ()):
            self.enqueue(item)

    def __iter__(self):
        cursor = self._first
        while cursor:
            yield cursor.val
            cursor = cursor.next_node

    def is_empty(self):
        return self._first is None

    def size(self):
        return self._size

    def enqueue(self, val):
        tail = Node(val)
        if self.is_empty():
            self._first = tail
        else:
            self._last.next_node = tail
        self._last = tail
        self._size += 1

    def dequeue(self):
        # returns None instead of raising when the queue is empty
        if self.is_empty():
            return None
        head = self._first
        self._first = head.next_node
        if self.is_empty():
            self._last = None
        self._size -= 1
        return head.val
115 |
116 |
class Bag(object):

    """
    Unordered collection that only supports adding and iterating, kept as a
    singly linked list of Node cells. Iteration yields the most recently
    added value first.
    """

    def __init__(self):
        self._first = None
        self._size = 0

    def __iter__(self):
        node = self._first
        while node is not None:
            yield node.val
            node = node.next_node

    def __contains__(self, item):
        # compare against the stored values, walking the whole list
        return any(val == item for val in self)

    def add(self, val):
        node = Node(val)
        old = self._first
        self._first = node
        self._first.next_node = old
        self._size += 1

    def is_empty(self):
        return self._first is None

    def size(self):
        return self._size
148 |
149 |
class MinPQ(object):

    """
    Minimum-oriented priority queue stored as a binary heap in a 0-based list:
    the children of position i live at 2 * i + 1 and 2 * i + 2.
    del_min() and min_val() raise IndexError when the queue is empty.
    >>> pq = MinPQ([5, 3, 8, 1])
    >>> pq.size()
    4
    >>> pq.min_val()
    1
    >>> [pq.del_min() for _ in range(4)]
    [1, 3, 5, 8]
    >>> pq.is_empty()
    True
    """

    def __init__(self, data=None):
        self._pq = []
        if data:
            for item in data:
                # insert each element, not the whole iterable
                self.insert(item)

    def is_empty(self):
        return len(self._pq) == 0

    def size(self):
        return len(self._pq)

    def swim(self, pos):
        # move the item at pos up while it is smaller than its parent
        while pos > 0 and self._pq[(pos - 1) // 2] > self._pq[pos]:
            self._pq[(pos - 1) // 2], self._pq[pos] = self._pq[pos], self._pq[(pos - 1) // 2]
            pos = (pos - 1) // 2

    def sink(self, pos):
        # move the item at pos down while it is larger than its smaller child
        length = len(self._pq) - 1
        while 2 * pos + 1 <= length:
            index = 2 * pos + 1
            if index < length and self._pq[index] > self._pq[index + 1]:
                index += 1
            if self._pq[pos] <= self._pq[index]:
                break
            self._pq[index], self._pq[pos] = self._pq[pos], self._pq[index]
            pos = index

    def insert(self, val):
        self._pq.append(val)
        self.swim(len(self._pq) - 1)

    def del_min(self):
        min_val = self._pq[0]
        last_index = len(self._pq) - 1
        # move the minimum to the end, drop it, then restore heap order from the root
        self._pq[0], self._pq[last_index] = self._pq[last_index], self._pq[0]
        self._pq.pop(last_index)
        self.sink(0)
        return min_val

    def min_val(self):
        return self._pq[0]
194 |
195 |
class DisjointNode(object):

    """
    Entry of GenericUnionFind: the parent of an element and, for a root, the
    number of elements in its tree.
    """

    def __init__(self, parent, size=1):
        self._parent = parent
        self._size = size

    @property
    def parent(self):
        return self._parent

    @parent.setter
    def parent(self, new_parent):
        self._parent = new_parent

    @property
    def size(self):
        return self._size

    @size.setter
    def size(self, val):
        assert val > 0
        self._size = val


class GenericUnionFind(object):

    """
    Weighted quick-union over arbitrary hashable elements. Elements are
    registered the first time they appear in union(); find() returns None for
    an element that has never been seen.
    >>> guf = GenericUnionFind()
    >>> connections = [(4, 3), (3, 8), (6, 5), (9, 4),
    ...                (2, 1), (8, 9), (5, 0), (7, 2), (6, 1), (1, 0), (6, 7)]
    >>> for i, j in connections:
    ...     guf.union(i, j)
    ...
    >>> guf.connected(1, 4)
    False
    >>> guf.connected(8, 4)
    True
    >>> guf.connected(1, 5)
    True
    >>> guf.connected(1, 7)
    True
    >>> guf.connected(1, 'never added')
    False
    >>> guf.count()
    2
    """

    def __init__(self, tuple_data=None):
        self._id = {}
        if tuple_data:
            for a, b in tuple_data:
                self.union(a, b)

    def count(self):
        """
        Return the number of disjoint components among the registered elements.
        """
        # every component has exactly one root, i.e. an element that is its own parent
        return sum(1 for element, node in self._id.items() if node.parent == element)

    def connected(self, p, q):
        """
        Return True when p and q are both registered and share a root.
        """
        p_root = self.find(p)
        # compare with None explicitly: a root may be a falsy element such as 0
        return p_root is not None and p_root == self.find(q)

    def find(self, node):
        """
        Return the root of the component containing node, or None if node is unknown.
        """
        if node not in self._id:
            return None
        tmp = node
        while self._id[tmp].parent != tmp:
            tmp = self._id[tmp].parent
        return tmp

    def union(self, p, q):
        """
        Merge the components of p and q, registering either element if needed.
        """
        p_root = self.find(p)
        q_root = self.find(q)

        if p_root is None and q_root is None:
            if p == q:
                # a single new element forms a component of size one
                self._id[p] = DisjointNode(p, 1)
                return
            self._id[p] = DisjointNode(q)
            self._id[q] = DisjointNode(q, 2)
            return

        if p_root is None:
            self._id[p] = DisjointNode(q_root, 1)
            self._id[q_root].size += 1
            return

        if q_root is None:
            self._id[q] = DisjointNode(p_root, 1)
            self._id[p_root].size += 1
            return

        if p_root == q_root:
            return

        # hang the smaller tree under the root of the larger one
        if self._id[p_root].size < self._id[q_root].size:
            self._id[p_root].parent = q_root
            self._id[q_root].size += self._id[p_root].size
        else:
            self._id[q_root].parent = p_root
            self._id[p_root].size += self._id[q_root].size
286 |
287 |
class MaxPQ(object):

    """
    Maximum-oriented priority queue stored as a binary heap in a 0-based list:
    the children of position i live at 2 * i + 1 and 2 * i + 2.
    del_max() and max_val() raise IndexError when the queue is empty.
    """

    def __init__(self, data=None):
        self._pq = []
        for item in (data or ()):
            self.insert(item)

    def is_empty(self):
        return len(self._pq) == 0

    def size(self):
        return len(self._pq)

    def swim(self, pos):
        # lift the item at pos while its parent is smaller
        heap = self._pq
        while pos > 0:
            parent = (pos - 1) // 2
            if not heap[parent] < heap[pos]:
                break
            heap[parent], heap[pos] = heap[pos], heap[parent]
            pos = parent

    def sink(self, pos):
        # push the item at pos down while its larger child is bigger
        heap = self._pq
        last = len(heap) - 1
        child = 2 * pos + 1
        while child <= last:
            if child < last and heap[child] < heap[child + 1]:
                child += 1
            if heap[pos] >= heap[child]:
                break
            heap[child], heap[pos] = heap[pos], heap[child]
            pos = child
            child = 2 * pos + 1

    def insert(self, val):
        self._pq.append(val)
        self.swim(len(self._pq) - 1)

    def del_max(self):
        heap = self._pq
        top = heap[0]
        # the last item replaces the root, then heap order is restored from the top
        tail = heap.pop()
        if heap:
            heap[0] = tail
        self.sink(0)
        return top

    def max_val(self):
        return self._pq[0]
332 |
333 |
class IndexMinPQ(object):

    """
    Minimum-oriented priority queue whose items are addressed by an integer
    index in the range [0, max_size). The heap is 1-based: _index[pos] is the
    item index stored at heap position pos, _reverse_index[i] is the heap
    position of item i (-1 when absent) and _keys[i] is the key of item i.
    Operations given an index outside the valid range are ignored.
    >>> pq = IndexMinPQ(3)
    >>> pq.insert(0, 'c')
    >>> pq.insert(2, 'a')
    >>> pq.insert(-1, 'ignored')
    >>> pq.size()
    2
    >>> pq.min_index(), pq.min_key()
    (2, 'a')
    >>> pq.delete_min()
    2
    >>> pq.min_index()
    0
    """

    def __init__(self, max_size):
        assert max_size > 0
        self._max_size = max_size
        self._index = [-1] * (max_size + 1)
        self._reverse_index = [-1] * (max_size + 1)
        self._keys = [None] * (max_size + 1)
        self._keys_size = 0

    def _is_valid_index(self, index):
        # negative values must be rejected: list indexing would silently wrap them
        return 0 <= index < self._max_size

    def is_empty(self):
        return self._keys_size == 0

    def size(self):
        return self._keys_size

    def contains(self, index):
        if not self._is_valid_index(index):
            return False
        return self._reverse_index[index] != -1

    def insert(self, index, element):
        # out-of-range or already used indices are ignored
        if not self._is_valid_index(index) or self.contains(index):
            return

        self._keys_size += 1
        self._index[self._keys_size] = index
        self._reverse_index[index] = self._keys_size
        self._keys[index] = element
        self.swim(self._keys_size)

    def min_index(self):
        return None if self._keys_size == 0 else self._index[1]

    def min_key(self):
        return None if self._keys_size == 0 else self._keys[self._index[1]]

    def exchange(self, pos_a, pos_b):
        # swap two heap positions and keep the reverse lookup in sync
        self._index[pos_a], self._index[pos_b] = self._index[pos_b], self._index[pos_a]
        self._reverse_index[self._index[pos_a]] = pos_a
        self._reverse_index[self._index[pos_b]] = pos_b

    def swim(self, pos):
        while pos > 1 and self._keys[self._index[pos // 2]] > self._keys[self._index[pos]]:
            self.exchange(pos // 2, pos)
            pos //= 2

    def sink(self, pos):
        length = self._keys_size
        while 2 * pos <= length:
            tmp = 2 * pos
            if tmp < length and self._keys[self._index[tmp]] > self._keys[self._index[tmp + 1]]:
                tmp += 1
            if not self._keys[self._index[tmp]] < self._keys[self._index[pos]]:
                break
            self.exchange(tmp, pos)
            pos = tmp

    def change_key(self, i, key):
        if not self.contains(i):
            return
        self._keys[i] = key
        # the new key may be smaller or larger, so try both directions
        self.swim(self._reverse_index[i])
        self.sink(self._reverse_index[i])

    def delete_min(self):
        if self._keys_size == 0:
            return
        min_index = self._index[1]
        self.exchange(1, self._keys_size)
        self._keys_size -= 1
        self.sink(1)
        # clear the slot that used to hold the removed item
        self._reverse_index[min_index] = -1
        self._keys[self._index[self._keys_size + 1]] = None
        self._index[self._keys_size + 1] = -1
        return min_index
410 |
411 |
412 | # data structure for EdgeWeightedDiGraph Topological
413 |
class DirectedCycle(object):

    """
    Depth-first search for a directed cycle in a graph that exposes vertices()
    and adjacent_edges(vertex), where every edge has an end attribute. The
    first cycle found is kept on a stack; the search stops expanding once a
    cycle is known.
    """

    def __init__(self, graph):
        self._marked = defaultdict(bool)
        self._edge_to = {}
        self._on_stack = defaultdict(bool)
        self._cycle = Stack()
        for v in graph.vertices():
            if not self._marked[v]:
                self.dfs(graph, v)

    def dfs(self, graph, vertex):
        self._on_stack[vertex] = True
        self._marked[vertex] = True

        for edge in graph.adjacent_edges(vertex):
            target = edge.end
            if self.has_cycle():
                return
            if not self._marked[target]:
                self._edge_to[target] = vertex
                self.dfs(graph, target)
                continue
            if self._on_stack[target]:
                # target is still on the current path: walk back to it to record the cycle
                hop = vertex
                while hop != target:
                    self._cycle.push(hop)
                    hop = self._edge_to[hop]
                self._cycle.push(target)
                self._cycle.push(vertex)
        self._on_stack[vertex] = False

    def has_cycle(self):
        return not self._cycle.is_empty()

    def cycle(self):
        return self._cycle
450 |
451 |
class DepthFirstOrder(object):

    """
    Record the orders in which a depth-first search visits the vertices of a
    graph that exposes vertices() and adjacent_edges(vertex): preorder,
    postorder and reverse postorder.
    """

    def __init__(self, graph):
        self._pre = Queue()
        self._post = Queue()
        self._reverse_post = Stack()
        self._marked = defaultdict(bool)

        for v in graph.vertices():
            if self._marked[v]:
                continue
            self.dfs(graph, v)

    def dfs(self, graph, vertex):
        # preorder: recorded on the way in
        self._pre.enqueue(vertex)
        self._marked[vertex] = True
        for edge in graph.adjacent_edges(vertex):
            if self._marked[edge.end]:
                continue
            self.dfs(graph, edge.end)

        # postorder: recorded once every reachable successor is finished
        self._post.enqueue(vertex)
        self._reverse_post.push(vertex)

    def prefix(self):
        return self._pre

    def postfix(self):
        return self._post

    def reverse_postfix(self):
        return self._reverse_post
482 |
483 |
class Topological(object):

    """
    Topological order of a directed graph. The order is the reverse postorder
    of a depth-first search and is only computed when the graph has no
    directed cycle; otherwise order() returns None.
    """

    def __init__(self, graph):
        has_cycle = DirectedCycle(graph).has_cycle()
        self._order = None if has_cycle else DepthFirstOrder(graph).reverse_postfix()

    def order(self):
        return self._order

    def is_DAG(self):
        return self._order is not None
498 |
499 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
502 |
--------------------------------------------------------------------------------
/chapter_4/module_4_1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import copy
4 | import doctest
5 | import random
6 | from collections import defaultdict
7 | from basic_data_struct import Bag, Queue, Stack
8 |
9 |
class Graph(object):

    """
    Undirected graph implementation. The cost of space is proportional to O(V + E)
    (V is the number of vertices and E is the number of edges). Checking whether
    v is adjacent to w and traversing the vertices adjacent to v both take time
    proportional to the degree of v. Self loops and parallel edges are rejected
    by add_edge. Vertices may be any hashable (immutable) values.
    TODO: Test file input.
    >>> g = Graph()
    >>> test_data = [(0, 5), (4, 3), (0, 1), (9, 12), (6, 4), (5, 4), (0, 2), # from book tinyG.txt
    ...              (11, 12), (9, 10), (0, 6), (7, 8), (9, 11), (5, 3)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> g.vertices_size()
    13
    >>> len(test_data) == g.edges_size()
    True
    >>> adjacent_vertices = ' '.join([str(v) for v in g.get_adjacent_vertices(0)])
    >>> adjacent_vertices
    '6 2 1 5'
    >>> g.degree(0)
    4
    >>> g.degree(9)
    3
    >>> g.max_degree()
    4
    >>> g.number_of_self_loops()
    0
    >>> g
    13 vertices, 13 edges
    0: 6 2 1 5
    1: 0
    2: 0
    3: 5 4
    4: 5 6 3
    5: 3 4 0
    6: 0 4
    7: 8
    8: 7
    9: 11 10 12
    10: 9
    11: 9 12
    12: 11 9
    <BLANKLINE>
    >>> g2 = Graph(graph=g)
    >>> g2.add_edge(4, 9)
    >>> g.has_edge(4, 9)
    False
    >>> g2.has_edge(4, 9)
    True
    >>> g2.has_edge(9, 4)
    True
    >>> g2.add_edge(4, 9)
    >>> [i for i in g2.get_adjacent_vertices(4)]
    [9, 5, 6, 3]
    >>> [i for i in g.get_adjacent_vertices('not a vertex')]
    []
    >>> g.vertices_size()
    13
    >>> Graph().avg_degree()
    0.0
    """

    def __init__(self, input_file=None, graph=None):
        # input_file is accepted but not read by this constructor (see TODO above)
        self._edges_size = 0
        self._adj = defaultdict(Bag)
        # 4.1.3 practice, add a graph parameter for constructor method.
        if graph is not None:
            self._adj = copy.deepcopy(graph._adj)
            self._edges_size = graph.edges_size()

    def vertices_size(self):
        return len(self._adj)

    def edges_size(self):
        return self._edges_size

    def add_edge(self, vertext_a, vertext_b):
        # 4.1.5 practice, no self cycle or parallel edges.
        if self.has_edge(vertext_a, vertext_b) or vertext_a == vertext_b:
            return
        self._adj[vertext_a].add(vertext_b)
        self._adj[vertext_b].add(vertext_a)

        self._edges_size += 1

    # 4.1.4 practice, add has_edge method
    def has_edge(self, vertext_a, vertext_b):
        if vertext_a not in self._adj or vertext_b not in self._adj:
            return False
        return any(vertex == vertext_b for vertex in self._adj[vertext_a])

    def get_adjacent_vertices(self, vertex):
        # look the vertex up without letting the defaultdict register unknown
        # vertices as a side effect of a read
        if vertex in self._adj:
            return self._adj[vertex]
        return Bag()

    def vertices(self):
        return self._adj.keys()

    def degree(self, vertex):
        assert vertex in self._adj
        return self._adj[vertex].size()

    def max_degree(self):
        result = 0
        for vertex in self._adj:
            result = max(result, self.degree(vertex))
        return result

    def avg_degree(self):
        vertices = self.vertices_size()
        if vertices == 0:
            # an empty graph has no vertices to average over
            return 0.0
        return float(2 * self._edges_size) / vertices

    def number_of_self_loops(self):
        count = 0
        for k in self._adj:
            for vertex in self._adj[k]:
                if vertex == k:
                    count += 1
        # a self loop appears twice in the adjacency list of its vertex
        return count // 2

    # 4.1.31 check the number of parallel edges with linear running time.
    def number_of_parallel_edges(self):
        count = 0
        for k in self._adj:
            seen = set()
            for vertex in self._adj[k]:
                if vertex in seen:
                    count += 1
                else:
                    seen.add(vertex)
        # every parallel edge is seen once from each of its two endpoints
        return count // 2

    def __repr__(self):
        # list the vertices in sorted order when they are comparable, so that the
        # output does not depend on dict iteration order
        try:
            ordered = sorted(self._adj)
        except TypeError:
            ordered = list(self._adj)
        lines = ['{} vertices, {} edges'.format(self.vertices_size(), self._edges_size)]
        for k in ordered:
            lines.append('{}: {}'.format(k, ' '.join(str(vertex) for vertex in self._adj[k])))
        return '\n'.join(lines) + '\n'
150 |
151 |
class DepthFirstPaths(object):

    """
    Undirected graph depth-first searching algorithms implementation.
    Depth-First-Search recursively visits every vertex adjacent to the current
    one, then treats each of them as a new starting point, until all vertices
    connected to the start vertex are marked.
    Querying a vertex never changes the result of vertices_size().
    >>> g = Graph()
    >>> test_data = [(0, 5), (2, 4), (2, 3), (1, 2), (0, 1), (3, 4), (3, 5), (0, 2)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> dfp = DepthFirstPaths(g, 0)
    >>> [dfp.has_path_to(i) for i in range(6)]
    [True, True, True, True, True, True]
    >>> [i for i in dfp.path_to(4)]
    [0, 2, 3, 4]
    >>> [i for i in dfp.path_to(1)]
    [0, 2, 1]
    >>> dfp.has_path_to('not a vertex')
    False
    >>> dfp.vertices_size()
    6
    """

    def __init__(self, graph, start_vertex):
        self._marked = defaultdict(bool)
        self._edge_to = {}
        self._start = start_vertex
        self.dfs(graph, self._start)

    def dfs(self, graph, vertex):
        self._marked[vertex] = True

        for v in graph.get_adjacent_vertices(vertex):
            if not self._marked.get(v, False):
                self._edge_to[v] = vertex
                self.dfs(graph, v)

    def has_path_to(self, vertex):
        # .get() does not trigger the defaultdict factory, so a query leaves
        # the set of marked vertices untouched
        return self._marked.get(vertex, False)

    def vertices_size(self):
        """
        Return the number of vertices reached from the start vertex, itself included.
        """
        return len(self._marked)

    def path_to(self, vertex):
        """
        Return a Stack with the vertices from the start vertex to vertex,
        or None when vertex was not reached.
        """
        if not self.has_path_to(vertex):
            return None

        tmp = vertex
        path = Stack()
        while tmp != self._start:
            path.push(tmp)
            tmp = self._edge_to[tmp]
        path.push(self._start)
        return path
204 |
205 |
class BreadthFirstPaths(object):

    """
    Breadth-First-Search algorithm implementation, using a queue as the helper
    data structure. The start vertex is marked and enqueued; then, repeatedly,
    a vertex is dequeued and each of its unmarked neighbours is marked and
    enqueued, until every vertex connected to the start has been marked.
    Because vertices are reached in order of distance, the recorded paths are
    shortest paths from the start vertex.
    The worst scenario of running time is proportional to O(V + E) (V is the number
    of vertices and E is the number of edges).
    >>> g = Graph()
    >>> test_data = [(0, 5), (2, 4), (2, 3), (1, 2), (0, 1), (3, 4), (3, 5), (0, 2)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> bfp = BreadthFirstPaths(g, 0)
    >>> [bfp.has_path_to(i) for i in range(6)]
    [True, True, True, True, True, True]
    >>> [i for i in bfp.path_to(4)]
    [0, 2, 4]
    >>> [i for i in bfp.path_to(5)]
    [0, 5]
    >>> bfp.dist_to(4)
    2
    >>> bfp.dist_to(5)
    1
    >>> bfp.dist_to('not a vertex')
    -1
    """

    def __init__(self, graph, start_vertex):
        self._marked = defaultdict(bool)
        self._edge_to = {}

        self._start = start_vertex
        self._dist = {start_vertex: 0}
        self.bfs(graph, self._start)

    def bfs(self, graph, vertex):
        pending = Queue()
        self._marked[vertex] = True
        pending.enqueue(vertex)
        while not pending.is_empty():
            current = pending.dequeue()
            for neighbour in graph.get_adjacent_vertices(current):
                if self._marked[neighbour]:
                    continue
                # first visit: remember where we came from and how far we are
                self._edge_to[neighbour] = current
                self._dist[neighbour] = self._dist[current] + 1
                self._marked[neighbour] = True
                pending.enqueue(neighbour)

    def has_path_to(self, vertex):
        return self._marked[vertex]

    def path_to(self, vertex):
        if not self.has_path_to(vertex):
            return None

        route = Stack()
        hop = vertex
        while hop != self._start:
            route.push(hop)
            hop = self._edge_to[hop]
        route.push(self._start)
        return route

    # 4.1.13 practice, implement dist_to method which only takes constant time.
    def dist_to(self, vertex):
        try:
            return self._dist[vertex]
        except KeyError:
            # vertex was never reached from the start vertex
            return -1

    def max_distance(self):
        return max(self._dist[vertex] for vertex in self._dist)
279 |
280 |
class ConnectedComponent(object):

    """
    Construct connected components using Depth-First-Search algorithm.
    All components are built up front, which costs time and space proportional
    to O(V + E); afterwards, checking whether two vertices are connected takes
    constant time. Component ids are assigned from 0 in the order in which
    graph.vertices() yields the first vertex of each component. A vertex that
    is not part of the graph is connected to nothing and has id -1.
    >>> g = Graph()
    >>> test_data = [(0, 5), (4, 3), (0, 1), (9, 12), (6, 4), (5, 4), (0, 2),
    ...              (11, 12), (9, 10), (0, 6), (7, 8), (9, 11), (5, 3)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> cc = ConnectedComponent(g)
    >>> cc.connected(0, 8)
    False
    >>> cc.connected(0, 4)
    True
    >>> cc.connected(0, 9)
    False
    >>> cc.vertex_id(0)
    0
    >>> cc.vertex_id(7)
    1
    >>> cc.vertex_id(11)
    2
    >>> cc.count()
    3
    >>> cc.connected(0, 'not a vertex')
    False
    >>> cc.vertex_id('not a vertex')
    -1
    """

    def __init__(self, graph):
        self._marked = defaultdict(bool)
        self._id = defaultdict(int)
        self._count = 0

        for s in graph.vertices():
            if not self._marked.get(s, False):
                self.dfs(graph, s)
                self._count += 1

    def dfs(self, graph, vertex):
        self._marked[vertex] = True
        self._id[vertex] = self._count
        for s in graph.get_adjacent_vertices(vertex):
            if not self._marked.get(s, False):
                self.dfs(graph, s)

    def connected(self, vertex_1, vertex_2):
        # membership is checked first: indexing the defaultdict would hand an
        # unknown vertex the id 0, i.e. the id of the first component
        if vertex_1 not in self._id or vertex_2 not in self._id:
            return False
        return self._id[vertex_1] == self._id[vertex_2]

    def vertex_id(self, vertex):
        return self._id.get(vertex, -1)

    def count(self):
        return self._count
337 |
338 |
class Cycle(object):

    """
      Detect whether an undirected graph contains a cycle with a
    Depth-First-Search. In a tree-like graph (no cycle) the search never meets
    an already visited vertex other than the one it just came from, so the
    cycle flag stays False.
    >>> g = Graph()
    >>> test_data = [(0, 1), (0, 2), (0, 6), (0, 5), (3, 5), (6, 4)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> cycle = Cycle(g)
    >>> cycle.has_cycle()
    False
    >>> g2 = Graph()
    >>> has_cycle_data = [(0, 1), (0, 2), (0, 6), (0, 5), (3, 5), (6, 4), (3, 4)]
    >>> for a, b in has_cycle_data:
    ...     g2.add_edge(a, b)
    ...
    >>> cycle2 = Cycle(g2)
    >>> cycle2.has_cycle()
    True
    """

    def __init__(self, graph):
        """Search every component of ``graph`` for a cycle."""
        self._has_cycle = False
        self._marked = defaultdict(bool)
        for root in graph.vertices():
            if self._marked[root]:
                continue
            # a search root has no predecessor, so it is passed as its own
            self.dfs(graph, root, root)

    def dfs(self, graph, vertex_1, vertex_2):
        """
        Visit ``vertex_1``, which was reached from ``vertex_2``, and flag a
        cycle when a visited neighbour other than ``vertex_2`` is met.
        """
        self._marked[vertex_1] = True
        for neighbour in graph.get_adjacent_vertices(vertex_1):
            if not self._marked[neighbour]:
                self.dfs(graph, neighbour, vertex_1)
            elif neighbour != vertex_2:
                self._has_cycle = True

    def has_cycle(self):
        """Return True if the search found a cycle."""
        return self._has_cycle
380 |
381 |
class TwoColor(object):

    """
      Using Depth-First-Search algorithm to solve Two-Color problems: decide
    whether the vertices can be painted with two colors so that no edge joins
    two vertices of the same color (i.e. whether the graph is bipartite).
    >>> g = Graph()
    >>> test_data = [(0, 5), (2, 4), (2, 3), (1, 2), (0, 1), (3, 4), (3, 5), (0, 2)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> tc = TwoColor(g)
    >>> tc.is_bipartite()
    False
    >>> square = Graph()
    >>> for a, b in [(0, 1), (1, 2), (2, 3), (3, 0)]:
    ...     square.add_edge(a, b)
    ...
    >>> TwoColor(square).is_bipartite()
    True
    """

    def __init__(self, graph):
        """Try to two-color every component of ``graph``."""
        self._marked = defaultdict(bool)
        # color of each vertex; search roots keep the default color False
        self._color = defaultdict(bool)
        self._is_twocolorable = True

        for vertex in graph.vertices():
            if not self._marked[vertex]:
                self.dfs(graph, vertex)

    def dfs(self, graph, vertex):
        """Color everything reachable from ``vertex``, alternating per edge."""
        self._marked[vertex] = True
        for v in graph.get_adjacent_vertices(vertex):
            if not self._marked[v]:
                # a neighbour must get the opposite color of this vertex
                self._color[v] = not self._color[vertex]
                self.dfs(graph, v)
            elif self._color[v] == self._color[vertex]:
                # an edge between two equally colored vertices
                self._is_twocolorable = False

    def is_bipartite(self):
        """Return True if the graph could be colored with two colors."""
        return self._is_twocolorable
417 |
418 |
419 | # 4.1.16 practice, implement GraphProperties class.
class GraphProperties(object):

    """
      Eccentricity, diameter, radius and center of a connected graph, computed
    by running one breadth-first search from every vertex.
    >>> g = Graph()
    >>> test_data = [(0, 5), (2, 4), (2, 3), (1, 2), (0, 1), (3, 4), (3, 5), (0, 2)]
    >>> for a, b in test_data:
    ...     g.add_edge(a, b)
    ...
    >>> gp = GraphProperties(g)
    >>> gp.eccentricity(0)
    2
    >>> gp.eccentricity(1)
    2
    >>> gp.diameter()
    2
    >>> gp.radius()
    2
    """

    def __init__(self, graph):
        """
        Compute the eccentricity of every vertex of ``graph``.

        Raises ValueError when the graph has no vertices and Exception when
        the graph is not connected.
        """
        # materialise the vertices once: random.choice needs a sequence, and
        # random.sample no longer accepts sets or dict views in Python 3.11+
        vertices = list(graph.vertices())
        if not vertices:
            raise ValueError('graph has no vertices.')

        # any vertex works as the start of the connectivity check
        dfp = DepthFirstPaths(graph, random.choice(vertices))
        if dfp.vertices_size() != graph.vertices_size():
            raise Exception('graph is not connected.')

        self._eccentricities = {}
        for vertex in vertices:
            bfp = BreadthFirstPaths(graph, vertex)
            # eccentricity: distance to the vertex farthest away
            self._eccentricities[vertex] = bfp.max_distance()

        self._diameter = max(self._eccentricities.values())
        self._radius = min(self._eccentricities.values())

    def eccentricity(self, vertex):
        """Return the eccentricity of ``vertex``, or -1 for an unknown vertex."""
        return self._eccentricities.get(vertex, -1)

    def diameter(self):
        """Return the largest eccentricity of any vertex."""
        return self._diameter

    def radius(self):
        """Return the smallest eccentricity of any vertex."""
        return self._radius

    def center(self):
        """Return one vertex, picked at random, whose eccentricity is the radius."""
        centers = [k for k, v in self._eccentricities.items() if v == self._radius]
        return random.choice(centers)

    # 4.1.17 practice
    def girth(self):
        """Not implemented yet; currently returns None."""
        pass
473 |
474 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
477 |
--------------------------------------------------------------------------------
/chapter_5/module_5_3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 |
5 |
def brute_force_backward_search(pattern, txt):
    '''
    Brute-force substring search that keeps a single text cursor and backs it
    up after a mismatch. Returns the index of the first occurrence of
    `pattern` in `txt`, or len(txt) when there is none.
    >>> test_data = 'ABACADABRAC'
    >>> pattern = 'ABRA'
    >>> brute_force_backward_search(pattern, test_data)
    6
    >>> pattern2 = 'ACNOTEXIST'
    >>> brute_force_backward_search(pattern2, test_data)
    11
    '''
    assert pattern is not None and pattern != ''
    assert txt is not None and txt != ''

    pat_len, txt_len = len(pattern), len(txt)
    matched = cursor = 0
    while matched < pat_len and cursor < txt_len:
        if txt[cursor] != pattern[matched]:
            # back up to the character after the start of the failed attempt
            cursor -= matched
            matched = 0
        else:
            matched += 1
        cursor += 1

    return cursor - pat_len if matched == pat_len else txt_len
30 |
31 |
class KMP(object):

    '''
    Knuth-Morris-Pratt substring search driven by a DFA built from the
    pattern (alphabet: the 256 code points 0..255).

    `search` returns the index of the first occurrence or len(txt) when there
    is none; `count` and `search_all` report every occurrence, including
    overlapping ones. Text characters outside the alphabet never match.
    >>> kmp = KMP('AACAA')
    >>> kmp.search('AABRAACADABRAACAADABRA')
    12
    >>> kmp = KMP('rab')
    >>> kmp.search('abacadabrabracabracadabrabrabracad')
    8
    >>> kmp.count('abacadabrabracabracadabrabrabracad')
    3
    >>> kmp.search_all('abacadabrabracabracadabrabrabracad')
    [8, 23, 26]
    >>> kmp2 = KMP('abracadabra')
    >>> kmp2.search('abacadabrabracabracadabrabrabracad')
    14
    >>> kmp2.count('abacadabrabracabracadabrabrabracad')
    1
    >>> kmp2.search_all('abacadabrabracabracadabrabrabracad')
    [14]
    >>> kmp3 = KMP('bcara')
    >>> kmp3.search('abacadabrabracabracadabrabrabracad')
    34
    >>> kmp3.count('abacadabrabracabracadabrabrabracad')
    0
    >>> kmp3.search_all('abacadabrabracabracadabrabrabracad')
    []
    >>> kmp4 = KMP('rabrabracad')
    >>> kmp4.search('abacadabrabracabracadabrabrabracad')
    23
    >>> kmp5 = KMP('abacad')
    >>> kmp5.search('abacadabrabracabracadabrabrabracad')
    0
    >>> KMP('aa').search_all('aaaa')
    [0, 1, 2]
    >>> KMP('aa').count('aaaa')
    3
    '''

    def __init__(self, pattern):
        '''Build the DFA for `pattern` (non-empty, code points below 256).'''
        self._pat = pattern
        arr = [0] * len(pattern)
        # _dfa[c][j]: next state when character code c is read in state j
        self._dfa = [arr[:] for _ in range(256)]
        self._dfa[ord(pattern[0])][0] = 1

        # x is the state the DFA would be in after reading pattern[1:j]
        x = 0
        for j in range(1, len(pattern)):
            for c in range(256):
                # mismatch transitions are copied from the restart state
                self._dfa[c][j] = self._dfa[c][x]
            self._dfa[ord(pattern[j])][j] = j + 1
            x = self._dfa[ord(pattern[j])][x]

        # state after reading pattern[1:]: where the scan continues after a
        # full match so that overlapping occurrences are not skipped
        self._restart = x

    def _occurrences(self, txt):
        '''Yield the start index of every occurrence of the pattern in `txt`.'''
        pat_len = len(self._pat)
        state = 0
        for position, char in enumerate(txt):
            code = ord(char)
            # a character outside the alphabet cannot be part of a match
            state = self._dfa[code][state] if code < 256 else 0
            if state == pat_len:
                yield position + 1 - pat_len
                state = self._restart

    def search(self, txt):
        '''Return the index of the first occurrence, or len(txt) if absent.'''
        return next(self._occurrences(txt), len(txt))

    # 5.3.8 practice
    def count(self, txt):
        '''Return the number of (possibly overlapping) occurrences in `txt`.'''
        return sum(1 for _ in self._occurrences(txt))

    # 5.3.8 practice
    def search_all(self, txt):
        '''Return the start indices of all occurrences in `txt`, ascending.'''
        return list(self._occurrences(txt))
112 |
113 |
class BoyerMoore(object):

    '''
    Boyer-Moore substring search using the mismatched-character heuristic:
    the pattern is compared right to left and, on a mismatch, shifted so the
    offending text character lines up with its rightmost occurrence in the
    pattern (always by at least one position).
    >>> bm = BoyerMoore('NEEDLE')
    >>> bm.search('FINDINAHAYSTACKNEEDLE')
    15
    >>> bm = BoyerMoore('rab')
    >>> bm.search('abacadabrabracabracadabrabrabracad')
    8
    >>> bm.count('abacadabrabracabracadabrabrabracad')
    3
    >>> bm.search_all('abacadabrabracabracadabrabrabracad')
    [8, 23, 26]
    >>> bm2 = BoyerMoore('abracadabra')
    >>> bm2.search('abacadabrabracabracadabrabrabracad')
    14
    >>> bm2.count('abacadabrabracabracadabrabrabracad')
    1
    >>> bm2.search_all('abacadabrabracabracadabrabrabracad')
    [14]
    >>> bm3 = BoyerMoore('bcara')
    >>> bm3.search('abacadabrabracabracadabrabrabracad')
    34
    >>> bm3.count('abacadabrabracabracadabrabrabracad')
    0
    >>> bm3.search_all('abacadabrabracabracadabrabrabracad')
    []
    >>> bm4 = BoyerMoore('rabrabracad')
    >>> bm4.search('abacadabrabracabracadabrabrabracad')
    23
    >>> bm5 = BoyerMoore('abacad')
    >>> bm5.search('abacadabrabracabracadabrabrabracad')
    0
    '''

    def __init__(self, pattern):
        '''Record the rightmost position of every character of `pattern`.'''
        self._pat = pattern
        # -1 for characters that do not occur in the pattern at all
        self._right = [-1] * 256
        for position, char in enumerate(pattern):
            self._right[ord(char)] = position

    def _shift_at(self, txt, start):
        '''
        Compare the pattern with `txt` at `start`, right to left. Return 0 on
        a full match, otherwise how far the pattern has to be shifted.
        '''
        for j in reversed(range(len(self._pat))):
            char = txt[start + j]
            if char != self._pat[j]:
                return max(1, j - self._right[ord(char)])
        return 0

    def _occurrences(self, txt):
        '''Yield the start index of every occurrence of the pattern in `txt`.'''
        last_start = len(txt) - len(self._pat)
        start = 0
        while start <= last_start:
            shift = self._shift_at(txt, start)
            if shift == 0:
                yield start
                # step by one so overlapping occurrences are found as well
                shift = 1
            start += shift

    def search(self, txt):
        '''Return the index of the first occurrence, or len(txt) if absent.'''
        return next(self._occurrences(txt), len(txt))

    # 5.3.9 practice
    def count(self, txt):
        '''Return the number of occurrences of the pattern in `txt`.'''
        return sum(1 for _ in self._occurrences(txt))

    # 5.3.9 practice
    def search_all(self, txt):
        '''Return the start indices of all occurrences in `txt`, ascending.'''
        return list(self._occurrences(txt))
210 |
211 |
class RabinKarp(object):

    '''
    Rabin-Karp substring search with a rolling hash (radix 256, modulus 997).

    Every hash hit is confirmed by comparing the characters (Las Vegas
    version), so hash collisions are never reported as matches. `search`
    returns the index of the first occurrence or len(txt) when there is none.
    >>> rk = RabinKarp('rab')
    >>> rk.search('abacadabrabracabracadabrabrabracad')
    8
    >>> rk.count('abacadabrabracabracadabrabrabracad')
    3
    >>> rk.search_all('abacadabrabracabracadabrabrabracad')
    [8, 23, 26]
    >>> rk2 = RabinKarp('abracadabra')
    >>> rk2.search('abacadabrabracabracadabrabrabracad')
    14
    >>> rk2.count('abacadabrabracabracadabrabrabracad')
    1
    >>> rk2.search_all('abacadabrabracabracadabrabrabracad')
    [14]
    >>> rk3 = RabinKarp('bcara')
    >>> rk3.search('abacadabrabracabracadabrabrabracad')
    34
    >>> rk3.count('abacadabrabracabracadabrabrabracad')
    0
    >>> rk3.search_all('abacadabrabracabracadabrabrabracad')
    []
    >>> rk4 = RabinKarp('rabrabracad')
    >>> rk4.search('abacadabrabracabracadabrabrabracad')
    23
    >>> rk5 = RabinKarp('abacad')
    >>> rk5.search('abacadabrabracabracadabrabrabracad')
    0
    >>> RabinKarp('ab').search(']}')  # same hash as 'ab', different text
    2
    >>> RabinKarp('abc').search('ab')
    2
    '''

    def __init__(self, pattern):
        '''Precompute the hash of `pattern` and the leading-digit factor.'''
        self._pat = pattern
        self._pat_len = len(pattern)
        self._q = 997
        # _rm = 256 ** (pat_len - 1) % q, used to remove the leading character
        self._rm = 1
        for _ in range(1, self._pat_len):
            self._rm = (256 * self._rm) % self._q
        self._pat_hash = self._hash(pattern, self._pat_len)

    def check(self, i, txt=None):
        '''
        Confirm that the pattern really occurs in `txt` at index `i`.

        Without `txt` nothing can be compared and the hash hit is trusted
        (Monte Carlo behaviour).
        '''
        # 5.3.12 practice, implement LasVegas version check method.
        if txt is None:
            return True
        if i < 0 or i + self._pat_len > len(txt):
            return False
        for j in range(self._pat_len):
            if self._pat[j] != txt[i + j]:
                return False
        return True

    def _hash(self, txt, length):
        '''Return the hash of the first `length` characters of `txt`.'''
        result = 0
        for i in range(length):
            result = (256 * result + ord(txt[i])) % self._q
        return result

    def _occurrences(self, txt):
        '''Yield the start index of every occurrence of the pattern in `txt`.'''
        txt_len = len(txt)
        if txt_len < self._pat_len:
            # the text cannot contain a pattern longer than itself
            return

        txt_hash = self._hash(txt, self._pat_len)
        if self._pat_hash == txt_hash and self.check(0, txt):
            yield 0

        for i in range(self._pat_len, txt_len):
            # drop the leading character of the window; adding q keeps the
            # intermediate value non-negative
            leading = self._rm * ord(txt[i - self._pat_len]) % self._q
            txt_hash = (txt_hash + self._q - leading) % self._q
            # append the trailing character
            txt_hash = (txt_hash * 256 + ord(txt[i])) % self._q

            start = i - self._pat_len + 1
            if self._pat_hash == txt_hash and self.check(start, txt):
                yield start

    def search(self, txt):
        '''Return the index of the first occurrence, or len(txt) if absent.'''
        return next(self._occurrences(txt), len(txt))

    # 5.3.10 practice
    def search_all(self, txt):
        '''Return the start indices of all occurrences in `txt`, ascending.'''
        return list(self._occurrences(txt))

    # 5.3.10 practice
    def count(self, txt):
        '''Return the number of occurrences of the pattern in `txt`.'''
        return sum(1 for _ in self._occurrences(txt))
315 |
316 |
317 | # 5.3.1 practice, brute force string search algorithm with specific interfaces.
class Brute(object):

    '''
    Brute-force substring search: try every start position in the text and
    compare the pattern left to right. `search` returns the index of the
    first occurrence, or len(txt) when the pattern does not occur.
    >>> brute = Brute('rab')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    8
    >>> brute.count('abacadabrabracabracadabrabrabracad')
    3
    >>> [i for i in brute.search_all('abacadabrabracabracadabrabrabracad')]
    [8, 23, 26]
    >>> brute = Brute('abracadabra')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    14
    >>> [i for i in brute.search_all('abacadabrabracabracadabrabrabracad')]
    [14]
    >>> brute.count('abacadabrabracabracadabrabrabracad')
    1
    >>> brute = Brute('bcara')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    34
    >>> brute.count('abacadabrabracabracadabrabrabracad')
    0
    >>> brute.search_all('abacadabrabracabracadabrabrabracad')
    []
    >>> brute = Brute('rabrabracad')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    23
    >>> brute = Brute('abacad')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    0
    '''

    def __init__(self, pattern):
        '''Remember `pattern` and its length.'''
        self._pat_len = len(pattern)
        self._pat = pattern

    def _matches_at(self, txt, start):
        '''Return True when the pattern occurs in `txt` at index `start`.'''
        return all(txt[start + k] == self._pat[k]
                   for k in range(self._pat_len))

    def _occurrences(self, txt):
        '''Yield the start index of every occurrence of the pattern in `txt`.'''
        for start in range(len(txt) - self._pat_len + 1):
            if self._matches_at(txt, start):
                yield start

    def search(self, txt):
        '''Return the index of the first occurrence, or len(txt) if absent.'''
        return next(self._occurrences(txt), len(txt))

    # 5.3.7 practice
    def count(self, txt):
        '''Return the number of occurrences of the pattern in `txt`.'''
        return sum(1 for _ in self._occurrences(txt))

    # 5.3.7 practice
    def search_all(self, txt):
        '''Return the start indices of all occurrences in `txt`, ascending.'''
        return list(self._occurrences(txt))
393 |
394 |
395 | # 5.3.4 practice, counting consecutive empty spaces,
396 | # the running would be proportional to O(n)
def empty_space(txt, count):
    '''
    Return the index of the first run of `count` consecutive spaces in `txt`.
    When there is no such run, len(txt) - 1 is returned.
    >>> empty_space('   xxxx   ', 3)
    0
    >>> empty_space('xxx   xxxXXXXXX Xxxx', 3)
    3
    >>> empty_space('xxxx   ', 3)
    4
    >>> empty_space('xxx  ', 3)
    4
    '''

    length = len(txt)
    last_start = length - count
    start = 0
    while start <= last_start:
        if txt[start] == ' ':
            # measure the run of spaces beginning here, up to `count`
            run = 0
            while run < count and txt[start + run] == ' ':
                run += 1
            if run == count:
                return start
            # jump to the non-space character that ended the run; the step
            # below then moves past it
            start += run
        start += 1
    return length - 1
422 |
423 |
424 | # 5.3.5 practice, implement brute force
425 | # algorithm comparing substring from right to left.
class BruteForceRL(object):

    '''
    Brute-force substring search that compares the pattern with the text
    from right to left at every start position. `search` returns the index
    of the first occurrence, or len(txt) when the pattern does not occur.
    >>> brute = BruteForceRL('rab')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    8
    >>> brute = BruteForceRL('abracadabra')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    14
    >>> brute = BruteForceRL('bcara')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    34
    >>> brute = BruteForceRL('rabrabracad')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    23
    >>> brute = BruteForceRL('abacad')
    >>> brute.search('abacadabrabracabracadabrabrabracad')
    0
    '''

    def __init__(self, pattern):
        '''Remember `pattern` and its length.'''
        self._pat_len = len(pattern)
        self._pat = pattern

    def search(self, txt):
        '''Return the index of the first occurrence, or len(txt) if absent.'''
        txt_len = len(txt)
        for start in range(txt_len - self._pat_len + 1):
            # last pattern character first, first pattern character last
            offsets = range(self._pat_len - 1, -1, -1)
            if all(txt[start + k] == self._pat[k] for k in offsets):
                return start
        return txt_len
461 |
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    doctest.testmod()
464 |
--------------------------------------------------------------------------------
/chapter_3/module_3_3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 |
# The two node colors; the Node.color setter accepts only these values.
RED = 1
BLACK = 2
7 |
8 |
class Node(object):

    """
    Node of the red-black tree: a key/value pair, links to the left and right
    child, the size of the subtree rooted at this node and the node's color.
    """

    def __init__(self, key, value, size, color):
        self._left = None
        self._right = None
        self._key, self._value = key, value
        self._size, self._color = size, color

    @property
    def key(self):
        """Key stored in this node."""
        return self._key

    @key.setter
    def key(self, replacement):
        self._key = replacement

    @property
    def value(self):
        """Value associated with the key."""
        return self._value

    @value.setter
    def value(self, replacement):
        self._value = replacement

    @property
    def left(self):
        """Left child, or None."""
        return self._left

    @left.setter
    def left(self, child):
        assert child is None or isinstance(child, Node)
        self._left = child

    @property
    def right(self):
        """Right child, or None."""
        return self._right

    @right.setter
    def right(self, child):
        assert child is None or isinstance(child, Node)
        self._right = child

    @property
    def size(self):
        """Number of nodes in the subtree rooted at this node."""
        return self._size

    @size.setter
    def size(self, count):
        assert isinstance(count, int) and count >= 0
        self._size = count

    @property
    def color(self):
        """Color of this node, RED or BLACK."""
        return self._color

    @color.setter
    def color(self, replacement):
        assert replacement in (RED, BLACK)
        self._color = replacement
70 |
71 |
class RBTree(object):

    """
    Left-leaning red-black binary search tree mapping keys to values.

    Operations on an empty tree (min_val, max_val, delete_min, delete_max,
    delete, keys) and deleting a key that is not stored are harmless no-ops.
    >>> rbt = RBTree()
    >>> rbt.is_empty()
    True
    >>> rbt.size()
    0
    >>> for index, e in enumerate('EASYQUITION'):
    ...     rbt.put(e, index)
    ...
    >>> rbt.check()
    True
    >>> node1 = rbt.get('A').value
    >>> node1
    1
    >>> rbt.get('E').value
    0
    >>> rbt.get('Y').value
    3
    >>> rbt.get('N').value
    10
    >>> rbt.is_empty()
    False
    >>> rbt.size()  ### duplicate values 'I'
    10
    >>> rbt.min_val().value
    1
    >>> rbt.max_val().value
    3
    >>> rbt.delete_min()
    >>> rbt.min_val().value
    0
    >>> rbt.delete_min()
    >>> rbt.min_val().value
    8
    >>> rbt.delete_max()
    >>> rbt.max_val().value
    5
    >>> rbt.delete_max()
    >>> rbt.max_val().value
    7
    >>> rbt.check()
    True
    >>> single = RBTree()
    >>> single.delete('missing')
    >>> single.put('A', 1)
    >>> single.delete('B')
    >>> single.size()
    1
    >>> single.delete('A')
    >>> single.is_empty()
    True
    >>> single.min_val() is None
    True
    >>> single.keys()
    []
    """

    def __init__(self):
        self._root = None

    def __is_red(self, node):
        """Return True for a red node; a missing (None) node counts as black."""
        if not node:
            return False
        return node.color == RED

    def size(self):
        """Return the number of keys stored in the tree."""
        return self.__node_size(self._root)

    def is_empty(self):
        """Return True when the tree holds no keys."""
        return self._root is None

    def __node_size(self, node):
        """Return the size of the subtree rooted at node (0 for None)."""
        return 0 if not node else node.size

    def __rotate_left(self, node):
        """Turn a red right link of node into a red left link; return the new root."""
        assert node and self.__is_red(node.right)

        rotate_node = node.right
        node.right = rotate_node.left
        rotate_node.left = node
        rotate_node.color = node.color
        node.color = RED
        rotate_node.size = node.size
        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return rotate_node

    def __rotate_right(self, node):
        """Turn a red left link of node into a red right link; return the new root."""
        assert node and self.__is_red(node.left)

        rotate_node = node.left
        node.left = rotate_node.right
        rotate_node.right = node
        rotate_node.color = node.color
        node.color = RED
        rotate_node.size = node.size
        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return rotate_node

    def __flip_colors(self, node):
        """Invert the colors of node and of both its children."""
        assert node and node.left and node.right
        assert (not self.__is_red(node) and self.__is_red(node.left) and
                self.__is_red(node.right) or
                self.__is_red(node) and
                not self.__is_red(node.left) and
                not self.__is_red(node.right))

        node.color = RED if node.color == BLACK else BLACK
        node.left.color = RED if node.left.color == BLACK else BLACK
        node.right.color = RED if node.right.color == BLACK else BLACK

    def get(self, key):
        """Return the node holding key, or None when the key is not stored."""
        return self.__get(self._root, key)

    def __get(self, node, key):
        tmp = node
        while tmp:
            if tmp.key > key:
                tmp = tmp.left
            elif tmp.key < key:
                tmp = tmp.right
            else:
                break
        return tmp

    def min_val(self):
        """Return the node with the smallest key, or None for an empty tree."""
        if not self._root:
            return None
        return self.__min_val(self._root)

    def __min_val(self, node):
        tmp = node
        while tmp.left:
            tmp = tmp.left
        return tmp

    def put(self, key, value):
        """Insert key with value, or replace the value of an existing key."""
        self._root = self.__put(self._root, key, value)
        self._root.color = BLACK

    def __put(self, node, key, value):
        if not node:
            return Node(key, value, 1, RED)
        if key < node.key:
            node.left = self.__put(node.left, key, value)
        elif key > node.key:
            node.right = self.__put(node.right, key, value)
        else:
            node.value = value

        # according to the book's definition, red node only exists in left node,
        # if right node is red, rotate left, make sure left node is red.
        if self.__is_red(node.right) and not self.__is_red(node.left):
            node = self.__rotate_left(node)

        # a red-black tree must not have two consecutive red left nodes,
        # in this case, rotate right, then the left node and right node are both red,
        # the next move would be flipping both nodes' colors.
        if self.__is_red(node.left) and node.left.left and self.__is_red(node.left.left):
            node = self.__rotate_right(node)

        if self.__is_red(node.left) and self.__is_red(node.right):
            self.__flip_colors(node)

        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return node

    def __balance(self, node):
        """Restore the red-black invariants at node on the way back up."""
        assert node is not None

        if self.__is_red(node.right):
            node = self.__rotate_left(node)

        if self.__is_red(node.left) and self.__is_red(node.left.left):
            node = self.__rotate_right(node)

        if self.__is_red(node.left) and self.__is_red(node.right):
            self.__flip_colors(node)

        node.size = self.__node_size(node.left) + self.__node_size(node.right) + 1
        return node

    def __move_red_left(self, node):
        assert node is not None
        assert (self.__is_red(node) and not self.__is_red(node.left) and
                not self.__is_red(node.left.left))

        self.__flip_colors(node)
        # if node.right.left node is red, that means there is one more node that can
        # be "borrowed", then move one node over from node's right side.
        if self.__is_red(node.right.left):
            node.right = self.__rotate_right(node.right)
            node = self.__rotate_left(node)
        return node

    # 3.3.39 delete minimum key in red-black tree, the java implementation is on the book,
    # this is python implementation of the book's answer.
    def delete_min(self):
        """Remove the smallest key; does nothing on an empty tree."""
        if self.is_empty():
            return None

        # this is for keeping red-black tree's balance
        if not self.__is_red(self._root.left) and not self.__is_red(self._root.right):
            self._root.color = RED
        self._root = self.__delete_min(self._root)
        if not self.is_empty():
            self._root.color = BLACK

    def __delete_min(self, node):
        if not node.left:
            return None
        # if node's left node and node's left's left node is not red, "borrow" one node
        # from node's right side to keep the red-black tree balance.
        if not self.__is_red(node.left) and not self.__is_red(node.left.left):
            node = self.__move_red_left(node)
        node.left = self.__delete_min(node.left)
        return self.__balance(node)

    def __move_red_right(self, node):
        self.__flip_colors(node)
        # this is the same principle as the __move_red_left function, move one node from
        # the node's right side if the two consecutive left nodes are not red.
        if not self.__is_red(node.left.left):
            node = self.__rotate_right(node)
        return node

    # 3.3.39 delete maximum key in red-black tree, the java implementation is on the book,
    # this is python implementation of the book's answer, it differs a little from the
    # delete_min function.
    def delete_max(self):
        """Remove the largest key; does nothing on an empty tree."""
        if self.is_empty():
            return None

        # this is for keeping red-black tree's balance
        if not self.__is_red(self._root.left) and not self.__is_red(self._root.right):
            self._root.color = RED
        self._root = self.__delete_max(self._root)
        if not self.is_empty():
            self._root.color = BLACK

    def __delete_max(self, node):
        if self.__is_red(node.left):
            node = self.__rotate_right(node)
        if not node.right:
            return None
        if not self.__is_red(node.right) and not self.__is_red(node.right.left):
            node = self.__move_red_right(node)
        node.right = self.__delete_max(node.right)
        return self.__balance(node)

    def delete(self, key):
        """Remove key from the tree; does nothing when key is not stored."""
        # the recursive helper assumes the key exists, so filter out the
        # empty tree and missing keys up front
        if self.is_empty() or self.get(key) is None:
            return None

        if not self.__is_red(self._root.left) and not self.__is_red(self._root.right):
            self._root.color = RED
        self._root = self.__delete(self._root, key)
        if not self.is_empty():
            self._root.color = BLACK

    def __delete(self, node, key):
        if key < node.key:
            # same principle with delete_min function
            if not self.__is_red(node.left) and not self.__is_red(node.left.left):
                node = self.__move_red_left(node)
            node.left = self.__delete(node.left, key)
        else:
            if self.__is_red(node.left):
                node = self.__rotate_right(node)

            if key == node.key and node.right is None:
                return None

            if not self.__is_red(node.right) and not self.__is_red(node.right.left):
                node = self.__move_red_right(node)

            if key == node.key:
                # replace this node's content with its successor (the minimum
                # of the right subtree), then remove the successor node
                successor = self.__min_val(node.right)
                node.value = successor.value
                node.key = successor.key
                node.right = self.__delete_min(node.right)
            else:
                node.right = self.__delete(node.right, key)
        return self.__balance(node)

    def select(self, k):
        """
        Find the kth node of the binary search tree,
        the solution is similar with get() or put() function.
        """
        assert isinstance(k, int) and k <= self.size()

        if not self._root:
            return None

        tmp = self._root
        while tmp:
            tmp_size = self.__node_size(tmp.left)
            if tmp_size > k:
                tmp = tmp.left
            elif tmp_size < k:
                tmp = tmp.right
                k = k - tmp_size - 1
            else:
                return tmp

    def rank(self, key):
        """
        Find the rank of the node in the binary search tree by the given key.
        """
        result = 0
        if not self._root:
            return -1
        tmp = self._root

        while tmp:
            if tmp.key > key:
                tmp = tmp.left
            elif tmp.key < key:
                result += self.__node_size(tmp.left) + 1
                tmp = tmp.right
            elif tmp.key == key:
                result += self.__node_size(tmp.left)
                break
        return result

    def max_val(self):
        """
        Find the node with the maximum key in the binary search tree.
        """
        if not self._root:
            return None
        tmp = self._root
        while tmp.right:
            tmp = tmp.right
        return tmp

    def keys(self):
        """Return all keys in ascending order ([] for an empty tree)."""
        if not self._root:
            return []
        return self.keys_range(self.min_val().key, self.max_val().key)

    def keys_range(self, low, high):
        """Return the keys between low and high (both inclusive), ascending."""
        queue = []
        self.__keys(self._root, queue, low, high)
        return queue

    def __keys(self, node, queue, low, high):
        if not node:
            return
        if low < node.key:
            self.__keys(node.left, queue, low, high)
        if low <= node.key and high >= node.key:
            queue.append(node.key)
        if high > node.key:
            self.__keys(node.right, queue, low, high)

    def is_rbt(self):
        """Return True when no right link is red and no two left reds are in a row."""
        return self.__is_rbt(self._root)

    def __is_rbt(self, node):
        if not node:
            return True
        if self.__is_red(node.right):
            return False
        if node != self._root and self.__is_red(node) and self.__is_red(node.left):
            return False
        return self.__is_rbt(node.left) and self.__is_rbt(node.right)

    def is_binary_tree(self):
        """Return True when every node's size field matches its subtree."""
        return self.__is_binary_tree(self._root)

    def __is_binary_tree(self, node):
        if not node:
            return True
        if node.size != self.__node_size(node.left) + self.__node_size(node.right) + 1:
            return False
        return self.__is_binary_tree(node.left) and self.__is_binary_tree(node.right)

    def is_ordered(self):
        """Return True when the keys satisfy the binary-search-tree order."""
        return self.__is_ordered(self._root, None, None)

    def __is_ordered(self, node, min_key, max_key):
        if not node:
            return True
        # None means "no bound"; compare with None explicitly so that falsy
        # keys such as 0 or '' are still enforced as bounds
        if min_key is not None and node.key <= min_key:
            return False
        if max_key is not None and node.key >= max_key:
            return False
        return (self.__is_ordered(node.left, min_key, node.key) and
                self.__is_ordered(node.right, node.key, max_key))

    def is_rank_consistent(self):
        """Return True when select() and rank() are inverses of each other."""
        for i in range(self.size()):
            if i != self.rank(self.select(i).key):
                return False

        for key in self.keys():
            if key != self.select(self.rank(key)).key:
                return False

        return True

    def check(self):
        """Return True when all structural invariants of the tree hold."""
        if not self.is_binary_tree():
            return False
        if not self.is_ordered():
            return False
        if not self.is_rank_consistent():
            return False
        if not self.is_rbt():
            return False
        return True
463 |
464 |
# When executed as a script, run the doctests embedded in this module's docstrings.
if __name__ == '__main__':
    doctest.testmod()
467 |
--------------------------------------------------------------------------------
/chapter_2/module_2_4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import doctest
4 | import random
5 | import bisect
6 |
7 |
class MaxPQ(object):

    """
    Fixed-capacity maximum priority queue stored as a 1-indexed binary heap
    (slot 0 of the backing list is never used). The smallest value held by
    the queue is tracked separately so min_val() needs no heap scan.

    >>> mpq = MaxPQ(10)
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> for i in lst:
    ...     mpq.insert_effective(i)
    ...
    >>> mpq.min_val()
    0
    >>> print_lst = []
    >>> while not mpq.is_empty():
    ...     print_lst.append(str(mpq.del_max()))
    ...
    >>> ' '.join(print_lst)
    '9 8 7 6 5 4 3 2 1 0'
    >>> mpq.min_val() is None
    True
    >>> mpq.del_max() is None
    True
    >>> mpq.size()
    0
    """

    def __init__(self, size):
        """Create an empty queue able to hold `size` values."""
        self._pq = [None] * (size + 1)
        self._size = 0
        self._min = None

    def is_empty(self):
        """Return True when the queue holds no values."""
        return self._size == 0

    def size(self):
        """Return the number of values currently held."""
        return self._size

    def swim(self, pos):
        """Move the value at heap position `pos` up by repeated swaps."""
        while pos > 1 and self._pq[pos // 2] < self._pq[pos]:
            self._pq[pos // 2], self._pq[pos] = self._pq[pos], self._pq[pos // 2]
            pos //= 2

    def sink(self, pos):
        """Move the value at heap position `pos` down by repeated swaps."""
        while 2 * pos <= self._size:
            index = 2 * pos
            # Pick the larger of the two children.
            if index < self._size and self._pq[index] < self._pq[index + 1]:
                index += 1
            if self._pq[pos] >= self._pq[index]:
                break
            self._pq[index], self._pq[pos] = self._pq[pos], self._pq[index]
            pos = index

    def _store(self, val):
        """
        Put `val` in the next free slot and update the bookkeeping.

        The slot is written before the size is incremented, so an
        IndexError raised on a full queue leaves the queue unchanged.
        Returns the heap position of the new value.
        """
        self._pq[self._size + 1] = val
        self._size += 1
        if self._min is None or self._min > val:
            self._min = val
        return self._size

    def insert(self, val):
        """
        Add `val` to the queue.

        Raises IndexError when the queue is already at capacity.
        """
        self.swim(self._store(val))

    def min_val(self):
        """Return the smallest value in the queue, or None when it is empty."""
        return self._min

    def del_max(self):
        """
        Remove and return the largest value, or None when the queue is empty.
        """
        if self._size == 0:
            return None
        max_val = self._pq[1]
        self._pq[1] = self._pq[self._size]
        self._pq[self._size] = None
        self._size -= 1
        if self._size == 0:
            # The tracked minimum has just left the queue.
            self._min = None
        else:
            self.sink(1)
        return max_val

    # 2.4.26 practice
    def swim_effective(self, pos):
        """
        Swim without exchanges: shift smaller ancestors down one level and
        write the moving value once at its final position.
        """
        val = self._pq[pos]
        while pos > 1 and self._pq[pos // 2] < val:
            self._pq[pos] = self._pq[pos // 2]
            pos //= 2
        self._pq[pos] = val

    def insert_effective(self, val):
        """
        Same contract as insert(), using swim_effective() to restore order.

        Raises IndexError when the queue is already at capacity.
        """
        self.swim_effective(self._store(val))

    def max_val(self):
        """Return the largest value without removing it, or None when empty."""
        if self._size == 0:
            return None
        return self._pq[1]
88 |
89 |
class MinPQ(object):

    """
    Fixed-capacity minimum priority queue stored as a 1-indexed binary heap
    (slot 0 of the backing list is never used).

    >>> mpq = MinPQ(10)
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> for i in lst:
    ...     mpq.insert(i)
    ...
    >>> print_lst = []
    >>> while not mpq.is_empty():
    ...     print_lst.append(str(mpq.del_min()))
    ...
    >>> ' '.join(print_lst)
    '0 1 2 3 4 5 6 7 8 9'
    >>> mpq.del_min() is None
    True
    >>> mpq.size()
    0
    """

    def __init__(self, size):
        """Create an empty queue able to hold `size` values."""
        self._pq = [None] * (size + 1)
        self._size = 0

    def is_empty(self):
        """Return True when the queue holds no values."""
        return self._size == 0

    def size(self):
        """Return the number of values currently held."""
        return self._size

    def swim(self, pos):
        """Move the value at heap position `pos` up by repeated swaps."""
        # Integer division keeps the parent index exact for any position.
        while pos > 1 and self._pq[pos // 2] > self._pq[pos]:
            self._pq[pos // 2], self._pq[pos] = self._pq[pos], self._pq[pos // 2]
            pos //= 2

    def sink(self, pos):
        """Move the value at heap position `pos` down by repeated swaps."""
        while 2 * pos <= self._size:
            index = 2 * pos
            # Pick the smaller of the two children.
            if index < self._size and self._pq[index] > self._pq[index + 1]:
                index += 1
            if self._pq[pos] <= self._pq[index]:
                break
            self._pq[index], self._pq[pos] = self._pq[pos], self._pq[index]
            pos = index

    def insert(self, val):
        """
        Add `val` to the queue.

        Raises IndexError when the queue is already at capacity; the slot
        is written before the size is incremented, so a failed insert
        leaves the queue unchanged.
        """
        self._pq[self._size + 1] = val
        self._size += 1
        self.swim(self._size)

    def del_min(self):
        """
        Remove and return the smallest value, or None when the queue is empty.
        """
        if self._size == 0:
            return None
        min_val = self._pq[1]
        self._pq[1] = self._pq[self._size]
        self._pq[self._size] = None
        self._size -= 1
        self.sink(1)
        return min_val

    def min_val(self):
        """Return the smallest value without removing it, or None when empty."""
        if self._size == 0:
            return None
        return self._pq[1]
147 |
148 |
149 | # 2.4.22 practice, a little change for python version, the queue's size is not limited.
class MaxPQDynamic(object):

    """
    Maximum priority queue on a 0-indexed binary heap kept in a plain list,
    so the queue has no fixed capacity.

    >>> mpq = MaxPQDynamic()
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> for i in lst:
    ...     mpq.insert(i)
    ...
    >>> print_lst = []
    >>> while not mpq.is_empty():
    ...     print_lst.append(str(mpq.del_max()))
    ...
    >>> ' '.join(print_lst)
    '9 8 7 6 5 4 3 2 1 0'
    """

    def __init__(self):
        self._pq = []

    def is_empty(self):
        """Return True when no values are stored."""
        return len(self._pq) == 0

    def size(self):
        """Return the number of stored values."""
        return len(self._pq)

    def swim(self, pos):
        """Swap the value at `pos` upwards while it is larger than its parent."""
        heap = self._pq
        while pos > 0:
            parent = (pos - 1) // 2
            if not heap[parent] < heap[pos]:
                break
            heap[parent], heap[pos] = heap[pos], heap[parent]
            pos = parent

    def sink(self, pos):
        """Swap the value at `pos` downwards while a child is larger."""
        heap = self._pq
        last = len(heap) - 1
        child = 2 * pos + 1
        while child <= last:
            # Prefer the right child when it is the larger one.
            if child < last and heap[child] < heap[child + 1]:
                child += 1
            if heap[pos] >= heap[child]:
                break
            heap[child], heap[pos] = heap[pos], heap[child]
            pos = child
            child = 2 * pos + 1

    def insert(self, val):
        """Append `val` and restore heap order."""
        self._pq.append(val)
        self.swim(len(self._pq) - 1)

    def del_max(self):
        """Remove and return the largest value; IndexError when empty."""
        top = self._pq[0]
        tail = self._pq.pop()
        if self._pq:
            # The former last value takes the root slot and sinks into place.
            self._pq[0] = tail
            self.sink(0)
        return top

    def max_val(self):
        """Return the largest value without removing it; IndexError when empty."""
        return self._pq[0]
206 |
207 |
class MinPQDynamic(object):

    """
    Minimum priority queue on a 0-indexed binary heap kept in a plain list,
    so the queue has no fixed capacity.

    >>> mpq = MinPQDynamic()
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> for i in lst:
    ...     mpq.insert(i)
    ...
    >>> print_lst = []
    >>> while not mpq.is_empty():
    ...     print_lst.append(str(mpq.del_min()))
    ...
    >>> ' '.join(print_lst)
    '0 1 2 3 4 5 6 7 8 9'
    >>> fast = MinPQDynamic()
    >>> for i in [5, 3, 8, 1, 9, 2]:
    ...     fast._pq.append(i)
    ...     fast.binary_swim(fast.size() - 1)
    ...
    >>> [fast.del_min() for _ in range(6)]
    [1, 2, 3, 5, 8, 9]
    """

    def __init__(self):
        self._pq = []

    def is_empty(self):
        """Return True when no values are stored."""
        return len(self._pq) == 0

    def size(self):
        """Return the number of stored values."""
        return len(self._pq)

    def swim(self, pos):
        """Swap the value at `pos` upwards while it is smaller than its parent."""
        while pos > 0 and self._pq[(pos - 1) // 2] > self._pq[pos]:
            self._pq[(pos - 1) // 2], self._pq[pos] = self._pq[pos], self._pq[(pos - 1) // 2]
            pos = (pos - 1) // 2

    def binary_swim(self, pos):
        """
        Restore heap order for the value at `pos` using a binary search
        along its path to the root instead of a parent-by-parent scan.

        In a min-heap the ancestors of `pos`, read from the parent up to
        the root, are in non-increasing order, so the ancestors larger
        than the moving value form a prefix of that path. The prefix
        length is found by binary search, those ancestors are shifted
        down one level each, and the value is written once.
        """
        target = self._pq[pos]

        # Heap indices of the ancestors, nearest first, root last.
        path = []
        current = pos
        while current > 0:
            current = (current - 1) // 2
            path.append(current)

        # Find how many leading ancestors are strictly greater than target.
        low, high = 0, len(path)
        while low < high:
            mid = (low + high) // 2
            if self._pq[path[mid]] > target:
                low = mid + 1
            else:
                high = mid

        # Shift those ancestors down and drop the target into the gap.
        hole = pos
        for ancestor in path[:low]:
            self._pq[hole] = self._pq[ancestor]
            hole = ancestor
        self._pq[hole] = target

    def sink(self, pos):
        """Swap the value at `pos` downwards while a child is smaller."""
        length = len(self._pq) - 1
        while 2 * pos + 1 <= length:
            index = 2 * pos + 1
            # Pick the smaller of the two children.
            if index < length and self._pq[index] > self._pq[index + 1]:
                index += 1
            if self._pq[pos] <= self._pq[index]:
                break
            self._pq[index], self._pq[pos] = self._pq[pos], self._pq[index]
            pos = index

    def insert(self, val):
        """Append `val` and restore heap order."""
        self._pq.append(val)
        self.swim(len(self._pq) - 1)

    def del_min(self):
        """Remove and return the smallest value; IndexError when empty."""
        min_val = self._pq[0]
        last_index = len(self._pq) - 1
        self._pq[0], self._pq[last_index] = self._pq[last_index], self._pq[0]
        self._pq.pop(last_index)
        self.sink(0)
        return min_val

    def min_val(self):
        """Return the smallest value without removing it; IndexError when empty."""
        return self._pq[0]
282 |
283 |
284 | # 2.4.30 practice
class MeanHeap(object):

    """
    Running-median container built from two heaps: a max-heap holding the
    lower half of the values and a min-heap holding the upper half. The
    two heaps never differ in size by more than one.

    >>> mh = MeanHeap()
    >>> for i in range(9):
    ...     mh.insert(i)
    ...
    >>> mh.median()
    4
    >>> mh.insert(9)
    >>> mh.median()
    4.5
    >>> mh.insert(10)
    >>> mh.median()
    5
    >>> mh.size()
    11
    >>> dup = MeanHeap()
    >>> for i in (1, 1, 1, 1):
    ...     dup.insert(i)
    ...
    >>> dup.size()
    4
    """

    def __init__(self):
        self._min_heap = MinPQDynamic()
        self._max_heap = MaxPQDynamic()

    def is_empty(self):
        """Return True when no values have been stored."""
        return self._min_heap.is_empty() and self._max_heap.is_empty()

    def size(self):
        """Return the total number of stored values across both heaps."""
        return self._min_heap.size() + self._max_heap.size()

    def median(self):
        """
        Return the median of the stored values, or None when empty.

        With an odd count this is the top of the larger heap; with an even
        count it is the mean of the two heap tops (computed with `/`).
        """
        if self.is_empty():
            return None
        if self._min_heap.size() < self._max_heap.size():
            return self._max_heap.max_val()

        if self._max_heap.size() < self._min_heap.size():
            return self._min_heap.min_val()

        return (self._min_heap.min_val() + self._max_heap.max_val()) / 2

    def insert(self, val):
        """
        Store `val`, keeping the lower half in the max-heap and the upper
        half in the min-heap, then rebalance so the sizes differ by at
        most one. Values equal to an existing heap top are kept.
        """
        if self._max_heap.is_empty() or val <= self._max_heap.max_val():
            self._max_heap.insert(val)
        else:
            self._min_heap.insert(val)

        # Move one value across whenever a heap gets two ahead.
        if self._max_heap.size() > self._min_heap.size() + 1:
            self._min_heap.insert(self._max_heap.del_max())
        elif self._min_heap.size() > self._max_heap.size() + 1:
            self._max_heap.insert(self._min_heap.del_min())
351 |
352 |
353 | # 2.4.33, 2.4.34 index minimum priority queue.
class IndexMinPQ(object):

    """
    Indexed minimum priority queue: every key is associated with an integer
    index in range(max_size), and the heap orders indices by their keys.

    >>> test_data = 'testexmaple'
    >>> imp = IndexMinPQ(len(test_data))
    >>> imp.is_empty()
    True
    >>> for index, s in enumerate(test_data):
    ...     imp.insert(index, s)
    ...
    >>> imp.is_empty()
    False
    >>> imp.size()
    11
    >>> [imp.contains(i) for i in (12, -1, 1, 4, 10)]
    [False, False, True, True, True]
    >>> imp.min_index()
    7
    """

    def __init__(self, max_size):
        assert max_size > 0
        slots = max_size + 1
        self._max_size = max_size
        # _index[pos] -> client index stored at heap position pos (1-based)
        self._index = [-1] * slots
        # _reverse_index[i] -> heap position of client index i, -1 if absent
        self._reverse_index = [-1] * slots
        # _keys[i] -> key associated with client index i
        self._keys = [None] * slots
        self._keys_size = 0

    def is_empty(self):
        """Return True when no index is stored."""
        return self._keys_size == 0

    def size(self):
        """Return the number of stored indices."""
        return self._keys_size

    def contains(self, index):
        """Return True when `index` is in range and currently stored."""
        if not (index >= 0 and index < self._max_size):
            return False
        return self._reverse_index[index] != -1

    def insert(self, index, element):
        """Associate `element` with `index`; out-of-range or already
        present indices are ignored."""
        out_of_range = index < 0 or index >= self._max_size
        if out_of_range or self.contains(index):
            return

        position = self._keys_size + 1
        self._keys_size = position
        self._keys[index] = element
        self._index[position] = index
        self._reverse_index[index] = position
        self.swim(position)

    def min_index(self):
        """Return the index holding the smallest key, or None when empty."""
        if self._keys_size == 0:
            return None
        return self._index[1]

    def min_key(self):
        """Return the smallest key, or None when empty."""
        if self._keys_size == 0:
            return None
        return self._keys[self._index[1]]

    def exchange(self, pos_a, pos_b):
        """Swap two heap positions and keep the reverse lookup in sync."""
        heap = self._index
        heap[pos_a], heap[pos_b] = heap[pos_b], heap[pos_a]
        self._reverse_index[heap[pos_a]] = pos_a
        self._reverse_index[heap[pos_b]] = pos_b

    def swim(self, pos):
        """Move heap position `pos` up while its key is below the parent's."""
        while pos > 1:
            parent = pos // 2
            if not self._keys[self._index[parent]] > self._keys[self._index[pos]]:
                break
            self.exchange(parent, pos)
            pos = parent

    def sink(self, pos):
        """Move heap position `pos` down while a child has a smaller key."""
        last = self._keys_size
        child = 2 * pos
        while child <= last:
            # Prefer the right child when its key is the smaller one.
            if child < last and self._keys[self._index[child]] > self._keys[self._index[child + 1]]:
                child += 1
            if not self._keys[self._index[child]] < self._keys[self._index[pos]]:
                break
            self.exchange(child, pos)
            pos = child
            child = 2 * pos

    def change_key(self, i, key):
        """Replace the key of index `i`; ignored when `i` is not stored."""
        out_of_range = i < 0 or i >= self._max_size
        if out_of_range or not self.contains(i):
            return
        self._keys[i] = key
        # The new key may be smaller or larger, so try both directions.
        self.swim(self._reverse_index[i])
        self.sink(self._reverse_index[i])

    def delete_min(self):
        """Remove the index with the smallest key and return it;
        returns None when the queue is empty."""
        if self._keys_size == 0:
            return
        top = self._index[1]
        last = self._keys_size
        self.exchange(1, last)
        self._keys_size = last - 1
        self.sink(1)
        # The removed index now sits in the slot just past the heap.
        self._reverse_index[top] = -1
        self._keys[self._index[last]] = None
        self._index[last] = -1
        return top
448 |
449 |
class Node(object):

    """
    A pair (i, j) ordered by the cube sum i^3 + j^3.

    Rich comparison methods are provided because Python 3 ignores
    __cmp__; __cmp__ is kept so Python 2 callers behave as before.
    Two nodes compare equal when their cube sums are equal, and the hash
    follows the same rule.
    """

    def __init__(self, i, j):
        self._sum = i ** 3 + j ** 3
        self.i = i
        self.j = j

    def __cmp__(self, other):
        # Only consulted by Python 2.
        if self._sum < other._sum:
            return -1
        elif self._sum > other._sum:
            return 1
        return 0

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum == other._sum

    def __ne__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum != other._sum

    def __lt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum < other._sum

    def __le__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum <= other._sum

    def __gt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum > other._sum

    def __ge__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._sum >= other._sum

    def __hash__(self):
        # Defining __eq__ disables the default hash; keep nodes hashable
        # and consistent with equality.
        return hash(self._sum)

    def __str__(self):
        return '{} = {}^3 + {}^3'.format(self._sum, self.i, self.j)
466 |
467 |
468 | # 2.4.25 practice, cube sum implementation.
def cubesum(n=10 ** 6, visit=None):
    """
    Enumerate every pair 0 <= i <= j <= n in ascending order of
    i^3 + j^3 using a minimum priority queue.

    The queue starts with the diagonal nodes (i, i). Each time a node
    (i, j) is removed, (i, j + 1) is inserted while j < n, so at most
    n + 1 nodes are held at any moment.

    n     -- largest value allowed for i and j (defaults to 10 ** 6).
    visit -- optional callable invoked with each Node as it is removed
             from the queue; when omitted the nodes are discarded.
    """
    min_pq = MinPQDynamic()
    # Include i == n so the i and j ranges agree.
    for i in range(n + 1):
        min_pq.insert(Node(i, i))

    while not min_pq.is_empty():
        node = min_pq.del_min()
        if visit is not None:
            visit(node)
        if node.j < n:
            min_pq.insert(Node(node.i, node.j + 1))
479 |
480 |
def heap_sort(lst):
    """
    Sort `lst` in place, in ascending order, using heap sort.

    The list is first arranged into a max-heap with sink(); then the
    largest value is repeatedly exchanged with the last value of the heap,
    the heap is shrunk by one, and the new root is sunk again until the
    heap holds a single value. Empty and one-element lists are left
    untouched. Returns None.

    >>> lst = []
    >>> heap_sort(lst)
    >>> lst
    []
    >>> lst = [i for i in range(10)]
    >>> random.shuffle(lst)
    >>> heap_sort(lst)
    >>> lst
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    """
    def sink(lst, pos, size):
        # `size` is the index of the last value still inside the heap.
        while 2 * pos + 1 <= size:
            index = 2 * pos + 1
            if index < size and lst[index + 1] > lst[index]:
                index += 1
            if lst[pos] >= lst[index]:
                break
            lst[pos], lst[index] = lst[index], lst[pos]
            pos = index

    size = len(lst) - 1
    for i in range(size // 2, -1, -1):
        sink(lst, i, size)

    # `size` is -1 for an empty list, so compare explicitly instead of
    # relying on truthiness.
    while size > 0:
        lst[0], lst[size] = lst[size], lst[0]
        size -= 1
        sink(lst, 0, size)
512 |
513 |
# When executed as a script, run the doctests embedded in this module's docstrings.
# The cubesum() call is left disabled.
if __name__ == '__main__':
    doctest.testmod()
    # cubesum()
517 |
--------------------------------------------------------------------------------
/chapter_4/module_4_2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding:UTF-8 -*-
3 | import copy
4 | import doctest
5 | from collections import defaultdict
6 | from basic_data_struct import Bag, Stack, Queue
7 |
8 |
class Digragh(object):

    """
    Directed graph implementation. Every edge is directed, so if v is
    reachable from w, w might not be reachable from v. A separate set
    records all vertices, because self._adj only has entries for the
    vertices whose outdegree is not 0. Query methods never add entries to
    self._adj; only add_edge() does.
    >>> # 4.2.6 practice
    >>> graph = Digragh()
    >>> test_data = [(4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (8, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (7, 8), (8, 7), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6)]
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> graph.vertices_size()
    13
    >>> graph.edges_size()
    22
    >>> [i for i in graph.get_adjacent_vertices(2)]
    [0, 3]
    >>> [j for j in graph.get_adjacent_vertices(6)]
    [9, 4, 0]
    >>> [v for v in graph.vertices()]
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    >>> graph
    13 vertices, 22 edges
    0: 5 1
    2: 0 3
    3: 5 2
    4: 3 2
    5: 4
    6: 9 4 0
    7: 6 8
    8: 7 9
    9: 11 10
    10: 12
    11: 4 12
    12: 9
    <BLANKLINE>
    >>>
    """

    def __init__(self, graph=None):
        """Create an empty graph, or a copy of `graph` when one is given."""
        self._edges_size = 0
        self._adj = defaultdict(Bag)
        self._vertices = set()

        # 4.2.3 practice, generate graph from another graph.
        if graph:
            self._adj = copy.deepcopy(graph._adj)
            self._edges_size = graph.edges_size()
            self._vertices = copy.copy(graph.vertices())

    def vertices_size(self):
        """Return the number of vertices."""
        return len(self._vertices)

    def edges_size(self):
        """Return the number of edges."""
        return self._edges_size

    def add_edge(self, start, end):
        """Add the edge start -> end; self loops and parallel edges are ignored."""
        # 4.2.5 practice, parallel edge and self cycle are not allowed
        if start == end or self.has_edge(start, end):
            return
        self._vertices.add(start)
        self._vertices.add(end)
        self._adj[start].add(end)
        self._edges_size += 1

    def get_adjacent_vertices(self, vertex):
        """
        Return the vertices that `vertex` points to.

        A vertex without outgoing edges yields a fresh empty Bag, so a
        lookup never adds an entry to the adjacency map.
        """
        if vertex in self._adj:
            return self._adj[vertex]
        return Bag()

    def vertices(self):
        """Return the set of all vertices."""
        return self._vertices

    def reverse(self):
        """Return a new graph in which every edge is reversed."""
        reverse_graph = Digragh()
        for vertex in self.vertices():
            for adjacent_vertex in self.get_adjacent_vertices(vertex):
                reverse_graph.add_edge(adjacent_vertex, vertex)
        return reverse_graph

    # 4.2.4 practice, add has_edge method for Digraph
    def has_edge(self, start, end):
        """Return True when the edge start -> end exists."""
        # Test membership first so the defaultdict does not create an
        # empty entry for `start`.
        if start not in self._adj:
            return False
        return any(vertex == end for vertex in self._adj[start])

    def __repr__(self):
        lines = ['{} vertices, {} edges'.format(len(self._vertices), self._edges_size)]
        for vertex in self._adj:
            neighbours = ' '.join(str(v) for v in self._adj[vertex])
            lines.append('{}: {}'.format(vertex, neighbours))
        return '\n'.join(lines) + '\n'
109 |
110 |
class DirectedDFS(object):

    """
    Depth-first search on a directed graph from one or more source
    vertices; answers which vertices are reachable from the sources.
    >>> graph = Digragh()
    >>> test_data = [(4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (8, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (7, 8), (8, 7), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6)]
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> dfs = DirectedDFS(graph, 1)
    >>> [i for i in graph.vertices() if dfs.marked(i)]
    [1]
    >>> dfs1 = DirectedDFS(graph, 2)
    >>> [i for i in graph.vertices() if dfs1.marked(i)]
    [0, 1, 2, 3, 4, 5]
    >>> dfs2 = DirectedDFS(graph, 1, 2, 6)
    >>> [i for i in graph.vertices() if dfs2.marked(i)]
    [0, 1, 2, 3, 4, 5, 6, 9, 10, 11, 12]
    """

    def __init__(self, graph, *sources):
        self._marked = defaultdict(bool)
        for source in sources:
            # A source already reached from an earlier one needs no new walk.
            if self._marked[source]:
                continue
            self.dfs(graph, source)

    def dfs(self, graph, vertex):
        """Mark `vertex` and, recursively, everything reachable from it."""
        self._marked[vertex] = True
        for neighbour in graph.get_adjacent_vertices(vertex):
            if self._marked[neighbour]:
                continue
            self.dfs(graph, neighbour)

    def marked(self, vertex):
        """Return True when `vertex` is reachable from one of the sources."""
        return self._marked[vertex]
149 |
150 |
class DirectedCycle(object):

    """
    Detect a directed cycle with depth-first search. The helper map
    _on_stack records the vertices on the current recursion path; reaching
    an adjacent vertex that is still on that path means a cycle exists.
    >>> graph = Digragh()
    >>> test_data = [(4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (8, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (7, 8), (8, 7), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6)]
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> dc = DirectedCycle(graph)
    >>> dc.has_cycle()
    True
    >>> [i for i in dc.cycle()]
    [3, 5, 4, 3]
    """

    def __init__(self, graph):
        self._marked = defaultdict(bool)
        self._on_stack = defaultdict(bool)
        self._edge_to = {}
        self._cycle = Stack()
        for start in graph.vertices():
            if self._marked[start]:
                continue
            self.dfs(graph, start)

    def dfs(self, graph, vertex):
        """Explore from `vertex`, recording the first cycle that is found."""
        self._marked[vertex] = True
        self._on_stack[vertex] = True

        for neighbour in graph.get_adjacent_vertices(vertex):
            # Once a cycle is recorded the search unwinds immediately.
            if self.has_cycle():
                return
            if not self._marked[neighbour]:
                self._edge_to[neighbour] = vertex
                self.dfs(graph, neighbour)
                continue
            if self._on_stack[neighbour]:
                # Walk the tree edges back from `vertex` to `neighbour`,
                # then close the loop.
                current = vertex
                while current != neighbour:
                    self._cycle.push(current)
                    current = self._edge_to[current]
                self._cycle.push(neighbour)
                self._cycle.push(vertex)
        self._on_stack[vertex] = False

    def has_cycle(self):
        """Return True when a cycle has been recorded."""
        return not self._cycle.is_empty()

    def cycle(self):
        """Return the stack holding the recorded cycle (empty if none)."""
        return self._cycle
207 |
208 |
class DepthFirstOrder(object):

    """
    Record the vertex orderings produced by a depth-first search over
    every vertex of a graph: preorder, postorder and reverse postorder.
    """

    def __init__(self, graph):
        self._marked = defaultdict(bool)
        self._pre = Queue()
        self._post = Queue()
        self._reverse_post = Stack()

        for start in graph.vertices():
            if self._marked[start]:
                continue
            self.dfs(graph, start)

    def dfs(self, graph, vertex):
        """Visit `vertex`: record it before and after its unvisited
        neighbours have been explored."""
        self._marked[vertex] = True
        self._pre.enqueue(vertex)
        for neighbour in graph.get_adjacent_vertices(vertex):
            if self._marked[neighbour]:
                continue
            self.dfs(graph, neighbour)

        # Everything reachable from `vertex` is finished at this point.
        self._reverse_post.push(vertex)
        self._post.enqueue(vertex)

    def prefix(self):
        """Return the queue of vertices in preorder."""
        return self._pre

    def postfix(self):
        """Return the queue of vertices in postorder."""
        return self._post

    def reverse_postfix(self):
        """Return the stack of vertices pushed in postorder."""
        return self._reverse_post
239 |
240 |
class Topological(object):

    """
    Topological sort of a directed acyclic graph: for every edge u->w,
    u comes before w in the order. This implementation uses depth-first
    search; for any edge v->w, dfs(w) returns before dfs(v) because the
    graph contains no cycle, so the reverse postorder is a valid order.
    When the graph has a cycle no order is produced.
    Another approach keeps a queue of the vertices whose indegree is 0,
    repeatedly dequeues one and marks it, and enqueues its adjacent
    vertices until every vertex is marked. That variant is not given here.
    >>> test_data = [(2, 3), (0, 6), (0, 1), (2, 0), (11, 12),
    ...              (9, 12), (9, 10), (9, 11), (3, 5), (8, 7),
    ...              (5, 4), (0, 5), (6, 4), (6, 9), (7, 6)]
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> topo = Topological(graph)
    >>> topo.is_DAG()
    True
    >>> [i for i in topo.order()]
    [8, 7, 2, 3, 0, 6, 9, 10, 11, 12, 1, 5, 4]
    """

    def __init__(self, graph):
        self._order = None
        # An order only exists for acyclic graphs.
        if DirectedCycle(graph).has_cycle():
            return
        self._order = DepthFirstOrder(graph).reverse_postfix()

    def order(self):
        """Return the vertices in topological order, or None for a cyclic graph."""
        return self._order

    def is_DAG(self):
        """Return True when an order was produced, i.e. no cycle was found."""
        return self._order is not None
280 |
281 |
class KosarajuSCC(object):

    """
    Strongly connected components: vertices are visited in the reverse
    postorder of the reversed graph, and every depth-first search started
    from a still-unmarked vertex labels one component.
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> scc = KosarajuSCC(graph)
    >>> count = scc.count()
    >>> output = defaultdict(Queue)
    >>> for v in graph.vertices():
    ...     output[scc.vertex_id(v)].enqueue(v)
    ...
    >>> ['{}: {}'.format(k, ', '.join(map(str, v))) for k, v in output.items()]
    ['0: 1', '1: 0, 2, 3, 4, 5', '2: 9, 10, 11, 12', '3: 6, 8', '4: 7']
    """

    def __init__(self, graph):
        self._count = 0
        self._id = {}
        self._marked = defaultdict(bool)
        visiting_order = DepthFirstOrder(graph.reverse()).reverse_postfix()
        for start in visiting_order:
            if self._marked[start]:
                continue
            # Everything reached from `start` shares the current label.
            self.dfs(graph, start)
            self._count += 1

    def dfs(self, graph, vertex):
        """Label `vertex` and every unmarked vertex reachable from it
        with the current component number."""
        self._id[vertex] = self._count
        self._marked[vertex] = True
        for neighbour in graph.get_adjacent_vertices(vertex):
            if self._marked[neighbour]:
                continue
            self.dfs(graph, neighbour)

    def strongly_connected(self, vertex_1, vertex_2):
        """Return True when both vertices carry the same component number."""
        first = self._id[vertex_1]
        second = self._id[vertex_2]
        return first == second

    def vertex_id(self, vertex):
        """Return the component number assigned to `vertex`."""
        return self._id[vertex]

    def count(self):
        """Return the number of components found."""
        return self._count
328 |
329 |
class TransitiveClosure(object):

    """
    Answer whether one vertex is reachable from another in a directed
    graph by running one DirectedDFS per vertex up front. Running time is
    proportional to V(V + E) and space to V*V, so this is not a good
    solution for large graphs.
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> tc = TransitiveClosure(graph)
    >>> tc.reachable(1, 5)
    False
    >>> tc.reachable(1, 0)
    False
    >>> tc.reachable(0, 1)
    True
    >>> tc.reachable(0, 9)
    False
    >>> tc.reachable(8, 12)
    True
    """

    def __init__(self, graph):
        # One single-source search per vertex, keyed by that source.
        self._all = {source: DirectedDFS(graph, source)
                     for source in graph.vertices()}

    def reachable(self, start, end):
        """Return True when `end` is marked by the search started at `start`."""
        search = self._all[start]
        return search.marked(end)
367 |
368 |
369 | # 4.2.7 practice, implement Degrees class
370 | # which compute degrees of vertices in a directed graph.
class Degrees(object):

    """
    Indegree and outdegree of every vertex of a directed graph, plus the
    derived sources (indegree 0), sinks (outdegree 0) and the "map"
    property (every vertex has outdegree exactly 1).
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> degree = Degrees(graph)
    >>> degree.indegree(0)
    2
    >>> degree.outdegree(0)
    2
    >>> degree.indegree(1)
    1
    >>> degree.outdegree(1)
    0
    >>> degree.indegree(9)
    3
    >>> degree.outdegree(9)
    2
    >>> degree.is_map()
    False
    >>> [i for i in degree.sources()]
    [7]
    >>> [i for i in degree.sinks()]
    [1]
    """

    def __init__(self, graph):
        self._indegree = defaultdict(int)
        self._outdegree = defaultdict(int)
        vertices = list(graph.vertices())
        for v in vertices:
            for adj in graph.get_adjacent_vertices(v):
                self._indegree[adj] += 1
                self._outdegree[v] += 1

        # The degree maps only hold vertices with a non-zero degree, so
        # sources and sinks have to be derived from the full vertex list.
        self._sources = [v for v in vertices if self._indegree.get(v, 0) == 0]
        self._sinks = [v for v in vertices if self._outdegree.get(v, 0) == 0]
        self._is_map = all(self._outdegree.get(v, 0) == 1 for v in vertices)

    def indegree(self, vertex):
        """Return the number of edges pointing to `vertex` (0 if unknown)."""
        return self._indegree.get(vertex, 0)

    def outdegree(self, vertex):
        """Return the number of edges leaving `vertex` (0 if unknown)."""
        return self._outdegree.get(vertex, 0)

    def sources(self):
        """Return a fresh iterator over the vertices with indegree 0."""
        return iter(self._sources)

    def sinks(self):
        """Return a fresh iterator over the vertices with outdegree 0."""
        return iter(self._sinks)

    def is_map(self):
        """Return True when every vertex has outdegree exactly 1."""
        return self._is_map
429 |
430 |
431 | # 4.2.20 practice, check if euler cycle exists.
class Euler(object):

    """
    Check whether a directed graph has an Euler cycle. Two conditions are
    tested: every vertex has a non-zero indegree equal to its outdegree,
    and every vertex can be reached from one starting vertex (with
    balanced degrees this means the graph is connected).
    >>> test_data = ((4, 2), (2, 3), (3, 2), (6, 0), (0, 1), (2, 0),
    ...              (11, 12), (12, 9), (9, 10), (9, 11), (7, 9), (10, 12),
    ...              (11, 4), (4, 3), (3, 5), (6, 8), (8, 6), (5, 4), (0, 5),
    ...              (6, 4), (6, 9), (7, 6))
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> euler = Euler(graph)
    >>> euler.is_euler_cycle_exists()
    False
    """

    def __init__(self, graph):
        self._indegree = defaultdict(int)
        self._outdegree = defaultdict(int)
        vertices = list(graph.vertices())
        for v in vertices:
            for adj in graph.get_adjacent_vertices(v):
                self._indegree[adj] += 1
                self._outdegree[v] += 1

        # .get() keeps the degree maps free of zero entries.
        balanced = all(
            self._indegree.get(v, 0) > 0 and
            self._indegree.get(v, 0) == self._outdegree.get(v, 0)
            for v in vertices)
        self._euler_cycle_exists = balanced and self._reaches_all(graph, vertices)

    @staticmethod
    def _reaches_all(graph, vertices):
        """Return True when every vertex is reachable from the first one."""
        if not vertices:
            return True
        seen = set([vertices[0]])
        pending = [vertices[0]]
        while pending:
            current = pending.pop()
            for adj in graph.get_adjacent_vertices(current):
                if adj not in seen:
                    seen.add(adj)
                    pending.append(adj)
        return all(v in seen for v in vertices)

    def is_euler_cycle_exists(self):
        """Return True when the graph has an Euler cycle."""
        return self._euler_cycle_exists
463 |
464 |
465 | # 4.2.24 practice, check if a graph contains hamilton path,
466 | # the following step is very simple and is given in the book.
def hamilton_path_exists(graph):
    """
    Return True when the directed acyclic graph `graph` has a Hamilton
    path, i.e. when every pair of consecutive vertices in its topological
    order is joined by an edge.

    Raises ValueError when `graph` contains a cycle, because no
    topological order exists and this method cannot decide the question.

    >>> test_data = [(2, 3), (0, 6), (0, 1), (2, 0), (11, 12),
    ...              (9, 12), (9, 10), (9, 11), (3, 5), (8, 7),
    ...              (5, 4), (0, 5), (6, 4), (6, 9), (7, 6)]
    >>> graph = Digragh()
    >>> for a, b in test_data:
    ...     graph.add_edge(a, b)
    ...
    >>> hamilton_path_exists(graph)
    False
    >>> graph_2 = Digragh(graph)
    >>> graph_2.add_edge(7, 2)
    >>> graph_2.add_edge(3, 0)
    >>> graph_2.add_edge(12, 1)
    >>> graph_2.add_edge(1, 5)
    >>> graph_2.add_edge(10, 11)
    >>> hamilton_path_exists(graph_2)
    True
    """

    order = Topological(graph).order()
    if order is None:
        raise ValueError('hamilton_path_exists requires an acyclic graph')
    vertices = list(order)
    # Stop at the first consecutive pair that is not joined by an edge.
    return all(graph.has_edge(vertices[i], vertices[i + 1])
               for i in range(len(vertices) - 1))
499 |
500 |
501 | # 4.2.25 practice
def unique_topologial_sort_order(graph):
    """
    Report whether `graph` has a unique topological order by delegating
    to hamilton_path_exists() and returning its answer unchanged.
    """
    answer = hamilton_path_exists(graph)
    return answer
504 |
505 |
506 | # 4.2.30 practice, see http://algs4.cs.princeton.edu/42digraph/TopologicalX.java.html.
507 | class TopologicalWithDegree(object):
508 |
509 | """
510 | >>> test_data = [(2, 3), (0, 6), (0, 1), (2, 0), (11, 12),
511 | ... (9, 12), (9, 10), (9, 11), (3, 5), (8, 7),
512 | ... (5, 4), (0, 5), (6, 4), (6, 9), (7, 6)]
513 | >>> graph = Digragh()
514 | >>> for a, b in test_data:
515 | ... graph.add_edge(a, b)
516 | ...
517 | >>> twd = TopologicalWithDegree(graph)
518 | >>> twd.has_order()
519 | True
520 | >>> [v for v in twd.order()]
521 | [2, 8, 0, 3, 7, 1, 5, 6, 9, 4, 11, 10, 12]
522 | >>> twd.rank(8)
523 | 1
524 | >>> twd.rank(10)
525 | 11
526 | """
527 |
528 | def __init__(self, graph):
529 | indegree = defaultdict(int)
530 | self._order = Queue()
531 | self._rank = defaultdict(int)
532 | count = 0
533 | for v in graph.vertices():
534 | for adj in graph.get_adjacent_vertices(v):
535 | indegree[adj] += 1
536 | queue = Queue()
537 | for v in graph.vertices():
538 | if indegree[v] == 0:
539 | queue.enqueue(v)
540 |
541 | while not queue.is_empty():
542 | vertex = queue.dequeue()
543 | self._order.enqueue(vertex)
544 | self._rank[vertex] = count
545 | count += 1
546 | for v in graph.get_adjacent_vertices(vertex):
547 | indegree[v] -= 1
548 | if indegree[v] == 0:
549 | queue.enqueue(v)
550 |
551 | if count != graph.vertices_size():
552 | self._order = None
553 |
554 | assert self.check(graph)
555 |
556 | def has_order(self):
557 | return self._order is not None
558 |
559 | def order(self):
560 | return self._order
561 |
562 | def rank(self, vertex):
563 | if vertex not in self._rank:
564 | return -1
565 | return self._rank[vertex]
566 |
567 | def check(self, graph):
568 | # digraph is acyclic
569 | if self.has_order():
570 | # check that ranks provide a valid topological order
571 | for vertex in graph.vertices():
572 | # check that vertex has a rank number
573 | if vertex not in self._rank:
574 | return 1
575 | for adj in graph.get_adjacent_vertices(vertex):
576 | if self._rank[vertex] > self._rank[adj]:
577 | return 2
578 | # check that ranks provide a valid topological order
579 | for index, v in enumerate(self._order):
580 | if index != self._rank[v]:
581 | return 3
582 | return True
583 | return False
584 |
# When executed as a script, run the doctests embedded in this module's docstrings.
if __name__ == '__main__':
    doctest.testmod()
587 |
--------------------------------------------------------------------------------