├── .gitattributes ├── greedy_activity_selection.py ├── randomization.py ├── README.md ├── bfs.py ├── min_span_tree.py ├── shortest_paths.py ├── dfs.py ├── min_priority_queue.py ├── dynamic_programming.py ├── huffman.py └── sorting.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /greedy_activity_selection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Oct 19 13:01:15 2019 4 | 5 | @author: Sai 6 | """ 7 | 8 | # Activity selection problem - maximize the number of activities given a list 9 | # of activities that are sorted by their finish times 10 | # Pg 421 CLRS 11 | 12 | def activity_selection(s, f): 13 | list_activities = [0] # Always take the first activity 14 | m = 0 # Index of the activity last added 15 | 16 | for k in range(1, len(f)): 17 | if s[k] >= f[m]: # Activity can be added 18 | list_activities.append(k) 19 | m = k # Update index 20 | 21 | return list_activities 22 | 23 | # Test case from Pg 415 24 | s = [1, 3, 0, 5, 3, 5, 6, 8, 8, 2, 12] 25 | f = [4, 5, 6, 7, 9, 9, 10, 11, 12, 14, 16] 26 | assert activity_selection(s, f) == [0, 3, 7, 10] -------------------------------------------------------------------------------- /randomization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 21 19:50:51 2019 4 | 5 | @author: Sai 6 | """ 7 | 8 | import random 9 | 10 | 11 | def random01(): 12 | # Function that returns 0 with probability 1/2 and 1 with probability 1/2 13 | return random.randrange(2) 14 | 15 | print(random01()) 16 | print(random01()) 17 | 18 | def randomab(a, b): 19 | # Function that returns an integer between a and b with equal probability 20 | # Solution to Problem 5.1-2 in CLRS 21 | 22 | assert a < b 23 | 24 | minimum = a 25 | maximum = b 26 | 27 | while abs(maximum - minimum) > 0.5: 28 | if random01() == 0: 29 | minimum = (minimum+maximum)/2 30 | else: 31 | maximum = (minimum+maximum)/2 32 | 33 | return int(minimum) 34 | 35 | print(randomab(2, 10)) 36 | print(randomab(2, 10)) 37 | print(randomab(2, 10)) 38 | print(randomab(2, 10)) 39 | 40 | # Randomize-In-Place algorithm from Pg 126 41 | def randomize_in_place(A): 42 | for j in range(len(A)): 43 | random_idx = randomab(j, len(A)) 44 | tmp = A[j] 45 | A[j] = A[random_idx] 46 | A[random_idx] = tmp 47 | 48 | return A 49 | 50 | A = [1, 5, 7, 6 ,2, 4, 8, 4, 8, 6] 51 | print(randomize_in_place(A)) 52 | print(randomize_in_place(A)) 53 | print(randomize_in_place(A)) 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CLRS-Python-Implementations 2 | Python implementations of some algorithms from the CLRS textbook https://en.wikipedia.org/wiki/Introduction_to_Algorithms 3 | 4 | #### Sorting (sorting.py) 5 | 6 | - Insertion sort 7 | - Insertion sort in non-ascending order 8 | - Mergesort 9 | - Heapsort 10 | * draw_heap() - a function that draws a visual representation of the heap - useful for debugging! 
11 | - Quicksort 12 | - Randomized quicksort 13 | - Counting sort 14 | 15 | 16 | #### Randomization (randomization.py) 17 | - random(a, b) - generates a random number between a and b using random(0, 1) 18 | - Randomize an array in-place 19 | 20 | 21 | #### Dynamic Programming (dynamic_programming.py) 22 | - Rod cutting problem 23 | * Recursive solution 24 | * Top-down memoized solution 25 | * Bottom-up solution 26 | 27 | - Longest common subsequence 28 | 29 | 30 | #### Min priority queue (min_priority_queue.py) 31 | 32 | 33 | #### Greedy algorithm for activity selection (greedy_activity_selection.py) 34 | 35 | #### Huffman code for data compression (huffman.py) 36 | 37 | #### Basic graph algorithms 38 | - Breadth-first search (bfs.py) 39 | - Depth-first search (dfs.py) 40 | - Topological sorting of a directed acyclic graph (DAG) (dfs.py) 41 | 42 | #### Prim's algorithm for minimum spanning tree (min_span_tree.py) 43 | 44 | #### Shortest path algorithms (shortest_paths.py) 45 | - Bellman-Ford algorithm for graphs with negative edges 46 | - Dijkstra's algorithm 47 | -------------------------------------------------------------------------------- /bfs.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | 3 | # Basic graph algorithms - Breadth-first search 4 | 5 | # Data structures for representing graphs 6 | 7 | # Node object 8 | class node(object): 9 | def __init__(self): 10 | self.num = -1 # Vertex index 11 | self.adj = [] # Adjacency list of integers that correspond to adjacent nodes 12 | self.depth = float('Inf') # Depth of the node from root 13 | self.pred = None # Predecessor node for BFS 14 | self.visited = False # Boolean indicating whether the node has been visited 15 | 16 | # Graph object 17 | class graph(object): 18 | def __init__(self, nodes, root): 19 | self.root = root 20 | self.root.depth = 0 21 | self.nodes = nodes # List of nodes 22 | 23 | for idx, cur_node in enumerate(nodes): # The vertex index for each node is the position of the node in the list 24 | if not isinstance(cur_node, node): 25 | raise TypeError 26 | cur_node.num = idx 27 | 28 | # Breadth-first search - Pg 595 29 | def BFS(graph, root=None): 30 | if root == None: 31 | root = graph.root 32 | 33 | root.visited = True 34 | root.pred = None 35 | root.depth = 0 36 | 37 | node_queue = Queue() 38 | node_queue.put(root) 39 | 40 | while not node_queue.empty(): 41 | cur_node = node_queue.get() 42 | cur_node.visited = True 43 | 44 | for neighbor in cur_node.adj: 45 | # print(neighbor) 46 | if graph.nodes[neighbor].visited == False: 47 | node_queue.put(graph.nodes[neighbor]) 48 | 49 | # For a node to be marked as visited, it needs a predecessor and depth 50 | graph.nodes[neighbor].pred = cur_node 51 | graph.nodes[neighbor].depth = cur_node.depth + 1 52 | graph.nodes[neighbor].visited = True 53 | 54 | 55 | # Print path - Make sure you run BFS before running this 56 | # Pg 601 CLRS 57 | def print_path(graph, node): 58 | if node.num == graph.root.num: 59 | print(graph.root.num) 60 | elif node.pred == None: 61 | print('No path from root to node '+str(node.num)) 62 | else: 63 | print_path(graph, node.pred) 64 | print(str(node.num)) 65 | 66 | 67 | 68 | # Test BFS - test case from Pg 590 CLRS 69 | n0 = node(); n0.adj = [1, 4]; 70 | n1 = node(); n1.adj = [0, 4, 2, 3]; 71 | n2 = node(); n2.adj = [1, 3]; 72 | n3 = node(); n3.adj = [1, 4, 2]; 73 | n4 = node(); n4.adj = [3, 0, 1]; 74 | 75 | G = graph([n0, n1, n2, n3, n4], root=n2) 76 | 77 | BFS(G) 78 | 79 | assert n2.depth 
== 0 80 | assert n1.depth == 1 81 | assert n4.depth == 2 82 | assert n0.depth == 2 83 | 84 | print_path(G, n4) 85 | print('\n') 86 | print_path(G, n0) 87 | 88 | -------------------------------------------------------------------------------- /min_span_tree.py: -------------------------------------------------------------------------------- 1 | # Prim's algorithm for building a minimum spanning tree 2 | 3 | import copy 4 | import math 5 | 6 | # Node object 7 | class node(object): 8 | def __init__(self): 9 | self.num = -1 # Vertex index 10 | self.adj = [] # Adjacency list of integers that correspond to adjacent nodes 11 | self.weight = [] # List of weights for the edges in the same order as adjacency list 12 | self.key = float('Inf') # Key for priority heap 13 | self.pred = None # Predecessor node 14 | 15 | def __eq__(self, other): # Equality method needed for comparing nodes after deep copy 16 | return self.num == other.num 17 | 18 | # Graph object 19 | class graph(object): 20 | def __init__(self, nodes, root): 21 | self.root = root 22 | self.nodes = nodes # List of nodes 23 | 24 | for idx, cur_node in enumerate(nodes): # The vertex index for each node is the position of the node in the list 25 | if not isinstance(cur_node, node): 26 | raise TypeError 27 | cur_node.num = idx 28 | 29 | def get_keys(self): # List of all keys in the graph. For debugging. 30 | keys = [] 31 | for node in self.nodes: 32 | keys.append(node.key) 33 | return keys 34 | 35 | 36 | # Min priority queue of nodes that operates on a key that can be arbitrarily set 37 | class min_priority_queue(object): 38 | def __init__(self, A): 39 | self.heap = copy.deepcopy(A) # Need to have own copy so that the graph nodes are unaffected by extract_min 40 | self.heap_size = len(self.heap) 41 | 42 | for j in range(math.floor(self.heap_size/2)-1, -1, -1): 43 | self.min_heapify(j) 44 | 45 | def get_nums(self): 46 | nums = [] 47 | for node in self.heap: 48 | nums.append(node.num) 49 | return nums 50 | 51 | def left(self, k): 52 | return (k << 1) + 1 53 | 54 | def right(self, k): 55 | return (k << 1) + 2 56 | 57 | def parent(self, k): 58 | return math.ceil(k/2) - 1 59 | 60 | def min_heapify(self, k): 61 | smallest = k 62 | if self.left(k) < self.heap_size and self.heap[self.left(k)].key < self.heap[smallest].key: 63 | smallest = self.left(k) 64 | 65 | if self.right(k) < self.heap_size and self.heap[self.right(k)].key < self.heap[smallest].key: 66 | smallest = self.right(k) 67 | 68 | if smallest != k: 69 | # print('swapping') 70 | tmp = self.heap[k] 71 | self.heap[k] = self.heap[smallest] 72 | self.heap[smallest] = tmp 73 | 74 | self.min_heapify(smallest) 75 | 76 | def extract_min(self): 77 | # print(self.get_nums()) 78 | to_return = self.heap[0] 79 | 80 | self.heap[0] = self.heap[self.heap_size-1] 81 | del self.heap[self.heap_size-1] 82 | self.heap_size -= 1 83 | 84 | self.min_heapify(0) 85 | 86 | # print(self.get_nums()) 87 | return to_return 88 | 89 | def decrease_key(self, node_num, new_val): 90 | # Decrease the key value of the node with number node_num to new_val 91 | # Need to use node_num to identify the node here because the ordering of the nodes in the graph and heap are not the same 92 | k = None 93 | for idx, node in enumerate(self.heap): 94 | if node.num == node_num: 95 | k = idx 96 | # print('k, node_num '+str(k)+' '+str(node_num)) 97 | assert new_val < self.heap[k].key # Make sure new value is smaller than the current value 98 | 99 | self.heap[k].key = new_val 100 | while self.parent(k) > -1 and self.heap[self.parent(k)].key > 
new_val: 101 | tmp = self.heap[self.parent(k)] 102 | self.heap[self.parent(k)] = self.heap[k] 103 | self.heap[k] = tmp 104 | 105 | k = self.parent(k) 106 | 107 | 108 | # Prim's algorithm for minimum spanning tree 109 | def prim_mst(graph, root=None): 110 | if root == None: 111 | root = graph.root 112 | 113 | # Initialize the nodes 114 | for node in graph.nodes: 115 | node.pred = None 116 | node.key = float('Inf') 117 | 118 | root.key = 0 119 | heap = min_priority_queue(graph.nodes) 120 | 121 | while heap.heap_size > 0: 122 | cur_node = heap.extract_min() 123 | # print('cur node: '+str(cur_node.num)) 124 | # print(graph.get_keys()) 125 | 126 | for node, weight in zip(cur_node.adj, cur_node.weight): 127 | if graph.nodes[node] in heap.heap and graph.nodes[node].key > weight: 128 | graph.nodes[node].key = weight 129 | graph.nodes[node].pred = cur_node 130 | heap.decrease_key(graph.nodes[node].num, weight) 131 | 132 | # print(graph.get_keys()) 133 | 134 | 135 | # Slightly modified version of the graph from Pg 635 136 | # Modify w(0, 7) = 20 and w(7, 8) = 21 to force the same MST as the textbook 137 | n0 = node(); n1 = node(); n2 = node(); n3 = node(); n4 = node(); n5 = node(); n6 = node(); n7 = node(); n8 = node(); 138 | n0.adj = [1, 7]; n0.weight = [4, 20]; 139 | n1.adj = [2, 7]; n1.weight = [8, 11]; 140 | n2.adj = [3, 5, 8, 1]; n2.weight = [7, 4, 2, 8]; 141 | n3.adj = [2, 4, 5]; n3.weight = [7, 9, 14]; 142 | n4.adj = [3, 5]; n4.weight = [9, 10]; 143 | n5.adj = [2, 3, 4, 6]; n5.weight = [4, 14, 10, 2]; 144 | n6.adj = [5, 7, 8]; n6.weight = [2, 1, 6]; 145 | n7.adj = [0, 1, 6, 8]; n7.weight = [20, 11, 1, 21]; 146 | n8.adj = [2, 6, 7]; n8.weight = [2, 6, 21]; 147 | 148 | G = graph([n0, n1, n2, n3, n4, n5, n6, n7, n8], n0) 149 | 150 | # First test the min priority queue 151 | n0.key = 54; n1.key = 28; n2.key = 45; n3.key = 15; n4.key = 36; n5.key = 12; n6.key = 32; n7.key = 93; n8.key = float('Inf'); 152 | 153 | heap = min_priority_queue(G.nodes) 154 | 155 | # Check that the heap property is maintained for all nodes 156 | for j in range(heap.heap_size): 157 | if heap.left(j) < heap.heap_size: 158 | assert heap.heap[heap.left(j)].key >= heap.heap[j].key 159 | if heap.right(j) < heap.heap_size: 160 | assert heap.heap[heap.right(j)].key >= heap.heap[j].key 161 | 162 | # Test extract_min 163 | min_node = heap.extract_min() 164 | assert min_node.key == 12 165 | min_node = heap.extract_min() 166 | assert min_node.key == 15 167 | min_node = heap.extract_min() 168 | assert min_node.key == 28 169 | 170 | # Test decrease_key 171 | heap.decrease_key(8, -1) 172 | min_node = heap.extract_min() 173 | assert min_node.key == -1 174 | 175 | # Now find the minimum spanning tree 176 | prim_mst(G) 177 | 178 | assert n1.pred == n0 179 | assert n8.pred == n2 180 | assert n5.pred == n2 181 | assert n7.pred == n6 182 | assert n4.pred == n3 183 | 184 | -------------------------------------------------------------------------------- /shortest_paths.py: -------------------------------------------------------------------------------- 1 | # Single source shortest path algorithms 2 | 3 | import copy 4 | import math 5 | 6 | # Node object 7 | class node(object): 8 | def __init__(self): 9 | self.num = -1 # Vertex index 10 | self.adj = [] # Adjacency list of integers that correspond to adjacent nodes 11 | self.weight = [] # List of weights for the edges in the same order as adjacency list 12 | self.key = float('Inf') # Key for priority heap 13 | self.pred = None # Predecessor node 14 | 15 | def __eq__(self, other): # Equality 
method needed for comparing nodes after deep copy 16 | return self.num == other.num 17 | 18 | # Graph object 19 | class graph(object): 20 | def __init__(self, nodes, root): 21 | self.root = root 22 | self.nodes = nodes # List of nodes 23 | 24 | for idx, cur_node in enumerate(nodes): # The vertex index for each node is the position of the node in the list 25 | if not isinstance(cur_node, node): 26 | raise TypeError 27 | cur_node.num = idx 28 | 29 | def get_keys(self): # List of all keys in the graph. For debugging. 30 | keys = [] 31 | for node in self.nodes: 32 | keys.append(node.key) 33 | return keys 34 | 35 | 36 | # Relaxation function for Bellman-Ford algorithm 37 | def relax_bf(u, v, weight): 38 | if v.key > u.key + weight: 39 | v.key = u.key + weight 40 | v.pred = u 41 | 42 | 43 | # Bellman-Ford algorithm 44 | def bellman_ford(graph, source=None): 45 | if source == None: 46 | source = graph.root 47 | 48 | source.key = 0 49 | 50 | for j in range(len(graph.nodes)-1): # Relax all edges V-1 times 51 | for node in graph.nodes: 52 | for idx, adj_node in enumerate(node.adj): 53 | relax_bf(node, graph.nodes[adj_node], node.weight[idx]) 54 | 55 | for node in graph.nodes: # Check for negative cycles by testing if the weights change after another relaxation 56 | for idx, adj_node in enumerate(node.adj): 57 | if graph.nodes[adj_node].key > node.key + node.weight[idx]: 58 | return False 59 | 60 | return True 61 | 62 | # Test case from Pg 652 63 | n0 = node(); n1 = node(); n2 = node(); n3 = node(); n4 = node(); 64 | n0.adj = [1, 3]; n0.weight = [6, 7]; 65 | n1.adj = [2, 3, 4]; n1.weight = [5, 8, -4]; 66 | n2.adj = [1]; n2.weight = [-2]; 67 | n3.adj = [2, 4]; n3.weight = [-3, 9]; 68 | n4.adj = [0, 2]; n4.weight = [2, 7]; 69 | 70 | G = graph([n0, n1, n2, n3, n4], root=n0) 71 | 72 | bellman_ford(G) 73 | 74 | assert G.get_keys() == [0, 2, 4, 7, -2] 75 | 76 | 77 | # Simple test case with a negative cycle 78 | n0 = node(); n1 = node(); n2 = node(); n3 = node(); n4 = node(); 79 | n0.adj = [1, 4]; n0.weight = [3, 2]; 80 | n1.adj = [2]; n1.weight = [6]; 81 | n2.adj = [3, 4]; n2.weight = [-7, 5]; 82 | n3.adj = [0]; n3.weight = [-3]; 83 | n4.adj = []; n4.weight = []; 84 | 85 | G = graph([n0, n1, n2, n3, n4], root=n0) 86 | 87 | assert bellman_ford(G) == False 88 | 89 | 90 | 91 | # Min priority queue of nodes that operates on a key that can be arbitrarily set - for Dijkstra's algorithm 92 | class min_priority_queue(object): 93 | def __init__(self, A): 94 | self.heap = copy.deepcopy(A) # Need to have own copy so that the graph nodes are unaffected by extract_min 95 | self.heap_size = len(self.heap) 96 | 97 | for j in range(math.floor(self.heap_size/2)-1, -1, -1): 98 | self.min_heapify(j) 99 | 100 | def get_nums(self): 101 | nums = [] 102 | for node in self.heap: 103 | nums.append(node.num) 104 | return nums 105 | 106 | def left(self, k): 107 | return (k << 1) + 1 108 | 109 | def right(self, k): 110 | return (k << 1) + 2 111 | 112 | def parent(self, k): 113 | return math.ceil(k/2) - 1 114 | 115 | def min_heapify(self, k): 116 | smallest = k 117 | if self.left(k) < self.heap_size and self.heap[self.left(k)].key < self.heap[smallest].key: 118 | smallest = self.left(k) 119 | 120 | if self.right(k) < self.heap_size and self.heap[self.right(k)].key < self.heap[smallest].key: 121 | smallest = self.right(k) 122 | 123 | if smallest != k: 124 | # print('swapping') 125 | tmp = self.heap[k] 126 | self.heap[k] = self.heap[smallest] 127 | self.heap[smallest] = tmp 128 | 129 | self.min_heapify(smallest) 130 | 131 | def 
extract_min(self): 132 | # print(self.get_nums()) 133 | to_return = self.heap[0] 134 | 135 | self.heap[0] = self.heap[self.heap_size-1] 136 | del self.heap[self.heap_size-1] 137 | self.heap_size -= 1 138 | 139 | self.min_heapify(0) 140 | 141 | # print(self.get_nums()) 142 | return to_return 143 | 144 | def decrease_key(self, node_num, new_val): 145 | # Decrease the key value of the node with number node_num to new_val 146 | # Need to use node_num to identify the node here because the ordering of the nodes in the graph and heap are not the same 147 | k = None 148 | for idx, node in enumerate(self.heap): 149 | if node.num == node_num: 150 | k = idx 151 | # print('k, node_num '+str(k)+' '+str(node_num)) 152 | assert new_val < self.heap[k].key # Make sure new value is smaller than the current value 153 | 154 | self.heap[k].key = new_val 155 | while self.parent(k) > -1 and self.heap[self.parent(k)].key > new_val: 156 | tmp = self.heap[self.parent(k)] 157 | self.heap[self.parent(k)] = self.heap[k] 158 | self.heap[k] = tmp 159 | 160 | k = self.parent(k) 161 | 162 | 163 | def relax_dijkstra(u, v, weight, heap): 164 | # Relaxation function for Dijkstra's algorithm which also calls the 'decrease_key' function at the end 165 | # print('v. key, u.key, weight '+str(v.key)+' '+str(u.key)+' '+str(weight)) 166 | if v.key > u.key + weight: 167 | v.key = u.key + weight 168 | v.pred = u 169 | heap.decrease_key(v.num, u.key + weight) 170 | 171 | 172 | # Dijkstra's algorithm 173 | def dijkstra(graph, root=None): 174 | if root == None: 175 | root = graph.root 176 | 177 | root.key = 0 178 | heap = min_priority_queue(graph.nodes) 179 | 180 | while heap.heap_size > 0: 181 | cur_node = heap.extract_min() 182 | # print('cur node '+str(cur_node.num)) 183 | 184 | for node, weight in zip(cur_node.adj, cur_node.weight): 185 | # print(node, weight) 186 | relax_dijkstra(cur_node, graph.nodes[node], weight, heap) 187 | 188 | 189 | # Graph from Pg 659 of CLRS 190 | n0 = node(); n1 = node(); n2 = node(); n3 = node(); n4 = node(); 191 | n0.adj = [1, 3]; n0.weight = [10, 5]; 192 | n1.adj = [2, 3]; n1.weight = [1, 2]; 193 | n2.adj = [4]; n2.weight = [4]; 194 | n3.adj = [1, 2, 4]; n3.weight = [3, 9, 2]; 195 | n4.adj = [0, 2]; n4.weight = [7, 6] 196 | 197 | G = graph([n0, n1, n2, n3, n4], root=n0) 198 | 199 | dijkstra(G) 200 | 201 | assert G.get_keys() == [0, 8, 9, 5, 7] 202 | 203 | 204 | 205 | -------------------------------------------------------------------------------- /dfs.py: -------------------------------------------------------------------------------- 1 | # Depth-first search and related graph algorithms 2 | 3 | # Data structures for representing graphs 4 | 5 | # Node object 6 | class node(object): 7 | def __init__(self): 8 | self.num = -1 # Vertex index 9 | self.adj = [] # Adjacency list of integers that correspond to adjacent nodes 10 | self.pred = None # Predecessor node 11 | self.visited = False # Boolean indicating whether the node has been visited 12 | self.discovered = -1 # Time when the node is discovered 13 | self.finished = -1 # Time when the node is finished 14 | 15 | # Graph object 16 | class graph(object): 17 | def __init__(self, nodes, root): 18 | self.root = root 19 | self.nodes = nodes # List of nodes 20 | 21 | for idx, cur_node in enumerate(nodes): # The vertex index for each node is the position of the node in the list 22 | if not isinstance(cur_node, node): 23 | raise TypeError 24 | cur_node.num = idx 25 | 26 | self.time = 0 # Global time used to mark when the nodes are discovered and finished 27 | 
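# A minimal convenience sketch (not part of the original file; the helper name
# build_graph is hypothetical): the test cases below wire graphs up by hand, and
# this shows the same wiring from a plain edge list using the node/graph classes above.
def build_graph(num_nodes, edges, root_idx=0):
    nodes = [node() for _ in range(num_nodes)]   # vertex index = position in this list (set by graph.__init__)
    for u, v in edges:
        nodes[u].adj.append(v)                   # directed edge u -> v
    return graph(nodes, root=nodes[root_idx])
# Example: build_graph(3, [(0, 1), (1, 2)]) builds the chain 0 -> 1 -> 2 rooted at node 0.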
28 | 29 | # First, a simple iterative version using a Python list as a stack. 30 | # Note: this version cannot compute discovered and finished times 31 | def DFS_iterative(graph, root=None): 32 | if root == None: 33 | root = graph.root 34 | 35 | # Initialize the nodes 36 | for node in graph.nodes: 37 | node.pred = None 38 | node.visited = False 39 | 40 | node_stack = [] 41 | node_stack.append(root) 42 | root.visited = True 43 | # graph.time += 1 # To maintain consistency with the recursive version 44 | # root.discovered = graph.time 45 | 46 | while len(node_stack) > 0: 47 | cur_node = node_stack.pop() 48 | for neighbor in cur_node.adj: 49 | if graph.nodes[neighbor].visited == False: 50 | graph.nodes[neighbor].pred = cur_node 51 | node_stack.append(graph.nodes[neighbor]) 52 | graph.nodes[neighbor].visited = True 53 | 54 | 55 | # Test case from Pg 605 56 | n0 = node(); n1 = node(); n2 = node(); n3 = node(); n4 = node(); n5 = node(); 57 | n0.adj = [1, 3] 58 | n1.adj = [4] 59 | n2.adj = [4, 5] 60 | n3.adj = [1] 61 | n4.adj = [3] 62 | n5.adj = [5] 63 | 64 | G = graph([n0, n1, n2, n3, n4, n5], root=n0) 65 | DFS_iterative(G) 66 | 67 | assert n0.visited == True 68 | assert n3.visited == True 69 | assert n5.visited == False 70 | assert n2.visited == False 71 | 72 | G = graph([n0, n1, n2, n3, n4, n5], root=n2) 73 | DFS_iterative(G) 74 | 75 | assert n0.visited == False 76 | assert n1.visited == True 77 | assert n2.visited == True 78 | assert n3.visited == True 79 | assert n4.visited == True 80 | assert n5.visited == True 81 | 82 | # Iterative version that also computes the discovered and finished times 83 | # While it works, it is much more inefficient than the recursive case 84 | def DFS_iterative_disc_finish_times(graph, root=None): 85 | if root == None: 86 | root = graph.root 87 | 88 | # Initialize the nodes 89 | for node in graph.nodes: 90 | node.pred = None 91 | node.visited = False 92 | node.discovered = -1 93 | node.finished = -1 94 | graph.time = 0 95 | 96 | node_stack = [] 97 | node_stack.append(root) 98 | root.visited = True 99 | 100 | graph.time += 1 101 | root.discovered = graph.time 102 | 103 | while len(node_stack) > 0: 104 | cur_node = node_stack[-1] # Peek the stack 105 | if cur_node.discovered == -1: # Discovered time is added only the first time the node is peeked 106 | graph.time += 1 107 | cur_node.discovered = graph.time 108 | 109 | # Check if this cur_node should be popped 110 | should_pop = True 111 | for neighbor in cur_node.adj: 112 | if graph.nodes[neighbor].visited == False: 113 | should_pop = False 114 | 115 | if should_pop: 116 | graph.time += 1 117 | cur_node.finished = graph.time 118 | node_stack.pop() 119 | 120 | # If not, process the node 121 | for neighbor in cur_node.adj: 122 | if graph.nodes[neighbor].visited == False: 123 | graph.nodes[neighbor].pred = cur_node 124 | node_stack.append(graph.nodes[neighbor]) 125 | graph.nodes[neighbor].visited = True 126 | 127 | 128 | G = graph([n0, n1, n2, n3, n4, n5], root=n0) 129 | DFS_iterative_disc_finish_times(G) 130 | 131 | discovered_array = [-1]*len(G.nodes) 132 | finished_array = [-1]*len(G.nodes) 133 | for idx, cur_node in enumerate(G.nodes): 134 | discovered_array[idx] = cur_node.discovered 135 | finished_array[idx] = cur_node.finished 136 | 137 | print('iterative discovered: '+str(discovered_array)) 138 | print('iterative finished: '+str(finished_array)) 139 | 140 | 141 | # Recrusive version Pg 604 142 | def DFS(graph): 143 | # Initialize the nodes 144 | for node in graph.nodes: 145 | node.pred = None 146 | 
node.visited = False 147 | node.discovered = -1 148 | node.finished = -1 149 | graph.time = 0 150 | 151 | for node in graph.nodes: 152 | if node.visited == False: 153 | DFS_visit(graph, node) 154 | 155 | def DFS_visit(graph, cur_node): 156 | graph.time += 1 157 | cur_node.discovered = graph.time 158 | cur_node.visited = True 159 | 160 | for neighbor in cur_node.adj: 161 | if graph.nodes[neighbor].visited == False: 162 | graph.nodes[neighbor].pred = cur_node 163 | DFS_visit(graph, graph.nodes[neighbor]) 164 | 165 | graph.time += 1 166 | cur_node.finished = graph.time 167 | 168 | 169 | # Test case from Pg 605 170 | DFS(G) 171 | 172 | discovered_array = [-1]*len(G.nodes) 173 | finished_array = [-1]*len(G.nodes) 174 | for idx, cur_node in enumerate(G.nodes): 175 | discovered_array[idx] = cur_node.discovered 176 | finished_array[idx] = cur_node.finished 177 | 178 | print('recursive discovered: '+str(discovered_array)) 179 | print('recursive finished: '+str(finished_array)) 180 | 181 | assert discovered_array == [1, 2, 9, 4, 3, 10] 182 | assert finished_array == [8, 7, 12, 5, 6, 11] 183 | 184 | 185 | # Topological sort of a DAG with DFS 186 | # Returns an array that is an arrangement of the initial nodes sorted topologically 187 | def topo_sort(graph): 188 | DFS(graph) 189 | 190 | finished_array = [-1]*len(graph.nodes) 191 | for idx, node in enumerate(graph.nodes): 192 | finished_array[idx] = node.finished 193 | 194 | topo_sort_list = [] 195 | for j in range(len(finished_array)): 196 | min_idx = finished_array.index(min(finished_array)) 197 | topo_sort_list.append(min_idx) 198 | finished_array[min_idx] = float('Inf') 199 | 200 | return topo_sort_list[::-1] 201 | 202 | # Test case based on Pg 613 of CLRS 203 | n0 = node(); n1 = node(); n2 = node(); n3 = node(); n4 = node(); n5 = node(); n6 = node(); n7 = node(); n8 = node(); 204 | n0.adj = [1, 7]; n1.adj = [2, 7]; n2.adj = [5]; n3.adj = [2, 4]; n4.adj = [5]; n5.adj = []; n6.adj = [7]; 205 | 206 | G = graph([n0, n1, n2, n3, n4, n5, n6, n7, n8], root=n0) 207 | 208 | print('Topological sort: '+str(topo_sort(G))) 209 | 210 | -------------------------------------------------------------------------------- /min_priority_queue.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Oct 20 13:59:50 2019 4 | 5 | @author: Sai 6 | """ 7 | 8 | import math 9 | import copy 10 | 11 | class min_priority_queue(object): 12 | def __init__(self, A): 13 | self.heap = copy.deepcopy(A) # Need to have own copy so that the array is unaffected when extract_min is run 14 | self.heap_size = len(self.heap) 15 | 16 | for j in range(math.floor(self.heap_size/2)-1, -1, -1): 17 | self.min_heapify(j) 18 | 19 | def left(self, k): 20 | return (k << 1) + 1 21 | 22 | def right(self, k): 23 | return (k << 1) + 2 24 | 25 | def parent(self, k): 26 | return math.ceil(k/2) - 1 27 | 28 | def min_heapify(self, k): 29 | # print(k) 30 | # print(self.heap) 31 | smallest = k 32 | if self.left(k) < self.heap_size and self.heap[self.left(k)] < self.heap[smallest]: 33 | smallest = self.left(k) 34 | # Do NOT use elif in the line below - did it twice so far 35 | if self.right(k) < self.heap_size and self.heap[self.right(k)] < self.heap[smallest]: 36 | smallest = self.right(k) 37 | 38 | if smallest != k: 39 | tmp = self.heap[k] 40 | self.heap[k] = self.heap[smallest] 41 | self.heap[smallest] = tmp 42 | 43 | self.min_heapify(smallest) 44 | 45 | def extract_min(self): 46 | to_return = self.heap[0] 47 | self.heap[0] = 
self.heap[self.heap_size-1] 48 | self.heap_size -= 1 49 | # print('min heapifying') 50 | self.min_heapify(0) 51 | 52 | return to_return 53 | 54 | def decrease_key(self, k, new_val): 55 | # Decrease the key value of the k-th element to new_val 56 | 57 | # Recursive version 58 | # if self.parent(k) > -1 and new_val < self.heap[self.parent(k)]: 59 | # # Swap with the parent 60 | # tmp = self.heap[self.parent(k)] 61 | # self.heap[self.parent(k)] = new_val 62 | # self.heap[k] = tmp 63 | # 64 | # # Recurse 65 | # self.decrease_key(self.parent(k), new_val) 66 | 67 | 68 | # Iterative versions are always better.. 69 | assert new_val < self.heap[k] # Make sure new value is smaller than the current value 70 | 71 | self.heap[k] = new_val 72 | while self.parent(k) > -1 and self.heap[self.parent(k)] > new_val: 73 | tmp = self.heap[self.parent(k)] 74 | self.heap[self.parent(k)] = new_val 75 | self.heap[k] = tmp 76 | 77 | k = self.parent(k) 78 | 79 | def push(self, new_val): 80 | # To push a new value, we add it at the end and let it float up 81 | self.heap_size += 1 82 | self.heap[self.heap_size-1] = float('inf') 83 | 84 | self.decrease_key(self.heap_size-1, new_val) 85 | 86 | 87 | def draw_heap(self, spacing=2, width=2): 88 | # Draws the heap 89 | 90 | # Number of levels = 1 + height of tree 91 | num_levels = math.floor(math.log(self.heap_size+1,2))+1 92 | print('heap size '+str(self.heap_size+1)) 93 | print('num levels '+str(num_levels)) 94 | # print(self.heap) 95 | 96 | # Calculate the spacing and offset arrays 97 | spacing_array = [0]*num_levels 98 | offset_array = [0]*num_levels 99 | 100 | spacing_array[-1] = spacing # Spacing at the last level is set to a fixed value 101 | offset_array[-1] = 0 # Last level has no offset 102 | 103 | # Now iterate backwards and calculate spacing and offsets for all levels 104 | for j in range(num_levels-2, -1, -1): 105 | spacing_array[j] = spacing_array[j+1]*2 + width # Spacing of the m-th level is twice the spacing of the m+1-th level 106 | offset_array[j] = offset_array[j+1] + spacing_array[j+1]/2 + width # Offset of m-th level = offset of previous level + spacing of previous level/2 107 | 108 | # Iterate backwards again and construct the string at each level 109 | str_list = ['']*num_levels 110 | fmt = '{:^'+str(width)+'d}' 111 | for height in range(num_levels-1, -1, -1): 112 | str_level = ' '*int(offset_array[height]) # First add the offset 113 | for val in range(2**height): 114 | if 2**height-1+val < self.heap_size: 115 | str_level += fmt.format(self.heap[2**height-1+val]) # Then add the number 116 | str_level += ' '*int(spacing_array[height]) # And the space between the numbers 117 | else: 118 | str_level += 'X'*width # If outside the heap, show 'X' 119 | str_level += ' '*int(spacing_array[height]) 120 | str_list[height] = str_level 121 | 122 | # Now print 123 | for height in range(num_levels): 124 | print(str_list[height]+'\n') 125 | 126 | return str_list 127 | 128 | 129 | # First let's test the heap 130 | array = [1, 3, 9, 10, 14, 8, 2, 4, 16, 7, 36, 7, 8, 99, 12, 5, 43, 1, 45, 4, 22, 67, 8, 11, 48, 9, 36, 46, 77, 99] 131 | A = min_priority_queue(array) 132 | 133 | # Check that the heap property is maintained for all nodes 134 | for j in range(A.heap_size): 135 | if A.left(j) < A.heap_size: 136 | assert A.heap[A.left(j)] >= A.heap[j] 137 | if A.right(j) < A.heap_size: 138 | assert A.heap[A.right(j)] >= A.heap[j] 139 | 140 | # Now test extract_min 141 | #print(A.heap) 142 | assert A.extract_min() == min(array) 143 | array.remove(min(array)) 144 | 
#print(A.heap) 145 | assert A.extract_min() == min(array) 146 | array.remove(min(array)) 147 | assert A.extract_min() == min(array) 148 | array.remove(min(array)) 149 | assert A.extract_min() == min(array) 150 | array.remove(min(array)) 151 | assert A.extract_min() == min(array) 152 | array.remove(min(array)) 153 | assert A.extract_min() == min(array) 154 | array.remove(min(array)) 155 | assert A.extract_min() == min(array) 156 | 157 | # Now test push 158 | A.draw_heap() 159 | A.push(-1) 160 | A.draw_heap() 161 | # Check that the heap property is maintained for all nodes 162 | for j in range(A.heap_size): 163 | if A.left(j) < A.heap_size: 164 | assert A.heap[A.left(j)] >= A.heap[j] 165 | if A.right(j) < A.heap_size: 166 | assert A.heap[A.right(j)] >= A.heap[j] 167 | 168 | assert A.extract_min() == -1 # Because the pushed value is negative and all other numbers in the heap are positive 169 | 170 | A.draw_heap() 171 | A.push(10) 172 | A.draw_heap() 173 | # Check that the heap property is maintained for all nodes 174 | for j in range(A.heap_size): 175 | if A.left(j) < A.heap_size: 176 | assert A.heap[A.left(j)] >= A.heap[j] 177 | if A.right(j) < A.heap_size: 178 | assert A.heap[A.right(j)] >= A.heap[j] -------------------------------------------------------------------------------- /dynamic_programming.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Oct 6 17:09:55 2019 4 | 5 | @author: Sai 6 | """ 7 | import time 8 | 9 | # Rod cutting problem - Chap 15.1 of CLRS 10 | rod_length = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 11 | price_array = [0, 1, 5, 8, 9, 10, 17, 17, 20, 24, 30] 12 | 13 | # Recursive solution Pg 363 14 | def cut_rod_recursive(price, length): 15 | if length == 0: 16 | return 0 17 | q = -1 18 | for idx in range(1,length+1): 19 | q = max(q, price[idx]+cut_rod_recursive(price, length-idx)) 20 | return q 21 | 22 | # Test cases from Pg 362 23 | assert cut_rod_recursive(price_array, 7) == 18 24 | assert cut_rod_recursive(price_array, 9) == 25 25 | assert cut_rod_recursive(price_array, 10) == 30 26 | 27 | # Print some runtimes for arbitrary price array 28 | t = time.time() 29 | cut_rod_recursive([2]*51, 10) 30 | print('Recursive version run time for length = 10: '+str(time.time()-t)) 31 | t = time.time() 32 | cut_rod_recursive([2]*51, 15) 33 | print('Recursive version run time for length = 15: '+str(time.time()-t)) 34 | t = time.time() 35 | cut_rod_recursive([2]*51, 20) 36 | print('Recursive version run time for length = 20: '+str(time.time()-t)) 37 | #t = time.time() 38 | #cut_rod_recursive([2]*51, 23) 39 | #print('Recursive version run time for length = 23: '+str(time.time()-t)) 40 | 41 | 42 | # Top-down memoized solution Pg 365 43 | def cut_rod_memoized(price, length): 44 | optimal_price = [-1]*(length+1) 45 | optimal_price[0] = 0 46 | return cut_rod_memoized_aux(price, length, optimal_price) 47 | 48 | def cut_rod_memoized_aux(price, length, optimal_price): 49 | if optimal_price[length] == -1: 50 | # Calculate recursively 51 | q = -1 52 | for idx in range(1, length+1): 53 | q = max(q, price[idx]+cut_rod_memoized_aux(price, length-idx, optimal_price)) 54 | optimal_price[length] = q 55 | return q 56 | else: 57 | # Lookup in the array 58 | return optimal_price[length] 59 | 60 | # Test cases from Pg 362 61 | assert cut_rod_memoized(price_array, 7) == 18 62 | assert cut_rod_memoized(price_array, 9) == 25 63 | assert cut_rod_memoized(price_array, 10) == 30 64 | 65 | # Print some runtimes for arbitrary 
price array 66 | t = time.time() 67 | cut_rod_memoized([2]*51, 10) 68 | print('Memoized version run time for length = 10: '+str(time.time()-t)) 69 | t = time.time() 70 | cut_rod_memoized([2]*51, 15) 71 | print('Memoized version run time for length = 15: '+str(time.time()-t)) 72 | t = time.time() 73 | cut_rod_memoized([2]*51, 20) 74 | print('Memoized version run time for length = 20: '+str(time.time()-t)) 75 | t = time.time() 76 | cut_rod_memoized([2]*101, 100) 77 | print('Memoized version run time for length = 100: '+str(time.time()-t)) 78 | 79 | 80 | # Bottom-up version Pg 366 81 | def cut_rod_bottom_up(price, length): 82 | optimal_price = [-1]*(length+1) 83 | optimal_price[0] = 0 84 | 85 | for i in range(1, length+1): 86 | q = -1 87 | for j in range(1, i+1): 88 | q = max(q, price[j]+optimal_price[i-j]) 89 | optimal_price[i] = q 90 | 91 | return optimal_price[length] 92 | 93 | # Test cases from Pg 362 94 | assert cut_rod_bottom_up(price_array, 7) == 18 95 | assert cut_rod_bottom_up(price_array, 9) == 25 96 | assert cut_rod_bottom_up(price_array, 10) == 30 97 | 98 | # Print some runtimes for arbitrary price array 99 | t = time.time() 100 | cut_rod_bottom_up([2]*51, 10) 101 | print('Bottom up version run time for length = 10: '+str(time.time()-t)) 102 | t = time.time() 103 | cut_rod_bottom_up([2]*51, 15) 104 | print('Bottom up version run time for length = 15: '+str(time.time()-t)) 105 | t = time.time() 106 | cut_rod_bottom_up([2]*51, 20) 107 | print('Bottom up version run time for length = 20: '+str(time.time()-t)) 108 | t = time.time() 109 | cut_rod_bottom_up([2]*101, 100) 110 | print('Bottom up version run time for length = 100: '+str(time.time()-t)) 111 | 112 | # Bottom up cut-rod with printing of the cuts 113 | def cut_rod_bottom_up_print(price, length): 114 | optimal_price = [-1]*(length+1) 115 | optimal_price[0] = 0 116 | cuts = [0]*(length+1) 117 | 118 | for i in range(1, length+1): 119 | q = -1 120 | for j in range(1, i+1): 121 | if price[j]+optimal_price[i-j] > q: 122 | q = price[j]+optimal_price[i-j] 123 | cuts[i] = j 124 | 125 | optimal_price[i] = q 126 | 127 | cut_length = length 128 | while cut_length > 0: 129 | print(cuts[cut_length]) 130 | cut_length = cut_length - cuts[cut_length] 131 | 132 | return optimal_price[length] 133 | 134 | # Test cases from Pg 362 135 | assert cut_rod_bottom_up_print(price_array, 7) == 18 136 | assert cut_rod_bottom_up_print(price_array, 9) == 25 137 | assert cut_rod_bottom_up_print(price_array, 10) == 30 138 | 139 | 140 | # Longest common subsequence - Pg 394-395 141 | def lcs(seq1, seq2): 142 | # Array to store the length of the LCS of the substrings seen so far 143 | length = [[0]*(len(seq2)+1) for i in range(len(seq1)+1)] 144 | 145 | # Array to store the steps taken. Used to reconstruct the LCS. 
The values mean the following: 146 | # 1 - drop the last character in seq1 147 | # 2 - drop the last character in seq2 148 | # 3 - last characters are the same, so the character is added to the LCS 149 | step = [[0]*(len(seq2)+1) for i in range(len(seq1)+1)] 150 | 151 | for i1, c1 in enumerate(seq1, 1): 152 | for i2, c2 in enumerate(seq2, 1): 153 | # print(i1, i2, c1, c2) 154 | if c1 == c2: 155 | length[i1][i2] = length[i1-1][i2-1] + 1 156 | step[i1][i2] = 3 157 | # print(3) 158 | elif length[i1-1][i2] > length[i1][i2-1]: 159 | length[i1][i2] = length[i1-1][i2] 160 | step[i1][i2] = 1 161 | # print(1) 162 | else: 163 | length[i1][i2] = length[i1][i2-1] 164 | step[i1][i2] = 2 165 | # print(2) 166 | # print(length) 167 | # print(length) 168 | # print(step) 169 | 170 | p1 = len(seq1) 171 | p2 = len(seq2) 172 | LCS = '' 173 | while p1 > 0 and p2 > 0: 174 | if step[p1][p2] == 1: 175 | p1 -= 1 176 | elif step[p1][p2] == 2: 177 | p2 -= 1 178 | elif step[p1][p2] == 3: 179 | # print(p1, p2, seq1[p1-1], seq2[p2-1]) 180 | LCS += seq1[p1-1] 181 | p1 -= 1 182 | p2 -= 1 183 | 184 | return LCS[::-1] 185 | 186 | seq1 = 'TCG' 187 | seq2 = 'TAC' 188 | #print(lcs(seq1, seq2)) 189 | assert lcs(seq1, seq2) == 'TC' 190 | 191 | seq1 = 'TGACTGGGT' 192 | seq2 = 'GGGG' 193 | #print(lcs(seq1, seq2)) 194 | assert lcs(seq1, seq2) == 'GGGG' 195 | 196 | # Test case from Pg 391 of CLRS 197 | seq1 = 'ACCGGTCGAGTGCGCGGAAGCCGGCCGAA' 198 | seq2 = 'GTCGTTCGGAATGCCGTTGCTCTGTAAA' 199 | #print(lcs(seq1, seq2)) 200 | assert lcs(seq1, seq2) == 'GTCGTCGGAAGCCGGCCGAA' -------------------------------------------------------------------------------- /huffman.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Oct 19 13:44:32 2019 4 | 5 | @author: Sai 6 | """ 7 | 8 | import math 9 | import copy 10 | 11 | # Huffman codes for data compression 12 | # Pg 428 CLRS 13 | 14 | # First the data structure for holding the Huffman code 15 | class huffman_node(object): 16 | # Node object has the sum of frequency all leaves under the node 17 | # It's children can either be other nodes or leaves 18 | def __init__(self, freq): 19 | self.freq = freq 20 | self.left = None 21 | self.right = None 22 | 23 | # Equality operator is useful for testing 24 | def __eq__(self, other): 25 | if not isinstance(other, huffman_node): 26 | return NotImplemented 27 | else: 28 | return self.freq == other.freq 29 | 30 | class huffman_leaf(object): 31 | # Leaf object has a character and an associated frequency 32 | def __init__(self, char, freq): 33 | self.char = char 34 | self.freq = freq 35 | self.parent = None 36 | 37 | # Equality operator is useful for testing 38 | def __eq__(self, other): 39 | if not isinstance(other, huffman_leaf): 40 | return NotImplemented 41 | else: 42 | return self.freq == other.freq and self.char == other.char 43 | 44 | 45 | class huffman_tree(object): 46 | # Huffman tree object 47 | def __init__(self, char_freq_dict): 48 | # When the tree is initialized we have a bunch of disconnected leaves 49 | self.root = None 50 | self.leaves = [] 51 | for char in iter(char_freq_dict): 52 | new_leaf = huffman_leaf(char, char_freq_dict[char]) 53 | self.leaves.append(new_leaf) 54 | 55 | def build_code(self): 56 | self.min_queue = min_queue_huffman(self.leaves) 57 | num_leaves = len(self.leaves) 58 | for j in range(num_leaves-1): 59 | x = self.min_queue.extract_min() 60 | y = self.min_queue.extract_min() 61 | 62 | z = huffman_node(x.freq + y.freq) 63 | z.left = x 64 | 
z.right = y 65 | 66 | self.min_queue.push_node(z) 67 | # print(z.freq) 68 | 69 | self.root = self.min_queue.extract_min() 70 | 71 | # Traverse the tree and build a character-to-code dictionary 72 | # cur_node = self.root 73 | 74 | def encode(self, message): 75 | assert self.root != None # Make sure the build_code function was run 76 | encoded_str = '' 77 | pass 78 | 79 | 80 | def decode(self, message): 81 | assert self.root != None # Make sure the build_code function was run 82 | decoded_str = '' 83 | cur_char = 0 # Pointer to the current character being read 84 | while cur_char < len(message): 85 | # print('root') 86 | cur_node = self.root 87 | while isinstance(cur_node, huffman_node): 88 | if message[cur_char] == '0': 89 | # print('0') 90 | cur_node = cur_node.left 91 | cur_char += 1 92 | elif message[cur_char] == '1': 93 | # print('1') 94 | cur_node = cur_node.right 95 | cur_char += 1 96 | else: 97 | print('unrecognized char') 98 | break 99 | decoded_str += cur_node.char 100 | # print(decoded_str) 101 | 102 | return decoded_str 103 | 104 | 105 | class min_queue_huffman(object): 106 | # A min priority queue for nodes in Huffman code 107 | def __init__(self, A): 108 | self.heap = copy.deepcopy(A) # Need to have own copy so that the originial array is unaffected when extract_min is run 109 | self.heap_size = len(self.heap) 110 | 111 | for j in range(math.floor(self.heap_size/2)-1, -1, -1): 112 | self.min_heapify(j) 113 | 114 | def left(self, k): 115 | return (k << 1) + 1 116 | 117 | def right(self, k): 118 | return (k << 1) + 2 119 | 120 | def parent(self, k): 121 | return math.ceil(k/2) - 1 122 | 123 | def min_heapify(self, k): 124 | smallest = k 125 | if self.left(k) < self.heap_size and self.heap[self.left(k)].freq < self.heap[smallest].freq: 126 | smallest = self.left(k) 127 | # Do NOT use elif in the line below - did it twice so far 128 | if self.right(k) < self.heap_size and self.heap[self.right(k)].freq < self.heap[smallest].freq: 129 | smallest = self.right(k) 130 | 131 | if smallest != k: 132 | tmp = self.heap[k] 133 | self.heap[k] = self.heap[smallest] 134 | self.heap[smallest] = tmp 135 | 136 | self.min_heapify(smallest) 137 | 138 | def extract_min(self): 139 | to_return = self.heap[0] 140 | self.heap[0] = self.heap[self.heap_size-1] 141 | self.heap_size -= 1 142 | self.min_heapify(0) 143 | 144 | return to_return 145 | 146 | 147 | def push_node(self, new_node): 148 | # To push in a new Huffman node, add it at the end and let it float up 149 | assert isinstance(new_node, huffman_node) == True # Can only push nodes, not leaves 150 | # print(new_node.freq) 151 | self.heap_size += 1 152 | self.heap[self.heap_size-1] = new_node 153 | 154 | # If the frequency of the new node is larger than its parent, then we are done 155 | if new_node.freq > self.heap[self.parent(self.heap_size-1)].freq: 156 | # print('passed') 157 | pass 158 | else: 159 | self.decrease_key(self.heap_size-1, new_node) 160 | 161 | 162 | def decrease_key(self, k, new_node): 163 | # Auxillary function for push 164 | # Puts the new node in the correct place 165 | while self.parent(k) > -1 and self.heap[self.parent(k)].freq > new_node.freq: 166 | tmp = self.heap[self.parent(k)] 167 | self.heap[self.parent(k)] = new_node 168 | self.heap[k] = tmp 169 | 170 | k = self.parent(k) 171 | 172 | 173 | def draw_heap(self, spacing=2, width=2): 174 | # Draws the heap 175 | 176 | # Number of levels = 1 + height of tree 177 | num_levels = math.floor(math.log(self.heap_size,2))+1 178 | print('heap size '+str(self.heap_size)) 179 | 
print('num levels '+str(num_levels)) 180 | # print(self.heap) 181 | 182 | # Calculate the spacing and offset arrays 183 | spacing_array = [0]*num_levels 184 | offset_array = [0]*num_levels 185 | 186 | spacing_array[-1] = spacing # Spacing at the last level is set to a fixed value 187 | offset_array[-1] = 0 # Last level has no offset 188 | 189 | # Now iterate backwards and calculate spacing and offsets for all levels 190 | for j in range(num_levels-2, -1, -1): 191 | spacing_array[j] = spacing_array[j+1]*2 + width # Spacing of the m-th level is twice the spacing of the m+1-th level 192 | offset_array[j] = offset_array[j+1] + spacing_array[j+1]/2 + width # Offset of m-th level = offset of previous level + spacing of previous level/2 193 | 194 | # Iterate backwards again and construct the string at each level 195 | str_list = ['']*num_levels 196 | fmt = '{:^'+str(width)+'d}' 197 | for height in range(num_levels-1, -1, -1): 198 | str_level = ' '*int(offset_array[height]) # First add the offset 199 | for val in range(2**height): 200 | if 2**height-1+val < self.heap_size: 201 | str_level += fmt.format(self.heap[2**height-1+val].freq) # Then add the number 202 | str_level += ' '*int(spacing_array[height]) # And the space between the numbers 203 | else: 204 | str_level += 'X'*width # If outside the heap, show 'X' 205 | str_level += ' '*int(spacing_array[height]) 206 | str_list[height] = str_level 207 | 208 | # Now print 209 | for height in range(num_levels): 210 | print(str_list[height]+'\n') 211 | 212 | return str_list 213 | 214 | # First test the Huffman node and leaf 215 | n1 = huffman_node(10) 216 | assert n1.freq == 10 217 | leaf1 = huffman_leaf('a', 10) 218 | assert leaf1.char == 'a' 219 | assert leaf1.freq == 10 220 | leaf2 = huffman_leaf('b', 20) 221 | assert leaf2.char == 'b' 222 | assert leaf2.freq == 20 223 | 224 | # Now test the Huffman tree object 225 | char_freq_dict = {'a':10, 'b':20, 'c':30, 'd':40} 226 | htree = huffman_tree(char_freq_dict) 227 | assert htree.leaves[0] == leaf1 228 | assert htree.leaves[1] == leaf2 229 | assert htree.leaves[2].freq == 30 230 | assert htree.leaves[3].char == 'd' 231 | 232 | # Make a list of leaves to test the min_queue_huffman class 233 | leaves = [huffman_leaf('a', 10), huffman_leaf('b', 20), huffman_leaf('c', 30), huffman_leaf('d', 40), huffman_leaf('e', 50), huffman_leaf('f', 60), huffman_leaf('g', 70)] 234 | min_queue = min_queue_huffman(leaves) 235 | min_queue.draw_heap() 236 | 237 | assert min_queue.extract_min() == leaf1 238 | assert min_queue.extract_min() == leaf2 239 | min_queue.draw_heap() 240 | 241 | node10 = huffman_node(10) 242 | node20 = huffman_node(20) 243 | min_queue.push_node(node10) 244 | min_queue.draw_heap() 245 | min_queue.push_node(node20) 246 | min_queue.draw_heap() 247 | assert min_queue.extract_min() == node10 248 | assert min_queue.extract_min() == node20 249 | 250 | 251 | # Now we are ready to test the code building 252 | # Test case from Pg 429 CLRS 253 | char_freq_dict = {'a':45, 'b':13, 'c':12, 'd':16, 'e':9, 'f':5} 254 | htree = huffman_tree(char_freq_dict) 255 | htree.build_code() 256 | hroot = htree.root 257 | assert hroot.freq == 100 258 | assert hroot.left.char == 'a' 259 | assert hroot.left.freq == char_freq_dict['a'] 260 | assert hroot.right.left.left.char == 'c' 261 | assert hroot.right.left.left.freq == char_freq_dict['c'] 262 | assert hroot.right.right.left.right.char == 'e' 263 | assert hroot.right.right.left.right.freq == char_freq_dict['e'] 264 | 265 | # Test case from 
https://www.geeksforgeeks.org/huffman-coding-greedy-algo-3/ 266 | char_freq_dict = {'a':5, 'b':9, 'c':12, 'd':13, 'e':16, 'f':45} 267 | htree = huffman_tree(char_freq_dict) 268 | htree.build_code() 269 | hroot = htree.root 270 | assert hroot.freq == 100 271 | assert hroot.left.char == 'f' 272 | assert hroot.left.freq == char_freq_dict['f'] 273 | assert hroot.right.left.left.char == 'c' 274 | assert hroot.right.left.left.freq == char_freq_dict['c'] 275 | assert hroot.right.right.left.right.char == 'b' 276 | assert hroot.right.right.left.right.freq == char_freq_dict['b'] 277 | 278 | 279 | # Now test decoding 280 | char_freq_dict = {'a':45, 'b':13, 'c':12, 'd':16, 'e':9, 'f':5} 281 | htree = huffman_tree(char_freq_dict) 282 | htree.build_code() 283 | 284 | encoded_str = '001011101' 285 | assert htree.decode(encoded_str) == 'aabe' 286 | 287 | encoded_str = '110011011111001010' 288 | assert htree.decode(encoded_str) == 'fedcba' 289 | 290 | encoded_str = '010110011111011100110011011111001010' 291 | assert htree.decode(encoded_str) == 'abcdeffedcba' -------------------------------------------------------------------------------- /sorting.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Sep 8 12:42:29 2019 4 | 5 | @author: Sai 6 | """ 7 | import math 8 | import random 9 | 10 | # 9/8/2019 11 | # Insertion sort - Pg 18 CLRS 12 | def insertion_sort(array): 13 | for (idx, key) in enumerate(array[1:]): 14 | i = idx # Even though i and idx are equal, key = a[i+1] 15 | while i > -1 and array[i] > key: 16 | array[i+1] = array[i] 17 | i = i - 1 18 | array[i+1] = key 19 | return array 20 | 21 | array = [5, 2, 4, 6, 1, 3] 22 | #print(insertion_sort(array)) 23 | assert insertion_sort(array) == [1, 2, 3, 4, 5, 6] 24 | array = [31, 41, 59, 26, 41, 58] 25 | #print(insertion_sort(array)) 26 | assert insertion_sort(array) == [26, 31, 41, 41, 58, 59] 27 | 28 | # Insertion sort in nonascending order - Pg 22 CLRS - Exercise 2.1.2 29 | def insertion_sort_descending(array): 30 | for (idx, key) in enumerate(array[1:]): 31 | i = idx # Even though i and idx are equal, key = a[i+1] 32 | while i > -1 and array[i] < key: 33 | array[i+1] = array[i] 34 | i = i - 1 35 | array[i+1] = key 36 | return array 37 | 38 | array = [5, 2, 4, 6, 1, 3] 39 | #print(insertion_sort_descending(array)) 40 | assert insertion_sort_descending(array) == [6, 5, 4, 3, 2, 1] 41 | array = [31, 41, 59, 26, 41, 58] 42 | #print(insertion_sort_descending(array)) 43 | assert insertion_sort_descending(array) == [59, 58, 41, 41, 31, 26] 44 | 45 | # Mergesort - Pg 31-34 CLRS 46 | # First write the merge function and show that it works 47 | def merge(array, p, q, r): 48 | left = array[p:q+1] 49 | right = array[q+1:r+1] 50 | left.append(float('inf')) 51 | right.append(float('inf')) 52 | # print(left, right) 53 | i = 0; j = 0; 54 | for k in range(r-p+1): 55 | if left[i] <= right[j]: 56 | array[k+p] = left[i] 57 | i += 1 58 | else: 59 | array[k+p] = right[j] 60 | j += 1 61 | return array 62 | 63 | array = [1, 2, 4, 7, 3, 6, 9, 12] 64 | #print(merge(array, 0, 3, len(array)-1)) 65 | assert merge(array, 0, 3, len(array)-1) == [1, 2, 3, 4, 6, 7, 9, 12] 66 | 67 | # The merge sort itself is pretty simple 68 | def merge_sort(array, p=0, r=None): 69 | if r == None: 70 | r = len(array)-1 71 | if p < r: 72 | q = math.floor((r+p)/2) 73 | merge_sort(array, p, q) 74 | merge_sort(array, q+1, r) 75 | # print('p, r: ', p, r) 76 | merge(array, p, q, r) 77 | # print(array) 78 | return array 
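# The merge() above relies on float('inf') sentinels so the main loop never runs off
# the end of either half. A sentinel-free merge is also possible (CLRS Exercise 2.3-2
# asks for one); a minimal sketch, with the hypothetical name merge_no_sentinel, that
# merges while both halves still have elements and then copies whatever remains:
def merge_no_sentinel(array, p, q, r):
    left = array[p:q+1]
    right = array[q+1:r+1]
    i = 0; j = 0; k = p
    while i < len(left) and j < len(right):   # Take the smaller head while both halves are non-empty
        if left[i] <= right[j]:
            array[k] = left[i]; i += 1
        else:
            array[k] = right[j]; j += 1
        k += 1
    while i < len(left):                      # Copy any leftover of the left half...
        array[k] = left[i]; i += 1; k += 1
    while j < len(right):                     # ...or of the right half
        array[k] = right[j]; j += 1; k += 1
    return array

assert merge_no_sentinel([1, 2, 4, 7, 3, 6, 9, 12], 0, 3, 7) == [1, 2, 3, 4, 6, 7, 9, 12]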
79 | 80 | array = [5, 2, 4, 6, 1, 3] 81 | #print(merge_sort(array)) 82 | assert merge_sort(array) == [1, 2, 3, 4, 5, 6] 83 | array = [31, 41, 59, 26, 41, 58] 84 | #print(merge_sort(array)) 85 | assert merge_sort(array) == [26, 31, 41, 41, 58, 59] 86 | 87 | 88 | # Heapsort - Chapter 6 CLRS 89 | 90 | # First define heap object 91 | class heap(object): # Heap class 92 | 93 | def __init__(self, A): 94 | # Initializing with an array automatically builds the heap 95 | 96 | self.heap = A # Array representation of the heap 97 | self.heap_size = len(self.heap)-1 # Index where the heap ends. Default is the last element 98 | 99 | for j in range(math.floor(len(self.heap)/2)-1, -1, -1): 100 | # print(j) 101 | self.max_heapify(j) 102 | 103 | 104 | def max_heapify(self, j): 105 | # print(j) 106 | l = self.left(j) 107 | r = self.right(j) 108 | # print(j) 109 | largest = j 110 | 111 | if l <= self.heap_size and self.heap[l] > self.heap[j]: 112 | largest = l 113 | 114 | if r <= self.heap_size and self.heap[r] > self.heap[largest]: 115 | largest = r 116 | 117 | if largest != j: 118 | tmp = self.heap[largest] 119 | self.heap[largest] = self.heap[j] 120 | self.heap[j] = tmp 121 | 122 | self.max_heapify(largest) 123 | 124 | # print(j) 125 | # print(self.heap) 126 | 127 | 128 | def left(self, j): 129 | # Returns the index of the left child node 130 | return (j << 1) + 1 131 | 132 | def right(self, j): 133 | # Returns the index of the right child node 134 | return (j << 1) + 2 135 | 136 | def draw_heap(self, spacing=2, width=2): 137 | # Draws the heap 138 | 139 | assert width%2 == 0 140 | 141 | # Number of levels = 1 + height of tree 142 | num_levels = math.floor(math.log(self.heap_size+1,2))+1 143 | print('heap size '+str(self.heap_size+1)) 144 | print('num levels '+str(num_levels)) 145 | 146 | # Calculate the spacing and offset arrays 147 | spacing_array = [0]*num_levels 148 | offset_array = [0]*num_levels 149 | 150 | spacing_array[-1] = spacing # Spacing at the last level is set to a fixed value 151 | offset_array[-1] = 0 # Last level has no offset 152 | 153 | # Now iterate backwards and calculate spacing and offsets for all levels 154 | for j in range(num_levels-2, -1, -1): 155 | spacing_array[j] = spacing_array[j+1]*2 + width # Spacing of the m-th level is twice the spacing of the m+1-th level 156 | offset_array[j] = offset_array[j+1] + spacing_array[j+1]/2 + width # Offset of m-th level = offset of previous level + spacing of previous level/2 157 | 158 | # Iterate backwards again and construct the string at each level 159 | str_list = ['']*num_levels 160 | fmt = '{:^'+str(width)+'d}' 161 | for height in range(num_levels-1, -1, -1): 162 | str_level = ' '*int(offset_array[height]) # First add the offset 163 | for val in range(2**height): 164 | if 2**height-1+val <= self.heap_size: 165 | str_level += fmt.format(self.heap[2**height-1+val]) # Then add the number 166 | str_level += ' '*int(spacing_array[height]) # And the space between the numbers 167 | str_list[height] = str_level 168 | 169 | # Now print 170 | for height in range(num_levels): 171 | print(str_list[height]+'\n') 172 | 173 | return str_list 174 | 175 | 176 | A = heap([1, 3, 9, 10, 14, 8, 2, 4, 16, 7, 36, 7, 8, 99, 12, 5, 43, 1, 1, 56, 5, 23, 67, 8, 9, 4, 65, 6, 22]) 177 | 178 | # Check that the left and right functions work as expected 179 | assert(A.left(2)==5) 180 | assert(A.right(1)==4) 181 | assert(A.left(3)==7) 182 | 183 | # Check that the heap property is maintained for all nodes 184 | for j in range(A.heap_size+1): 185 | l = A.left(j) 186 | 
r = A.right(j) 187 | 188 | if l <= A.heap_size: 189 | assert A.heap[A.left(j)] <= A.heap[j] 190 | if r <= A.heap_size: 191 | assert A.heap[A.right(j)] <= A.heap[j] 192 | 193 | str_list = A.draw_heap() 194 | 195 | 196 | 197 | # Heapsort algorithm 198 | def heapsort(array): 199 | 200 | # Build the heap 201 | H = heap(array) 202 | 203 | # Take out the first element and reduce the size of the heap 204 | for j in range(len(array)): 205 | # Swap the largest element in the heap with the last element in the heap 206 | tmp = H.heap[0] 207 | H.heap[0] = H.heap[H.heap_size] 208 | H.heap[H.heap_size] = tmp 209 | 210 | # Decrement heap size 211 | H.heap_size -= 1 212 | 213 | # Max-Heapify 214 | H.max_heapify(0) 215 | 216 | return H.heap 217 | 218 | array = [5, 2, 4, 6, 1, 3] 219 | #print(heapsort(array)) 220 | assert heapsort(array) == [1, 2, 3, 4, 5, 6] 221 | array = [31, 41, 59, 26, 41, 58] 222 | #print(heapsort(array)) 223 | assert heapsort(array) == [26, 31, 41, 41, 58, 59] 224 | 225 | 226 | # Quicksort - Chapter 7 CLRS 227 | 228 | # First define the partioning function 229 | def partition(A, p=0, r=None): 230 | # Partition the array A[p:r] for use with Quicksort 231 | if r == None: 232 | r = len(A)-1 233 | 234 | i = p - 1 235 | for j_iter in range(r-p): 236 | j = j_iter + p 237 | # print(j) 238 | # print(A) 239 | 240 | if A[j] < A[r]: 241 | i = i + 1 242 | tmp = A[i] 243 | A[i] = A[j] 244 | A[j] = tmp 245 | 246 | tmp = A[i+1] 247 | A[i+1] = A[r] 248 | A[r] = tmp 249 | 250 | return i + 1 251 | 252 | array = [5, 2, 4, 6, 1, 3] 253 | partition(array) 254 | assert array == [2, 1, 3, 6, 5, 4] 255 | 256 | array = [2, 8, 7, 1, 3, 5, 6, 4] 257 | partition(array) 258 | assert array == [2, 1, 3, 4, 7, 5, 6, 8] 259 | 260 | # Quicksort 261 | def quicksort(A, p=0, r=None): 262 | if r == None: 263 | r = len(A)-1 264 | 265 | # print(p, r) 266 | # print(A) 267 | 268 | if p < r: 269 | q = partition(A, p, r) 270 | quicksort(A, p, q-1) 271 | quicksort(A, q+1, r) 272 | 273 | return A 274 | 275 | array = [5, 2, 4, 6, 1, 3] 276 | #print(quicksort(array)) 277 | assert quicksort(array) == [1, 2, 3, 4, 5, 6] 278 | 279 | array = [31, 41, 59, 26, 41, 58] 280 | #print(quicksort(array)) 281 | assert quicksort(array) == [26, 31, 41, 41, 58, 59] 282 | 283 | 284 | # Randomized partition 285 | def partition_random(A, p=0, r=None): 286 | if r == None: 287 | r = len(A)-1 288 | 289 | rand_idx = random.randint(p, r) 290 | tmp = A[rand_idx] 291 | A[rand_idx] = A[r] 292 | A[r] = tmp 293 | 294 | return partition(A, p, r) 295 | 296 | array = [5, 2, 4, 6, 1, 3] 297 | partition_random(array) 298 | print(array) 299 | 300 | def quicksort_random(A, p=0, r=None): 301 | if r == None: 302 | r = len(A)-1 303 | 304 | if p < r: 305 | q = partition_random(A, p, r) 306 | quicksort_random(A, p, q-1) 307 | quicksort_random(A, q+1, r) 308 | 309 | return A 310 | 311 | array = [5, 2, 4, 6, 1, 3] 312 | #print(quicksort(array)) 313 | assert quicksort_random(array) == [1, 2, 3, 4, 5, 6] 314 | 315 | array = [31, 41, 59, 26, 41, 58] 316 | #print(quicksort(array)) 317 | assert quicksort_random(array) == [26, 31, 41, 41, 58, 59] 318 | 319 | 320 | # Given array A of size n which contains integers from p to p+n, sort A in place 321 | def consecutive_numbers_sort_in_place(A, p=0): 322 | j = 0 323 | for j in range(len(A)): 324 | while A[j] != p+j: 325 | pos = A[j] - p 326 | 327 | tmp = A[pos] 328 | A[pos] = A[j] 329 | A[j] = tmp 330 | return A 331 | 332 | A = [1, 2, 4, 0, 5, 7, 6, 8, 10, 9, 3] 333 | assert consecutive_numbers_sort_in_place(A) == [0, 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10] 334 | 335 | 336 | # Counting sort - Pg 195 337 | def counting_sort(A, max_elem=None): 338 | if max_elem == None: 339 | max_elem = max(A) 340 | 341 | # Initialize the output and count arrays 342 | out = [0]*len(A) 343 | count = [0]*(max_elem+1) # Including zeros 344 | 345 | # Populate the count array 346 | for val in A: 347 | count[val] += 1 348 | # print(count) 349 | 350 | # Keep a running sum 351 | for idx in range(len(count)-1): 352 | idx = idx + 1 353 | count[idx] = count[idx] + count[idx-1] 354 | # print(count) 355 | 356 | # Now sort 357 | for idx in range(len(A)-1,-1,-1): 358 | out[count[A[idx]]-1] = A[idx] 359 | count[A[idx]] -= 1 360 | 361 | return out 362 | 363 | array = [0, 4, 5, 1, 5, 2, 3, 4, 2, 0, 0, 3, 1, 2, 4] 364 | assert counting_sort(array) == [0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5] 365 | 366 | array = [0, 1, 2, 5, 6, 9, 7, 8, 4, 4, 6, 3, 8, 5, 2, 7, 9, 0, 1, 5, 7, 4, 2, 6, 9] 367 | assert counting_sort(array) == [0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9] 368 | 369 | array = [1, 1, 1, 1, 1] 370 | assert counting_sort(array) == [1, 1, 1, 1, 1] 371 | 372 | --------------------------------------------------------------------------------
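The final loop of counting_sort walks the input backwards, which is what makes the sort stable (equal keys keep their original relative order), and stability is the property that lets counting sort act as the per-digit subroutine of radix sort (CLRS Section 8.3). Below is a minimal sketch of that idea, not part of the repository, using a hypothetical helper counting_sort_by_key that sorts arbitrary records by an integer key:

def counting_sort_by_key(records, key, max_key):
    count = [0] * (max_key + 1)
    for rec in records:                       # Histogram of key values
        count[key(rec)] += 1
    for k in range(1, max_key + 1):           # Running sum gives final positions
        count[k] += count[k - 1]
    out = [None] * len(records)
    for rec in reversed(records):             # Backwards pass preserves the order of equal keys
        count[key(rec)] -= 1
        out[count[key(rec)]] = rec
    return out

# Records with equal keys keep their original relative order:
pairs = [(3, 'a'), (1, 'b'), (3, 'c'), (2, 'd'), (1, 'e')]
assert counting_sort_by_key(pairs, key=lambda p: p[0], max_key=3) == [(1, 'b'), (1, 'e'), (2, 'd'), (3, 'a'), (3, 'c')]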