// Repository layout (file list preserved from the dump header):
//   ├── Aho-Corasick.cpp
//   ├── Gale-Shapley.cpp
//   ├── Huffman_Tree.cpp
//   ├── KMP.cpp
//   ├── README.md
//   ├── binary_heap.cpp
//   ├── directed_graph.cpp
//   ├── divide_and_conquer.cpp
//   ├── greedy.cpp
//   ├── trie.cpp
//   ├── undirected_graph.cpp
//   └── union_find.cpp
//
// /Aho-Corasick.cpp:
// --------------------------------------------------------------------------------
// The Aho-Corasick Algorithm for multi-pattern matching
// Output sensitive. O(max(input, output)) time complexity

#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

// The algorithm currently applies to strings consisting of lower-case English characters but could be
// easily adapted to a broader set of inputs by changing the two constants below.
const int SIZE = 26;
const char FIRST = 'a';

// Based on TrieNode
struct ACNode {
    ACNode* children[SIZE] {};
    int pattern = -1;  // The index of the pattern in p ending at current node, or -1 if non-existent
    int depth = 0;     // Number of characters on the path from the root (the root has depth 0)
    // Fail is the ending node of the longest prefix in trie which is a proper suffix of the string
    // ending with current node. Denotes which node to move to if next match fails.
    ACNode* fail = nullptr;
    // Out is the ending node of the longest pattern in trie which is a proper suffix of the string
    // ending with current node (null if there is none).
    ACNode* out = nullptr;
};

// Maps a character to its slot in ACNode::children, or -1 if it lies outside the alphabet
// [FIRST, FIRST + SIZE).
static int childIndex (char c) {
    const int idx = c - FIRST;
    return (idx >= 0 && idx < SIZE) ? idx : -1;
}

// Builds a trie from given list of patterns, neglecting fail and out pointers.
// Throws std::invalid_argument if a pattern contains a character outside the alphabet; all patterns
// are validated before the first node is allocated, so nothing leaks when that happens.
// The caller owns the returned trie (release it with freeAutomaton).
ACNode* buildTrie (const vector<string>& p) {
    for (const string& s : p) {
        for (char c : s) {
            if (childIndex(c) < 0) {
                throw invalid_argument("buildTrie: pattern contains a character outside the supported alphabet");
            }
        }
    }

    ACNode* root = new ACNode();
    for (size_t i = 0; i < p.size(); i++) {
        ACNode* cur = root;
        int charCount = 0;
        for (char c : p[i]) {
            ACNode*& next = cur->children[childIndex(c)];
            if (!next) {
                next = new ACNode();
            }
            cur = next;
            cur->depth = ++charCount;
        }
        cur->pattern = static_cast<int>(i);
    }
    return root;
}

// Fills the suffix link (fail pointer) for each node
void fillSuffixLink (ACNode* root) {
    // Traverse the trie in BFS order
    queue<ACNode*> q;
    // The root has no suffix link, while the suffix links of direct descendants of root point to the root
    for (ACNode* child : root->children) {
        if (child) {
            child->fail = root;
            q.push(child);
        }
    }
    while (!q.empty()) {
        // When visiting a node, fills the suffix link for all its children
        ACNode* cur = q.front();
        q.pop();
        for (int i = 0; i < SIZE; i++) {
            ACNode* child = cur->children[i];
            if (!child) {
                continue;
            }
            q.push(child);
            // Walk cur's chain of fail pointers until a node with a child on the same character is
            // found or the root is reached.
            ACNode* nextFail = cur->fail;
            while (nextFail != root && !nextFail->children[i]) {
                nextFail = nextFail->fail;
            }
            // If even the root has no such child, the suffix link must point to the root itself.
            child->fail = nextFail->children[i] ? nextFail->children[i] : root;
        }
    }
}

// Fills the output link for each node. Not all nodes have an output link.
// Pre: all suffix links filled
void fillOutputLink (ACNode* root) {
    // Traverse the trie in BFS order so that a node's fail node is always processed before the node
    queue<ACNode*> q;
    q.push(root);
    while (!q.empty()) {
        ACNode* cur = q.front();
        q.pop();
        for (ACNode* child : cur->children) {
            if (child) {
                q.push(child);
            }
        }
        // If cur's fail node denotes a pattern, let cur's output link point to it.
        // Otherwise let cur's output link point to cur's fail node's output node (may be null).
        if (cur != root) {
            cur->out = (cur->fail->pattern != -1) ? cur->fail : cur->fail->out;
        }
    }
}

// Builds an automaton from a list of patterns. The caller owns the result (see freeAutomaton).
ACNode* buildAutomaton (const vector<string>& p) {
    ACNode* root = buildTrie(p);
    fillSuffixLink(root);
    fillOutputLink(root);
    return root;
}

// Releases every node of an automaton/trie built by buildTrie or buildAutomaton.
// Only the children pointers own memory; fail and out merely point at other trie nodes.
void freeAutomaton (ACNode* root) {
    if (!root) {
        return;
    }
    vector<ACNode*> pending(1, root);
    while (!pending.empty()) {
        ACNode* node = pending.back();
        pending.pop_back();
        for (ACNode* child : node->children) {
            if (child) {
                pending.push_back(child);
            }
        }
        delete node;
    }
}

// Prints all occurrences (start index) of all given patterns in a string
// Pre: p contains distinct patterns
// A character of s outside the alphabet cannot be part of any match, so it simply resets the
// automaton to the root. Throws std::invalid_argument if a pattern is outside the alphabet.
void query (const string& s, const vector<string>& p) {
    ACNode* root = buildAutomaton(p);
    ACNode* cur = root;
    const int n = static_cast<int>(s.length());
    int i = 0;
    while (i < n) {
        const int c = childIndex(s[i]);
        if (c < 0) {
            cur = root;
            i++;
            continue;
        }
        if (cur->children[c]) {
            cur = cur->children[c];  // If next character matches, move cur to the matching child node
            // If cur denotes the end of a pattern, declare discovery
            if (cur->pattern != -1) {
                cout << "Pattern " << p[cur->pattern] << " found at index " << i - cur->depth + 1 << '\n';
            }
            // If the string ending with cur has proper suffixes which are patterns,
            // follow the output link and declare discovery for each pattern found
            for (ACNode* outl = cur->out; outl && outl->pattern != -1; outl = outl->out) {
                cout << "Pattern " << p[outl->pattern] << " found at index " << i - outl->depth + 1 << '\n';
            }
            i++;
        } else if (cur != root) {
            cur = cur->fail;  // Retry the same character from the longest matching proper suffix
        } else {
            i++;
        }
    }
    freeAutomaton(root);  // The automaton is private to this call; do not leak it
}

// --------------------------------------------------------------------------------
// /Gale-Shapley.cpp:
// --------------------------------------------------------------------------------
// Gale-Shapley Algorithm

// Gives a stable matching among N men and N women.
// Each member of the proposing party (men in this case) is matched with his best valid partner, while members of the receiving party (women) are matched with their worst valid partners.
5 | 6 | // O(N^2) time 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | // Preference ranges from 0 to N-1, inclusive. Smaller number = higher preference 15 | 16 | struct woman { 17 | vector pList; // a list of preferences, in same order as men. Can also use a map. 18 | int partner = -1; // current partner, -1 if none 19 | int rank; // preference of her current partner 20 | }; 21 | 22 | struct man { 23 | vector wList; // list of all women, sorted by decreasing preference. 24 | int next = 0; // next woman in list to propose to according to preference 25 | }; 26 | 27 | // Pre: size(men) == size(women) 28 | vector> match (vector &men, vector &women) { 29 | vector> result; 30 | vector freeMen(men.size()); // a list of men currently not engaged 31 | iota(freeMen.begin(), freeMen.end(), 0); 32 | while (!freeMen.empty()) { 33 | int cur = freeMen.back(); 34 | man &m = men.at(cur); 35 | woman &w = women.at(m.wList.at(m.next)); 36 | if (w.partner == -1) { 37 | w.partner = cur; 38 | w.rank = w.pList.at(cur); 39 | freeMen.pop_back(); 40 | } else if (w.pList.at(cur) < w.rank) { 41 | freeMen.pop_back(); 42 | freeMen.push_back(w.partner); 43 | w.partner = cur; 44 | w.rank = w.pList.at(cur); 45 | } 46 | m.next++; 47 | } 48 | vector temp; 49 | for (int i=0; i 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | struct HTNode { 18 | char name; 19 | double weight; 20 | HTNode* left; 21 | HTNode* right; 22 | }; 23 | 24 | struct cmp { 25 | bool operator()(HTNode* a, HTNode* b) { 26 | return a->weight > b->weight; 27 | } 28 | }; 29 | 30 | // Builds and returns the root of a Huffman tree with given a map of character weights 31 | HTNode* buildHT(unordered_map weights) { 32 | priority_queue, cmp> pq; 33 | for (auto &i : weights) { 34 | pq.push(new HTNode{i.first, i.second, nullptr, nullptr}); 35 | } 36 | HTNode *first, *second; 37 | // In each iteration, take the two nodes with minimum weight in the pq and 
subject them to a newly-created 38 | // parent with weight as their sum 39 | while (pq.size() > 1) { 40 | first = pq.top(); 41 | pq.pop(); 42 | second = pq.top(); 43 | pq.pop(); 44 | // Non-leaf nodes are assigned name '\0' 45 | pq.push(new HTNode{0, first->weight+second->weight, first, second}); 46 | } 47 | return pq.top(); 48 | } 49 | 50 | // Builds a Huffman tree and returns therefrom a map of prefix codes given a map of character weights 51 | unordered_map getHuffmanCode(unordered_map weights) { 52 | unordered_map encoding; 53 | // Builds Huffman tree 54 | HTNode* root = buildHT(weights); 55 | 56 | // If there is only one character, encode it with "0" 57 | if (!root->left) { 58 | encoding[root->name] = "0"; 59 | return encoding; 60 | } 61 | 62 | HTNode* cur = root; 63 | stack> s; 64 | string code; 65 | 66 | // Uses a post-order-like traversal scheme. However, codes are assigned in a pre-order fashion. 67 | while (true) { 68 | if (cur) { 69 | if (s.empty() || cur != s.top().first) { 70 | s.push(make_pair(cur, false)); 71 | if (cur->name != 0) { 72 | encoding[cur->name] = code; // Only nodes that represent characters are encoded 73 | } 74 | if (cur = cur->left) { 75 | code.push_back('0'); 76 | } 77 | } else if (!s.top().second) { 78 | s.top().second = true; 79 | if (cur = cur->right) { 80 | code.push_back('1'); 81 | } 82 | } else { 83 | s.pop(); 84 | code.pop_back(); 85 | if (s.empty()) { 86 | break; 87 | } else { 88 | cur = s.top().first; 89 | } 90 | } 91 | } else if (s.empty()) { 92 | break; 93 | } else { 94 | cur = s.top().first; 95 | } 96 | } 97 | 98 | return encoding; 99 | } 100 | 101 | // ---------------------------------------------------------------------------------------------------------- 102 | -------------------------------------------------------------------------------- /KMP.cpp: -------------------------------------------------------------------------------- 1 | // KMP Algorithm for linear-time pattern searching 2 | 3 | #include 4 | #include 5 | 6 
using namespace std;

// Longest proper prefix which is also a suffix.
// Returns a table where entry k is the length of the longest proper prefix of p[0..k] that is also
// a suffix of p[0..k]. For an empty pattern the table still holds the single entry {0}.
vector<int> findLPS (const string& p) {
    vector<int> lps(1, 0);
    lps.reserve(p.length() + 1);
    size_t i = 1;
    int len = 0;

    while (i < p.length()) {
        if (p[i] == p[len]) {
            lps.push_back(++len);
            i++;
        } else if (len > 0) {
            len = lps[len-1];  // Fall back to the next shorter border and retry the same character
        } else {
            lps.push_back(0);
            i++;
        }
    }

    return lps;
}

// Finds the index of first occurrence of p in s, or -1 if not found.
// An empty pattern is found at index 0.
int findFirst (const string& s, const string& p) {
    const size_t n = s.length();
    const size_t m = p.length();
    if (m == 0) {
        return 0;
    }
    const vector<int> lps = findLPS(p);
    size_t i = 0;
    size_t j = 0;

    // Stop as soon as the unread text is shorter than the unmatched rest of the pattern.
    // Inside the loop j < m, so the condition also guarantees i < n.
    while (n - i >= m - j) {
        if (s[i] == p[j]) {
            i++;
            j++;
            if (j == m) {
                return static_cast<int>(i - j);
            }
        } else if (j == 0) {
            i++;
        } else {
            j = lps[j-1];
        }
    }

    return -1;
}

// Finds all occurrences of p in s (overlapping occurrences included), in ascending order.
// An empty pattern matches at every position 0..s.length().
vector<int> findAll (const string& s, const string& p) {
    vector<int> indexes;
    const size_t n = s.length();
    const size_t m = p.length();
    if (m == 0) {
        // Handled separately: the general loop would index lps[j-1] with j == 0.
        for (size_t k = 0; k <= n; k++) {
            indexes.push_back(static_cast<int>(k));
        }
        return indexes;
    }
    const vector<int> lps = findLPS(p);
    size_t i = 0;
    size_t j = 0;

    // Same stopping rule as findFirst; it keeps s[i] in range at all times.
    while (n - i >= m - j) {
        if (s[i] == p[j]) {
            i++;
            j++;
            if (j == m) {
                indexes.push_back(static_cast<int>(i - j));
                j = lps[j-1];  // Continue from the longest border so overlapping matches are found
            }
        } else if (j == 0) {
            i++;
        } else {
            j = lps[j-1];
        }
    }

    return indexes;
}

/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Algorithms-And-Data-Structures

This repo contains C++ implementations of common algorithms and data structures, grouped by topics. The list will be sporadically updated.
*/
4 | 5 | ## Data Structures: 6 | - [Binary heap][binary_heap] 7 | - [Huffman Tree][Huffman_Tree] 8 | - [Trie (Prefix tree)][trie] 9 | - [Union-find][union_find] 10 | 11 | ## Algorithms: 12 | ### Graphs 13 | - [BFS & DFS][undirected_graph] 14 | 15 | > Undirected Graphs 16 | - [Bipartiteness testing][undirected_graph] 17 | - [Connected components][undirected_graph] 18 | - [Articulation points and bridges: Tarjan's Algorithm][undirected_graph] 19 | - [Minimum spanning trees: Prim's Algorithm][undirected_graph] 20 | - [Minimum spanning trees: Kruskal's Algorithm][union_find] 21 | - [Maximum spacing k-clustering][union_find] 22 | 23 | > Directed Graphs 24 | - [Topological sorting][directed_graph] 25 | - [Strongly connected components: Kosaraju's Algorithm and Tarjan's Algorithm][directed_graph] 26 | - [Shortest paths: Dijkstra's Algorithm][directed_graph] 27 | - [(Trees) Lowest common ancestor: Tarjan's Offline Algorithm][union_find] 28 | 29 | ### Pattern Searching 30 | - [Single-pattern searching: Knuth-Morris-Pratt Algorithm][KMP] 31 | - [Multi-pattern searching: Aho-Corasick Algorithm][Aho-Corasick] 32 | 33 | ### Divide and Conquer 34 | - [Binary search][divide_and_conquer] 35 | - [Merge sort and counting inversions][divide_and_conquer] 36 | - [Closest pair of points in a 2D plane][divide_and_conquer] 37 | 38 | ### Miscellaneous 39 | - [A selection of greedy algorithms focused on interval scheduling and optimal caching][greedy] 40 | - [Stable matching: Gale-Shapley Algorithm][Gale-Shapley] 41 | 42 | [Aho-Corasick]: /Aho-Corasick.cpp 43 | [Gale-Shapley]: /Gale-Shapley.cpp 44 | [Huffman_Tree]: /Huffman_Tree.cpp 45 | [KMP]: /KMP.cpp 46 | [binary_heap]: /binary_heap.cpp 47 | [directed_graph]: /directed_graph.cpp 48 | [divide_and_conquer]: /divide_and_conquer.cpp 49 | [greedy]: /greedy.cpp 50 | [trie]: /trie.cpp 51 | [undirected_graph]: /undirected_graph.cpp 52 | [union_find]: /union_find.cpp 53 | 54 | 
// --------------------------------------------------------------------------------
// /binary_heap.cpp:
// --------------------------------------------------------------------------------
// Binary min-heap implemented with vectors
// Can be easily converted to max-heaps
//
// Positions passed to the functions below are 1-based: position i lives in h[i-1], its parent is
// position i/2 and its children are positions 2i and 2i+1.

#include <iostream>
#include <stdexcept>
#include <vector>

using namespace std;

typedef vector<int> heap;

// Exchanges the elements at 1-based positions i and j.
void swap (heap &h, int i, int j) {
    std::swap(h[i-1], h[j-1]);
}

// Moves the element at position i up until its parent is no larger than it.
// Positions outside the heap are ignored.
void heapify_up (heap &h, int i) {
    if (i > int(h.size())) {
        return;
    }
    while (i > 1) {
        const int p = i/2;
        if (h[p-1] <= h[i-1]) {
            break;
        }
        swap(h, i, p);
        i = p;
    }
}

// Moves the element at position i down until neither child is smaller than it.
// Positions outside the heap are ignored.
void heapify_down (heap &h, int i) {
    const int n = int(h.size());
    if (i < 1) {
        return;
    }
    while (2*i <= n) {
        int j = 2*i;  // left child
        if (j < n && h[j-1] > h[j]) {
            j++;      // the right child exists and is strictly smaller
        }
        if (h[i-1] <= h[j-1]) {
            break;
        }
        swap(h, i, j);
        i = j;
    }
}

// Adds k to the heap.
void insert (heap &h, int k) {
    h.push_back(k);
    heapify_up(h, int(h.size()));
}

// Returns the smallest element. Throws std::out_of_range if the heap is empty.
int findMin (const heap &h) {
    if (h.empty()) {
        throw out_of_range("findMin: heap is empty");
    }
    return h[0];
}

// Removes the element at 1-based position i.
// Throws std::out_of_range if i is not a position in the heap.
void remove (heap &h, int i) {
    const int n = int(h.size());
    if (i < 1 || i > n) {
        throw out_of_range("remove: heap position out of range");
    }
    h[i-1] = h.back();
    h.pop_back();
    if (i == n) {
        return;  // The last slot itself was removed: position i no longer exists, nothing to restore
    }
    // The element moved into position i may be out of order in either direction
    heapify_up(h, i);
    heapify_down(h, i);
}

// Removes and returns the smallest element. Throws std::out_of_range if the heap is empty.
int extractMin (heap &h) {
    const int a = findMin(h);
    remove(h, 1);
    return a;
}

// Prints the heap's elements in storage order, one per line.
void printHeap (const heap &h) {
    for (int i : h) {
        cout << i << '\n';
    }
}

// Prints the elements in ascending order, one per line. Empties the heap in the process.
void printSorted (heap &h) {
    while (!h.empty()) {
        cout << extractMin(h) << '\n';
    }
}

// --------------------------------------------------------------------------------
// /directed_graph.cpp:
// --------------------------------------------------------------------------------
// Algorithms related to directed graphs
// Graphs are represented with adjacency lists with n nodes as distinct ints from 0 to n-1

// NOTE(review): the four header names on the original #include lines were lost in extraction; the
// ones below are chosen from the standard names this file visibly uses - confirm against upstream.
#include <functional>
#include <queue>
#include <stack>
#include <vector>

using namespace std; 10 | 11 | typedef vector> graph; // Adjacency lists 12 | typedef pair pi; 13 | typedef vector> weighted_graph; // (node,distance) pairs 14 | 15 | // 1. TOPOLOGICAL SORTING --------------------------------------------------------------------------------- 16 | 17 | // Returns a topological order of a DAG, or an empty vector if the input is not a DAG 18 | // The graph is not required to be connected 19 | 20 | vector topologicalOrder (adjList l) { 21 | vector to; 22 | vector activePre(l.size()); 23 | queue leadingNodes; 24 | // Preprocessing 25 | for (vector v : l) { 26 | for (int node : v) { 27 | activePre[node]++; 28 | } 29 | } 30 | for (int i=0; i> getSCC (graph g) { 64 | 65 | // A slightly modified DFS procedure on g to order nodes by finishing time 66 | stack s; 67 | stack f; // finishing time in descending order 68 | // meaning of status: 69 | // 0: unexplored 70 | // 1: all unexplored(0) children pushed to stack (recursive call started) 71 | // 2: all children explored(>0) (recursive call returned) 72 | vector status(g.size()); 73 | int cur; 74 | for (int i=0; i 0) { // more strictly, status[i] is either 0 or 2 76 | continue; 77 | } 78 | 79 | s.push(i); 80 | while (!s.empty()) { 81 | cur = s.top(); 82 | if (status[cur] == 0) { 83 | for (int node : g[cur]) { 84 | if (status[node] == 0) { 85 | s.push(node); 86 | } 87 | } 88 | status[cur]++; 89 | } else if (status[cur] == 1) { 90 | f.push(cur); 91 | s.pop(); 92 | status[cur]++; 93 | } else { 94 | s.pop(); 95 | } 96 | } 97 | } 98 | 99 | // Create transpose graph t 100 | graph t(g.size()); 101 | for (int i=0; i> scc; 109 | vector curSet; 110 | vector explored(t.size()); // 0 or 1 for ordinary dfs 111 | int root; 112 | while (!f.empty()) { 113 | root = f.top(); 114 | f.pop(); 115 | if (explored[root]) { 116 | continue; 117 | } 118 | 119 | s.push(root); 120 | while (!s.empty()) { 121 | cur = s.top(); 122 | s.pop(); 123 | if (explored[cur]) { 124 | continue; 125 | } 126 | explored[cur] = 1; 127 | 
curSet.push_back(cur); 128 | for (int node : t[cur]) { 129 | if (!explored[node]) { 130 | s.push(node); 131 | } 132 | } 133 | } 134 | 135 | scc.push_back(curSet); 136 | curSet.clear(); 137 | } 138 | 139 | return scc; 140 | } 141 | 142 | // ---------------------------------------------------------------------------------------------------------- 143 | 144 | 145 | // 3. TARJAN'S ALGORITHM FOR STRONGLY CONNECTED COMPONENTS -------------------------------------------------- 146 | 147 | // Returns a vector of vectors, each of which contains an SCC 148 | // Iterative version 149 | 150 | vector> getSCC2 (graph g) { 151 | vector> SCC; 152 | vector curSet; 153 | stack s1; // Standard DFS stack 154 | stack s2; // Stack for grouping SCCs. Elements are pushed in the same order as they are explored. 155 | vector exp(g.size()); // The visiting order of each node 156 | vector low(g.size()); // The earliest-visited node reachable from each node 157 | vector cleared(g.size()); // 1 if a node has already been idenfitied as part of an SCC 158 | int cur; 159 | int order = 0; // Visiting order 160 | 161 | for (int i=0; i shortestDistance(weighted_graph g, int source) { 222 | vector explored(g.size()); 223 | vector shortestDistance(g.size(), -1); 224 | // pi.second is a node i and p.first is the shortest distance from source to i based on current explored set 225 | // pq may contain duplicate nodes but will be properly dealt with in the main loop 226 | priority_queue,greater> pq; 227 | shortestDistance[source] = 0; 228 | pq.push(make_pair(0, source)); 229 | int cur; 230 | 231 | // In each iteration, extract the unexplored node with smallest distance from source, add it to the explored 232 | // set and push its still unexplored children for consideration 233 | while (!pq.empty()) { 234 | cur = pq.top().second; 235 | pq.pop(); 236 | if (explored[cur]) { 237 | continue; 238 | } 239 | for (pi p : g[cur]) { 240 | if (shortestDistance[p.first] == -1 || shortestDistance[cur] + p.second < 
shortestDistance[p.first]) { 241 | // Update shortest distance based on current explored set 242 | shortestDistance[p.first] = shortestDistance[cur] + p.second; 243 | pq.push(make_pair(shortestDistance[p.first], p.first)); 244 | } 245 | } 246 | explored[cur] = 1; 247 | } 248 | 249 | return shortestDistance; 250 | } 251 | 252 | // ---------------------------------------------------------------------------------------------------------- 253 | 254 | 255 | // 5. TARJAN'S OFF-LINE LOWEST COMMON ANCESTOR ALGORITHM FOR TREES ------------------------------------------ 256 | 257 | // Provided a tree of N nodes and a series of Q node pairs, find the LCA of each given pair of nodes. 258 | // Runs in approximately O(N+Q) time 259 | 260 | // Prints the LCA of each pair in queries 261 | // The tree is represented by adjacency lists of directed, hierarchal edges. Each node has an edge to its 262 | // children but not to its parent 263 | 264 | // Refer to the union-find template for implementation. 265 | 266 | // ---------------------------------------------------------------------------------------------------------- 267 | -------------------------------------------------------------------------------- /divide_and_conquer.cpp: -------------------------------------------------------------------------------- 1 | // Representative Divide and Conquer Algorithms 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | typedef pair point; 11 | 12 | 13 | // 0. BINARY SEARCH -------------------------------------------------------------------------------------------- 14 | 15 | // T(n) = T(n/2) + O(1) => T(n) = O(logn) 16 | 17 | // Given an array of distinct integers sorted in ascending order and a target, returns the index of the target 18 | // in the array or -1 if it does not exist. 

// Returns the index of target in nums, or -1 if it is absent (including when nums is empty).
// Pre: nums holds distinct integers sorted in ascending order.
int search(const std::vector<int>& nums, int target) {
    int lo = 0;
    int hi = static_cast<int>(nums.size()) - 1;
    while (lo <= hi) {
        const int mid = lo + (hi-lo) / 2;  // Written this way so lo + hi cannot overflow
        if (nums[mid] == target) {
            return mid;
        } else if (nums[mid] < target) {
            lo = mid + 1;
        } else { // nums[mid] > target
            hi = mid - 1;
        }
    }
    return -1;
}

// -------------------------------------------------------------------------------------------------------------


// 1. MERGE SORT AND COUNTING INVERSIONS -----------------------------------------------------------------------

// T(n) = 2T(n/2) + O(n) => T(n) = O(nlogn)

// Given a vector v and bounds such that the two subarrays v[l:m-1] and v[m:r-1] are already sorted, merges
// them into a larger sorted subarray v[l:r-1] and counts inversions with one element in the left subpart and
// the other in the right subpart (cross inversions)
int mergeAndCount(std::vector<int> &v, int l, int r, int m) {
    std::vector<int> temp;
    temp.reserve(r - l);
    int inv = 0;
    int lcur = l;
    int rcur = m;
    while (lcur < m || rcur < r) {
        // Note that a pair of duplicate elements does not form an inversion
        if (rcur == r || (lcur < m && v[lcur] <= v[rcur])) {
            temp.push_back(v[lcur]);
            lcur++;
        } else {
            // When an element from the right subpart is selected, we know it forms inversions with all remaining
            // elements in the left subpart
            temp.push_back(v[rcur]);
            inv += (m - lcur);
            rcur++;
        }
    }
    // Copy the merged run back into place
    for (int i=l, j=0; i<r; i++, j++) {
        v[i] = temp[j];
    }
    return inv;
}

// Sorts v[l:r-1] in ascending order and returns the number of inversions it contained.
// Ranges with fewer than two elements (including empty ones) are already sorted and contain none.
int sortAndCount(std::vector<int> &v, int l, int r) {
    if (r - l <= 1) {
        return 0;
    }
    int m = l + (r-l)/2;
    // count(all) = count(leftPart) + count(rightPart) + count(crossInversions)
    return sortAndCount(v, l, m) + sortAndCount(v, m, r) + mergeAndCount(v, l, r, m);
}

// ----------------------------------------------------------------------------------------------------------


// 2.
CLOSEST PAIR OF POINTS -------------------------------------------------------------------------------- 86 | 87 | // T(n) = 2T(n/2) + O(n) => T(n) = O(nlogn) 88 | // sqrt-optimized 89 | 90 | // Given n points in 2D space represented by (x,y) pairs , finds the distance between the closest pair of points 91 | 92 | // Compare by x-coordinate 93 | bool compByX(point a, point b) { 94 | return a.first < b.first; 95 | } 96 | 97 | // Compare by y-coordinate 98 | bool compByY(point a, point b) { 99 | return a.second < b.second; 100 | } 101 | 102 | // Calculate square of distance between two points 103 | double disSquared(point a, point b) { 104 | return pow(b.first-a.first, 2) + pow(b.second-a.second, 2); 105 | } 106 | 107 | // Recursive auxillary function (that does most of the work) 108 | // Takes in two arrays Px and Py that represent the same set of points P sorted by x and y respectively 109 | // Returns the square of distance between the closest points 110 | double MDSRec(vector px, vector py) { 111 | 112 | // Base cases 113 | if (px.size() == 2) { 114 | return disSquared(px[0], px[1]); 115 | } 116 | if (px.size() == 3) { 117 | return min(disSquared(px[0], px[1]), min(disSquared(px[0], px[2]), disSquared(px[1], px[2]))); 118 | } 119 | 120 | // Divides the set P into Q (left half) and R (right half) based on x-cord 121 | int mid = px.size() / 2; 122 | double qmax = px[mid-1].first; // x-cord of the rightmost point in Q 123 | vector qx, qy, rx, ry; // Q = Px[0:mid-1], R = Px[mid:len-1] 124 | for (int i=0; i sy; 145 | for (point p : py) { 146 | if (abs(p.first - qmax) < delta) { 147 | sy.push_back(p); 148 | } 149 | } 150 | // Computes the closest distance between pairs (a,b) with a and b belonging to different subsets (Q or R) 151 | for (int i=0; i points) { 163 | vector px = points; 164 | vector py = points; 165 | sort(px.begin(), px.end(), compByX); 166 | sort(py.begin(), py.end(), compByY); 167 | return sqrt(MDSRec(px, py)); 168 | } 169 | 170 | // 
// ----------------------------------------------------------------------------------------------------------

// --------------------------------------------------------------------------------
// /greedy.cpp:
// --------------------------------------------------------------------------------
// Representative Greedy Algorithms

// NOTE(review): the five header names on the original #include lines were lost in extraction; the
// ones below are chosen from the standard names this file visibly uses - confirm against upstream.
#include <algorithm>
#include <iostream>
#include <map>
#include <queue>
#include <vector>

using namespace std;

typedef pair<int, int> pi;

// -----------------------------------------------------------------------------------------------------
// INTERVAL SCHEDULING

// Finds a conflict-free schedule holding the most number of events
// Runs in O(nlogn) time, or O(n) if input is already sorted
// -----------------------------------------------------------------------------------------------------

// Comparing by end time ascending
bool endTimeAsc(pi a, pi b) {
    return a.second < b.second;
}

// Prints one maximum-size conflict-free schedule, one "start end" pair per line.
// Each event is represented by a pair consisting of its start and finish time.
// An event may start exactly when the previously chosen one ends.
void getMaxSchedule(vector<pi> schedule) {
    sort(schedule.begin(), schedule.end(), endTimeAsc);
    // A flag instead of a numeric sentinel: the earliest-ending event is always taken, even when
    // times are negative.
    bool hasLast = false;
    int lastEnd = 0;
    // Adds the event of the earliest end time while avoiding conflict
    for (const pi& event : schedule) {
        if (!hasLast || event.first >= lastEnd) {
            cout << event.first << " " << event.second << '\n';
            lastEnd = event.second;
            hasLast = true;
        }
    }
}


// -----------------------------------------------------------------------------------------------------
// INTERVAL PARTITIONING

// Partitions all events into the least number of groups, each of which has a non-conflicting schedule
// Runs in O(nlogn) time
// -----------------------------------------------------------------------------------------------------

// Comparing by start time ascending
bool startTimeAsc(pi a, pi b) {
    return a.first < b.first;
}

// Each event is represented
// by a pair consisting of its start and finish time.
// Returns the groups; each group lists its events in ascending order of start time.
// An event may join a group whose last event ends exactly when it starts.
std::vector<std::vector<std::pair<int, int>>> getLeastPartition(std::vector<std::pair<int, int>> schedule) {
    using Event = std::pair<int, int>;     // (start, finish)
    using GroupEnd = std::pair<int, int>;  // (finish time of the group's last event, group number)

    std::vector<std::vector<Event>> partition;
    // Min-heap on finish time: the top is the group that becomes free earliest
    std::priority_queue<GroupEnd, std::vector<GroupEnd>, std::greater<GroupEnd>> earliestFinish;

    std::sort(schedule.begin(), schedule.end(),
              [](const Event& a, const Event& b) { return a.first < b.first; });

    for (const Event& event : schedule) {
        // If there is no group created or if the earliest endtimes of last events in all groups
        // are after the current event's start time, create a new group
        if (earliestFinish.empty() || earliestFinish.top().first > event.first) {
            const int group = static_cast<int>(partition.size());
            partition.push_back(std::vector<Event>(1, event));
            earliestFinish.push(std::make_pair(event.second, group));
        } else {
            // Push the current event into the non-conflicting group with earliest endtime,
            // and update the position of current group in priority queue
            const int group = earliestFinish.top().second;
            earliestFinish.pop();
            partition[group].push_back(event);
            earliestFinish.push(std::make_pair(event.second, group));
        }
    }
    return partition;
}


// -----------------------------------------------------------------------------------------------------
// MINIMIZING MAX LATENESS

// Given a start time and a set of events where each event has a duration and a deadline, we fit all
// events in a schedule, despite some of them running late. Define the lateness l(i) of an event i to
// be its finish time in the schedule minus its deadline, or 0 if finished in time. The max lateness of
// a given schedule is max(l(i)) for all events i. We seek a schedule that minimizes the max lateness.
// Runs in O(nlogn) time, or O(n) if input is already sorted
// -----------------------------------------------------------------------------------------------------

// Comparing by deadline ascending
bool deadlineAsc(std::pair<int, int> a, std::pair<int, int> b) {
    return a.second < b.second;
}

// Finds a schedule with the minimum max lateness and returns its value.
// Each event is represented by a pair consisting of its duration and deadline.
// Returns 0 when every event finishes in time, including when there are no events.
int minMaxLateness(std::vector<std::pair<int, int>> events, int startTime) {
    std::sort(events.begin(), events.end(), deadlineAsc);
    int maxLateness = 0;
    int clock = startTime;  // Time at which the next event would start
    // In each iteration, add the item in the list with the earliest deadline
    for (const std::pair<int, int>& event : events) {
        clock += event.first;  // Now the finish time of this event
        maxLateness = std::max(maxLateness, clock - event.second);
    }
    return maxLateness;
}


// -----------------------------------------------------------------------------------------------------
// OPTIMAL CACHING - FARTHEST IN FUTURE APPROACH

// Given a sequence of n items and a cache of size k<=n which may initially contain some items, we need
// to access each item in the order of the sequence. If the current item i is not in the cache and the
// cache is full, we evict an existing item to leave space for i. Such an action is defined as a cache
// miss. Find an eviction schedule that results in the minimum number of cache misses.
// Runs in O(nlogn) time
// -----------------------------------------------------------------------------------------------------

// Returns the minimum cache misses given a sequence of items to be cached, a list of items initially
// in the cache and the cache capacity
// Each item is represented by a unique int
//
// NOTE(review): in this copy of the file the text of minCacheMiss is cut off inside its first loop
// header, and everything from there up to the includes of the trie template is missing. The surviving
// fragment is kept verbatim below, commented out so that the code after it still parses. Restore the
// function body (and whatever followed it) from the original source.
//
//   int minCacheMiss(vector items, vector initialCache, int cacheCap) {
//       int n = (int)items.size();
//       int cacheSize = (int)initialCache.size();
//       map inCache;
//       // The map stores the indexes at which each item is called
//       map> calls;
//       // The pq stores items currently in cache in descending order of next call time.
//       // May contain duplicate items but does not affect the result.
//       priority_queue cache;
//       int cur;
//       int miss = 0;
//       for (int i=0; i

#include <string>
#include <vector>

using namespace std;

// Modify the following to adapt to a different alphabet
const int SIZE = 26;
const char FIRST = 'a';

// One trie node: a child slot per alphabet letter, the number of non-null slots, and a flag that is
// set when the path from the root to this node spells an inserted word.
struct TrieNode {
    TrieNode* children[SIZE] {};
    int numChildren = 0;
    bool isWord = false;
};

// Inserts word into the trie rooted at root, creating nodes as needed.
// root must be non-null and every character of word must lie in [FIRST, FIRST+SIZE).
void TrieInsert (TrieNode* root, string word) {
    TrieNode* cur = root;
    for (char c : word) {
        if (cur->children[c-FIRST] == nullptr) {
            cur->children[c-FIRST] = new TrieNode();
            cur->numChildren++;
        }
        cur = cur->children[c-FIRST];
    }
    cur->isWord = true;
}

// Returns true iff word was inserted as a whole word. A null root is an empty trie.
bool TrieContains (TrieNode* root, string word) {
    if (root == nullptr) {
        // TrieDelete sets the root to null when the last word is removed
        return false;
    }
    TrieNode* cur = root;
    for (char c : word) {
        if (cur->children[c-FIRST] == nullptr) {
            return false;
        }
        cur = cur->children[c-FIRST];
    }
    return cur->isWord;
}

// Returns true iff some path from the root spells prefix. A null root is an empty trie.
bool TrieHasPrefix (TrieNode* root, string prefix) {
    if (root == nullptr) {
        return false;
    }
    TrieNode* cur = root;
    for (char c : prefix) {
        if (cur->children[c-FIRST] == nullptr) {
            return false;
        }
        cur = cur->children[c-FIRST];
    }
    return true;
}

// Removes word from the trie and frees every node that only existed to spell it.
// Does nothing if root is null or word is not in the trie.
// root is taken by reference: if removing word leaves no node in use, the root itself is freed and
// root is set to nullptr.
void TrieDelete (TrieNode* &root, string word) {
    if (root == nullptr || !TrieContains(root, word)) {
        return;
    }

    // deQ[i] is the node reached after consuming the first i characters of word
    vector<TrieNode*> deQ;
    // Index of the deepest node on the path that must survive (it branches or ends another word);
    // -1 if no node on the path is needed by anything else
    int front = -1;
    TrieNode* cur = root;
    for (int i=0; i<(int)word.size(); i++) {
        if (cur->numChildren > 1 || cur->isWord) {
            front = i;
        }
        deQ.push_back(cur);
        cur = cur->children[word[i]-FIRST];
    }

    // The terminal node still leads to longer words: just clear its flag
    if (cur->numChildren > 0) {
        cur->isWord = false;
        return;
    }

    // The terminal node is a leaf. It is not stored in deQ, so free it here, then free the chain of
    // nodes above it down to (but excluding) deQ[front]
    delete cur;
    for (int i=(int)deQ.size()-1; i>front; i--) {
        delete deQ.back();
        deQ.pop_back();
    }
    if (front == -1) {
        root = nullptr;
    } else {
        // Detach the freed chain from the surviving node
        deQ.back()->children[word[front]-FIRST] = nullptr;
        deQ.back()->numChildren--;
    }
}

// --------------------------------------------------------------------------------
// /undirected_graph.cpp:
// --------------------------------------------------------------------------------
// Algorithms related to undirected graphs
// Graphs are represented with adjacency lists with n nodes as distinct ints from 0 to n-1

#include <algorithm>
#include <iostream>
#include <queue>
#include <stack>
#include <vector>

using namespace std;

typedef vector<vector<int>> graph; // Adjacency Lists
typedef pair<int, int> pi;
typedef vector<vector<pi>> weighted_graph; // (node,distance) pairs

// 1.
// BFS --------------------------------------------------------------------------------------------------

// Abstracted BFS for a single connected component in a graph
// Prints all nodes reachable from root in BFS order, one per line
// l:    adjacency lists; every listed neighbour must be a valid index into l
// root: node to start from; nothing is printed if it is not a valid index

void BFS (const graph& l, int root) {
    if (root < 0 || root >= (int)l.size()) {
        return;
    }
    vector<int> discovered(l.size(),0); // 0 or 1
    queue<int> q;
    q.push(root);
    discovered[root] = 1;
    while (!q.empty()) {
        const int cur = q.front();
        q.pop();
        // Mark neighbours when they are enqueued so that no node enters the queue twice
        for (int node : l[cur]) {
            if (!discovered[node]) {
                q.push(node);
                discovered[node] = 1;
            }
        }
        cout << cur << '\n'; // Can be replaced with necessary node operations
    }
}

// ----------------------------------------------------------------------------------------------------------


// 2. DFS ---------------------------------------------------------------------------------------------------

// Abstracted iterative DFS for a single connected component in a graph
// Prints all reachable nodes in DFS order starting with root, one per line
// l:    adjacency lists; every listed neighbour must be a valid index into l
// root: node to start from; nothing is printed if it is not a valid index

void DFS (const graph& l, int root) {
    if (root < 0 || root >= (int)l.size()) {
        return;
    }
    vector<int> explored(l.size(),0); // 0 or 1
    stack<int> s;
    s.push(root);
    while (!s.empty()) {
        const int cur = s.top();
        s.pop();
        // A node can be pushed several times before it is first popped; skip the later copies
        if (explored[cur]) {
            continue;
        }
        explored[cur] = 1;
        cout << cur << '\n'; // Can be replaced with necessary node operations
        for (int node : l[cur]) {
            if (!explored[node]) {
                s.push(node);
            }
        }
    }
}

// ----------------------------------------------------------------------------------------------------------


// 3.
TESTING BIPARTITENESS --------------------------------------------------------------------------------- 70 | 71 | // Given an undirected graph represented by adjacency lists, returns whether or not it is bipartite 72 | 73 | bool isBipartite(graph g) { 74 | queue q; 75 | vector group(g.size()); // Each node is assigned 1 or -1 76 | vector discovered(g.size()); 77 | int cur; 78 | 79 | // BFS 80 | for (int i=0; i> connectedComp (graph l) { 120 | vector> ans; 121 | vector cur; 122 | vector discovered(l.size(),0); // 0 or 1 123 | queue q; 124 | 125 | for (int i=0; i getAP (graph g) { 159 | vector ap(g.size()); // Records whether or not a node is an AP 160 | vector ans; // Records all APs 161 | vector exp(g.size()); // DFS exploring order, starts with 1 162 | // For any node A, low = min(x,y) 163 | // x = the order of earliest-visited node reachable from the subtree rooted at A 164 | // y = the order of earliest-visited non-parent ancestor that has a back edge from A 165 | vector low(g.size()); // Earliest-visited node reachable from 166 | vector cleared(g.size()); // Whether the recursive call from a node has returned 167 | vector parent(g.size(),-1); // Parent of each node in DFS tree 168 | stack s; 169 | int order = 0; 170 | int cur; 171 | 172 | for (int i=0; i=low[cur] 199 | if (parent[cur] != -1) { 200 | for (int node : g[cur]) { 201 | if (exp[node] > exp[cur]) { 202 | // If node is cur's child 203 | low[cur] = min(low[node],low[cur]); 204 | if (low[node] >= exp[cur]) { 205 | ap[cur] = 1; 206 | } 207 | } else if (node != parent[cur]) { 208 | // If node is cur's non-parent ancestor 209 | low[cur] = min(low[cur],exp[node]); 210 | } 211 | } 212 | } else { 213 | // If cur is the root, cur is an AP iff cur has more than one disjoint subtrees 214 | ap[cur] = count(parent.begin(),parent.end(),cur)>1 ? 
1 : 0; 215 | } 216 | cleared[cur] = 1; 217 | s.pop(); 218 | } 219 | } 220 | } 221 | 222 | for (int i=0; i> getBridges (graph g) { 239 | vector> bridges; 240 | vector exp(g.size()); 241 | vector low(g.size()); 242 | vector parent(g.size(),-1); 243 | vector cleared(g.size()); 244 | stack s; 245 | int order = 0; 246 | int cur; 247 | 248 | for (int i=0; i exp[cur]) { 275 | low[cur] = min(low[cur],low[node]); 276 | // cur-node is a bridge iff low[node]>exp[cur].Note that equality does not imply a bridge. 277 | if (low[node] > exp[cur]) { 278 | bridges.push_back(make_pair(cur, node)); 279 | } 280 | } else if (parent[cur] != node) { 281 | low[cur] = min(low[cur],exp[node]); 282 | } 283 | } 284 | cleared[cur] = 1; 285 | s.pop(); 286 | } 287 | } 288 | } 289 | 290 | return bridges; 291 | } 292 | 293 | // ---------------------------------------------------------------------------------------------------------- 294 | 295 | 296 | // 7. PRIM'S MINIMUM SPANNING TREE ALGORITHM ---------------------------------------------------------------- 297 | 298 | // Returns a vector of pairs, each indicating an edge in the MST 299 | // Optimal for dense graphs 300 | // Runs in O(mlogn) time 301 | 302 | vector getMST(weighted_graph g) { 303 | vector MST; 304 | vector explored(g.size()); 305 | vector minLen(g.size(), -1); // Length of shortest edge from an explored node to the current node 306 | // Each pair denotes an edge. 
Pair structure: (len, (start, end)) 307 | priority_queue, vector>, greater>> pq; 308 | pq.push(make_pair(0, make_pair(0, 0))); 309 | pi curEdge; 310 | 311 | // In each iteration, add a node not yet explored with the minimum minLen to the set and add the shortest 312 | // edge connecting it from an explored node to the MST 313 | while (!pq.empty()) { 314 | curEdge = pq.top().second; 315 | pq.pop(); 316 | if (explored[curEdge.second]) { 317 | continue; 318 | } 319 | for (pi p : g[curEdge.second]) { 320 | if (minLen[p.first] == -1 || p.second < minLen[p.first]) { 321 | minLen[p.first] = p.second; 322 | pq.push(make_pair(p.second, make_pair(curEdge.second, p.first))); 323 | } 324 | } 325 | explored[curEdge.second] = 1; 326 | MST.push_back(curEdge); 327 | } 328 | 329 | MST.erase(MST.begin()); // The self-edge (0,0) will always be first in the resulting vector. Exclude it. 330 | return MST; 331 | } 332 | 333 | // ---------------------------------------------------------------------------------------------------------- 334 | 335 | 336 | // 8. KRUSKAL'S MINIMUM SPANNING TREE ALGORITHM ------------------------------------------------------------- 337 | 338 | // Takes in a list of weighted edges as opposed to adjacency lists 339 | // Returns a vector of pairs, each indicating an edge in the MST 340 | // Optimal for sparse graphs 341 | // Runs in O(mlogn) time 342 | // Refer to the union-find template for implementation 343 | 344 | // ---------------------------------------------------------------------------------------------------------- 345 | 346 | 347 | // 9. MAXIMUM SPACING K-CLUSTERING -------------------------------------------------------------------------- 348 | 349 | // Given n nodes and pairwise distances, returns a k-clustering with maximum spacing. That is, we seek to 350 | // maximize the minumum distance between any pair of nodes belonging to different clusters. 
// Based on Kruskal's Algorithm
// Refer to the union-find template for implementation

// ----------------------------------------------------------------------------------------------------------

// --------------------------------------------------------------------------------
// /union_find.cpp:
// --------------------------------------------------------------------------------
// Union-Find Data Structure and its Applications

#include <algorithm>
#include <iostream>
#include <stack>
#include <utility>
#include <vector>

using namespace std;

typedef pair<int, int> pi;
typedef vector<vector<int>> tree;

// UNION FIND -----------------------------------------------------------------------------------------------

// One element of a group. next points one step towards the group's representative and is null on
// the representative itself.
struct UFNode {
    int val;
    UFNode* next;
};

// Requires input to be distinct ints from 0 to n-1; otherwise vectors need to be replaced with maps
// nodes[i] is the node for item i. groupSize[r] is the number of items in the group whose
// representative is r (Union sets it to 0 for items that stop being representatives).
struct UnionFind {
    vector<UFNode*> nodes;
    vector<int> groupSize;
};

// Returns a union-find given a vector of distinct ints from 0 to n-1
// This function has to be adapted should input form changes
// The nodes are allocated with new; whoever owns the UnionFind has to delete them.
// NOTE(review): the loop body below and the header of Find were missing from this copy of the file.
// They are restored from how Find and Union use the nodes (each item starts as its own
// representative) - confirm against the original.
UnionFind makeUF(int n) {
    UnionFind uf {vector<UFNode*>(n), vector<int>(n, 1)};
    for (int i=0; i<n; i++) {
        uf.nodes[i] = new UFNode{i, nullptr};
    }
    return uf;
}

// Given a valid item, returns the representative of the group it belongs to.
// Every node visited on the way is re-pointed directly at the representative (path compression).
int Find(UnionFind &uf, int a) {
    vector<UFNode*> v;
    UFNode *cur = uf.nodes[a];
    while (cur->next) {
        v.push_back(cur);
        cur = cur->next;
    }
    for (UFNode *node : v) {
        node->next = cur;
    }
    return cur->val;
}

// Given two valid items, merge the groups they belong to
// O(logn) worst case, O(1) average
void Union(UnionFind &uf, int a, int b) {
    UFNode *first = uf.nodes[Find(uf, a)];
    UFNode *second = uf.nodes[Find(uf, b)];
    // Do nothing if they are in the same group
    if (first == second) {
        return;
    }
    // Merge the smaller group into the larger group
    if (uf.groupSize[first->val] >= uf.groupSize[second->val]) {
        second->next = first;
        uf.groupSize[first->val] += uf.groupSize[second->val];
        uf.groupSize[second->val] = 0;
    } else {
        first->next = second;
        uf.groupSize[second->val] += uf.groupSize[first->val];
        uf.groupSize[first->val] = 0;
    }
}

// ----------------------------------------------------------------------------------------------------------


// KRUSKAL'S MINIMUM SPANNING TREE ALGORITHM ----------------------------------------------------------------

// Requires the above methods to be included
// Takes in a list of weighted edges, each as a pair
// Returns a vector of pairs, each indicating an edge in the MST
// Optimal for sparse graphs
// Runs in O(mlogn) time

// Sort by edge length ascending
bool lenAsc(pair<int, pi> a, pair<int, pi> b) {
    return a.first < b.first;
}

// Each pair in edges is structured as (length, endpoints)
// n is the number of nodes; endpoints must be ints from 0 to n-1.
// The returned edges are in the order they were accepted, i.e. by ascending length.
vector<pi> getMST2(int n, vector<pair<int, pi>> edges) {
    vector<pi> MST;
    sort(edges.begin(), edges.end(), lenAsc);
    UnionFind nodes = makeUF(n);
    for (const pair<int, pi>& edge : edges) {
        // If endpoints not in the same connected component, connect them
        if (Find(nodes, edge.second.first) != Find(nodes, edge.second.second)) {
            Union(nodes, edge.second.first, edge.second.second);
            MST.push_back(edge.second);
        }
    }
    // The union-find is local to this call: release the nodes makeUF allocated
    for (UFNode* node : nodes.nodes) {
        delete node;
    }
    return MST;
}

// ----------------------------------------------------------------------------------------------------------


// MAXIMUM SPACING K-CLUSTERING -----------------------------------------------------------------------------

// Given n nodes and pairwise distances, returns a k-clustering with maximum spacing. That is, we seek to
// maximize the minimum distance between any pair of nodes belonging to different clusters.
110 | // Based on Kruskal's Algorithm 111 | 112 | // Each pair in edges is structured as (length, endpoints) 113 | vector> maxSpaceCluster(vector> edges, int n, int k) { 114 | // Differs from Kruskal's only in that it stops after adding n-k edges 115 | vector> clusters; 116 | sort(edges.begin(), edges.end(), lenAsc); 117 | UnionFind uf = makeUF(n); 118 | int numClusters = n; 119 | for (pair edge : edges) { 120 | if (numClusters == k) { 121 | break; 122 | } 123 | if (Find(uf, edge.second.first) != Find(uf, edge.second.second)) { 124 | Union(uf, edge.second.first, edge.second.second); 125 | numClusters--; 126 | } 127 | } 128 | 129 | // Generates lists of clusters from union-find 130 | vector added(n,-1); 131 | int group; 132 | int clusInd = 0; 133 | for (int i=0; i queries) { 158 | // Two copies of each query is saved. 159 | vector> req(t.size()); 160 | for (pi q : queries) { 161 | req[q.first].push_back(q.second); 162 | req[q.second].push_back(q.first); 163 | } 164 | 165 | // Post-order DFS 166 | // Idea: the LCA of two nodes belonging to different subtrees rooted at u (including u itself) must be u 167 | stack s; 168 | UnionFind uf = makeUF((int)t.size()); 169 | vector explored(t.size()); // 0: unexplored. 1: children pushed to stack. 2: all children explored. 170 | vector par(t.size(), root); // Records the parent of each node. Let the root's parent be itself 171 | vector anc(t.size()); // Current ancestor 172 | s.push(root); 173 | int cur; 174 | while (!s.empty()) { 175 | cur = s.top(); 176 | if (explored[cur] == 0) { 177 | for (int child : t[cur]) { 178 | s.push(child); 179 | par[child] = cur; 180 | } 181 | explored[cur]++; 182 | } else { 183 | // Since trees contain no cycles, each node will only be pushed once. Thus, the case where cur has 184 | // already been popped (explored[cur] == 2) need not be considered. 
185 | for (int target : req[cur]) { 186 | // Answer each query the second time it is encountered 187 | if (explored[target] == 2) { 188 | cout << "LCM of " << cur << " and " << target << " is " << anc[Find(uf, target)] << endl; 189 | } 190 | } 191 | Union(uf, par[cur], cur); // Merging up 192 | anc[Find(uf, cur)] = par[cur]; // Common ancestor moves up one layer 193 | explored[cur]++; 194 | s.pop(); 195 | } 196 | } 197 | } 198 | 199 | // ---------------------------------------------------------------------------------------------------------- 200 | 201 | --------------------------------------------------------------------------------