├── DWayHeap.java ├── PatriciaTrie.java ├── README.md ├── Trie.java ├── containers.js ├── dway_heap.py ├── dway_heap ├── __init__.py ├── dway_heap.py ├── dway_heap_test.py ├── dway_max_heap.py └── dway_min_heap.py ├── genetic_algorithm.py ├── graph.js ├── horowitz_sahni.py ├── karger ├── karger.py ├── karger_profile.py └── union_find.py ├── martello_toth_reduction.py ├── network_flow.py ├── patricia_trie.py ├── simulated_annealing.py ├── ss_tree.py ├── sudoku ├── sudoku_profiler.py ├── sudoku_solver.py └── sudoku_tester.py └── union_find.py /DWayHeap.java: -------------------------------------------------------------------------------- 1 | import java.util.Arrays; 2 | import java.util.Collection; 3 | import java.util.LinkedList; 4 | import java.util.NoSuchElementException; 5 | 6 | public class DWayHeap> { 7 | 8 | /** 9 | * The actual container for elements added to the heap. 10 | */ 11 | private LinkedList heap; 12 | 13 | /** 14 | * States the branching factor of the heap - How many child each node has. 15 | */ 16 | private int branch; 17 | 18 | /** 19 | * If true (by default) the heap is a minheap, otherwise a maxheap. 20 | */ 21 | public boolean minheap; 22 | 23 | /** 24 | * Constructor: creates a d-way minheap. 25 | * 26 | * @param d The branching factor for the heap 27 | * @throws IllegalArgumentException Iff d < 2. 28 | */ 29 | public DWayHeap(Integer d) throws IllegalArgumentException { 30 | init(d, true, null); 31 | } 32 | 33 | /** 34 | * Constructor: creates a d-way heap; whether it's a minheap or a maxheap depends 35 | * on the second parameter. 36 | * 37 | * @param d The branching factor for the heap 38 | * @param minHeap If true the heap is a minheap (smallest element at its top); 39 | * Otherwise it is a maxheap. 40 | * @throws IllegalArgumentException Iff d < 2. 41 | */ 42 | public DWayHeap(Integer d, boolean minHeap) throws IllegalArgumentException { 43 | init(d, minHeap, null); 44 | } 45 | 46 | /** 47 | * Constructor: creates a d-way minheap from the collection passed as second argumenr, 48 | * using the heapify algorithm to build it in linear time. 49 | * 50 | * @param d The branching factor for the heap. 51 | * @param list The initial set of elements to be contained by the heap. 52 | * @throws IllegalArgumentException Iff d < 2. 53 | */ 54 | public DWayHeap(Integer d, Collection list) 55 | throws IllegalArgumentException { 56 | init(d, true, list); 57 | } 58 | 59 | /** 60 | * Constructor: creates a d-way minheap from the collection passed as second argumenr, 61 | * using the heapify algorithm to build it in linear time; 62 | * Whether it's a minheap or a maxheap depends on the second parameter. 63 | * 64 | * @param d The branching factor for the heap. 65 | * @param minHeap If true the heap is a minheap (smallest element at its top); 66 | * Otherwise it is a maxheap. 67 | * @param list The initial set of elements to be contained by the heap. 68 | * @throws IllegalArgumentException Iff d < 2. 69 | */ 70 | public DWayHeap(Integer d, boolean minHeap, Collection list) { 71 | init(d, minHeap, list); 72 | } 73 | 74 | /** 75 | * Helper method to allow to reuse this class both as minheap and maxheap. 76 | * Evaluate which of the two elements passed as parameters is the smallest. 77 | * 78 | * @param x First input to evaluate; 79 | * @param y Second input to evaluate; 80 | * @return true <=> x is smaller than y. 81 | */ 82 | private boolean lt(T x, T y) { 83 | return minheap ? 
x.compareTo(y) < 0 : x.compareTo(y) > 0; 84 | } 85 | 86 | /** 87 | * Helper method to allow to reuse this class both as minheap and maxheap. 88 | * Evaluate which of the two elements passed as parameters is the smallest. 89 | * 90 | * @param x First input to evaluate; 91 | * @param y Second input to evaluate; 92 | * @return true <=> x is not larger than y. 93 | */ 94 | private boolean le(T x, T y) { 95 | return lt(x, y) || x.compareTo(y) == 0; 96 | } 97 | 98 | /** 99 | * Helper method: initialize the heap. 100 | * @param d The branching factor for the heap. 101 | * @param minHeap If true the heap is a minheap (smallest element at its top); 102 | * Otherwise it is a maxheap. 103 | * @param list The initial set of elements to be contained by the heap. 104 | * @throws IllegalArgumentException Iff d < 2. 105 | */ 106 | private void init(Integer d, boolean minHeap, Collection list) 107 | throws IllegalArgumentException { 108 | if (d < 2) { 109 | throw new IllegalArgumentException("Branching factor must be 2 or greater"); 110 | } 111 | this.minheap = minHeap; 112 | this.branch = d; 113 | if (list != null && !list.isEmpty()) { 114 | heapify(list); 115 | } else { 116 | heap = new LinkedList(); 117 | } 118 | } 119 | 120 | /** 121 | * Given a position inside the heap, returns the index of its parent. 122 | * @param child The index of the element whose parent is required. 123 | * @return i >= 0 if child has a parent, -1 otherwise. 124 | */ 125 | private int parentIndex(int child) { 126 | if (child == 0) 127 | return -1; 128 | else 129 | return (child - 1) / branch; // auto floor 130 | } 131 | 132 | /** 133 | * Given a position inside the heap, returns the index of its leftmost child. 134 | * This index might or might not be inside the heap - no check is performed at this stage. 135 | * @param parent The index of the element whose children must be located. 136 | * @return The index of the first child of the element passed. 137 | */ 138 | private int childIndex(int parent) { 139 | return parent * branch + 1; 140 | } 141 | 142 | /** 143 | * Build a heap from a collection of elements, in linear worst case time. 144 | * Running time: O(n) in the worst case. 145 | * @param list The collection to be inserted in the heap. 146 | */ 147 | private void heapify(Collection list) { 148 | heap = new LinkedList(list); 149 | 150 | int child, parent; 151 | T tmp; 152 | for (child = heap.size() - 1; child > 0; child--) { 153 | parent = this.parentIndex(child); 154 | if (this.lt(heap.get(child), heap.get(parent))) { 155 | tmp = heap.get(child); 156 | heap.set(child, heap.get(parent)); 157 | heap.set(parent, tmp); 158 | } 159 | } 160 | } 161 | 162 | /** 163 | * Remove the top element from the heap, and returns it. 164 | * The top element is guaranteed to be the maximum in a maxheap, 165 | * and the minimum in a minheap. 166 | * Running time: O(log_d(n)) in the worst case. 167 | * @return The top element in the heap, if it isn't empty. 168 | * @throws NoSuchElementException If the heap is empty. 
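   * Implementation note: the last element of the underlying list replaces the
   * removed top and is then sifted down, at each step swapping places with its
   * smallest child (largest, in a maxheap) until the heap property is restored.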
169 | */ 170 | public T remove() throws NoSuchElementException { 171 | T res = heap.getFirst(); // Stores the top element without removing it 172 | 173 | if (heap.size() == 1) { 174 | heap.removeFirst(); // Only one element 175 | return res; 176 | } 177 | T parentElem, bestChildElem; 178 | parentElem = heap.removeLast(); 179 | 180 | int i, parent = 0, child = 1, bestChild; 181 | 182 | while (child < heap.size()) { 183 | 184 | bestChild = child; 185 | bestChildElem = heap.get(child); 186 | for (i = 1; i < branch && child + i < heap.size(); i++) { 187 | if (lt(heap.get(child + i), bestChildElem)) { 188 | bestChild = child + i; 189 | bestChildElem = heap.get(bestChild); 190 | } 191 | } 192 | if (lt(parentElem, bestChildElem)) { 193 | heap.set(parent, parentElem); 194 | return res; // The heap properties have been restored 195 | } else { 196 | heap.set(parent, bestChildElem); 197 | parent = bestChild; 198 | child = this.childIndex(parent); 199 | } 200 | } 201 | heap.set(parent, parentElem); 202 | return res; 203 | } 204 | 205 | /** 206 | * Add a new element to the heap, ensuring that the heap properties are not violated. 207 | * Running time: O(log_d(n)) in the worst case. 208 | * @param el The new element to be added. 209 | * @return This heap object, to allow method chaining. 210 | */ 211 | public DWayHeap add(T el) { 212 | heap.add(el); 213 | if (heap.size() > 1) { 214 | pullUp(heap.size() - 1); 215 | } 216 | return this; 217 | } 218 | 219 | /** 220 | * Helper method: climb an element towards the top of the heap until heap 221 | * properties are not violated anymore. 222 | * Running time: O(log(n)) in the worst case. 223 | * @param child The index of the element that need to be moved towards the top. 224 | */ 225 | private void pullUp(int child) { 226 | T el = heap.get(child); 227 | 228 | int parent = this.parentIndex(child); 229 | T parentElem; 230 | while (parent >= 0) { 231 | parentElem = heap.get(parent); 232 | if (lt(el, parentElem)) { 233 | heap.set(child, parentElem); 234 | child = parent; 235 | parent = this.parentIndex(parent); 236 | } else { 237 | break; 238 | } 239 | } 240 | heap.set(child, el); 241 | } 242 | 243 | /** 244 | * Decrease the priority of an element in the heap: it replaces one element of type T 245 | * with another one, which must be smaller [greater, for a maxheap]. 246 | * Running time: O(log_d(n)) in the worst case. 247 | * @param oldElement The element to be replaced. 248 | * @param newElement The new element with whom the old one must be replaced. 249 | * It must have a lower priority than the old one 250 | * (hence be smaller if it is a minheap, larger in a maxheap). 251 | * @return This heap object, to allow method chaining. 252 | * @throws NoSuchElementException If oldElement is not in the heap. 253 | * @throws IllegalArgumentException If the new element is greater [smaller, 254 | * for a maxheap] than the old one. 255 | */ 256 | public DWayHeap decreasePriority(T oldElement, T newElement) throws NoSuchElementException, IllegalArgumentException { 257 | if (lt(oldElement, newElement)) { 258 | throw new IllegalArgumentException(); 259 | } 260 | int child = this.heap.indexOf(oldElement); 261 | 262 | if (child == -1) throw new NoSuchElementException(); 263 | heap.set(child, newElement); 264 | pullUp(child); 265 | 266 | return this; 267 | } 268 | 269 | /** 270 | * Check that whether the heap contains at least one element. 271 | * @return True <=> the heap contains at least one element. 
272 | */ 273 | public boolean isEmpty() { 274 | return heap.isEmpty(); 275 | } 276 | 277 | /** 278 | * Asserts the properties of the heap. 279 | */ 280 | private void checkHeapProperties() { 281 | if (isEmpty()) { 282 | assert (heap.size() == 0); 283 | } 284 | 285 | int parent = 0, child = 1; 286 | 287 | while (child < heap.size()) { 288 | for (int i = 0; i < branch && child + i < heap.size(); i++) { 289 | try { 290 | assert (le(heap.get(parent), heap.get(child + i))); 291 | } catch (AssertionError e) { 292 | System.out.println(branch); 293 | System.out.println(parent + " " + (child + i)); 294 | System.out.println(heap); 295 | throw e; 296 | } 297 | } 298 | parent += 1; 299 | child = branch * parent + 1; 300 | } 301 | } 302 | 303 | 304 | /** 305 | * Tests the class 306 | */ 307 | private static void test() { 308 | boolean exceptionThrown = false; 309 | 310 | //Branching factor must be >= 2 311 | try { 312 | new DWayHeap(1); 313 | } catch (IllegalArgumentException e) { 314 | exceptionThrown = true; 315 | } 316 | assert (exceptionThrown); 317 | try { 318 | new DWayHeap(-1); 319 | } catch (IllegalArgumentException e) { 320 | exceptionThrown = true; 321 | } 322 | assert (exceptionThrown); 323 | 324 | //Test lt and le helper methods for minheaps and maxheaps 325 | DWayHeap testHeap = new DWayHeap(2); 326 | assert (testHeap.lt(1, 2)); 327 | assert (testHeap.le(1, 1)); 328 | assert (!testHeap.lt(2, 1)); 329 | 330 | testHeap = new DWayHeap(2, false); 331 | assert (testHeap.lt(2, 1)); 332 | assert (testHeap.le(1, 1)); 333 | assert (!testHeap.lt(1, 2)); 334 | 335 | LinkedList stack; 336 | 337 | // Min heap, branch factor d 338 | for (int d = 2; d < 7; d++) { 339 | stack = new LinkedList(); 340 | testHeap = new DWayHeap(2); 341 | 342 | assert (testHeap.isEmpty()); 343 | 344 | exceptionThrown = false; 345 | try { 346 | testHeap.remove(); 347 | } catch (NoSuchElementException e) { 348 | exceptionThrown = true; 349 | } 350 | assert (exceptionThrown); 351 | 352 | testHeap.add((int) (Math.random() * 100)); 353 | assert (!testHeap.isEmpty()); 354 | for (int i = 0; i < 100 + (int) (50 * Math.random()); i++) { 355 | testHeap.add((int) (Math.random() * 100)); 356 | 357 | testHeap.checkHeapProperties(); 358 | if (Math.random() < 0.15) { 359 | testHeap.remove(); 360 | testHeap.checkHeapProperties(); 361 | } 362 | } 363 | 364 | stack.add(Integer.MIN_VALUE); 365 | 366 | while (!testHeap.isEmpty()) { 367 | int tmp = testHeap.remove(); 368 | assert (tmp >= stack.getLast()); 369 | stack.addLast(tmp); 370 | } 371 | } 372 | 373 | // Max heap, branch factor d 374 | for (int d = 2; d < 7; d++) { 375 | stack = new LinkedList(); 376 | testHeap = new DWayHeap(2, false); 377 | 378 | assert (testHeap.isEmpty()); 379 | 380 | exceptionThrown = false; 381 | try { 382 | testHeap.remove(); 383 | } catch (NoSuchElementException e) { 384 | exceptionThrown = true; 385 | } 386 | assert (exceptionThrown); 387 | 388 | testHeap.add((int) (Math.random() * 100)); 389 | assert (!testHeap.isEmpty()); 390 | for (int i = 0; i < 100 + (int) (50 * Math.random()); i++) { 391 | testHeap.add((int) (Math.random() * 100)); 392 | if (Math.random() < 0.15) { 393 | testHeap.remove(); 394 | testHeap.checkHeapProperties(); 395 | } 396 | } 397 | 398 | stack.add(Integer.MAX_VALUE); 399 | 400 | while (!testHeap.isEmpty()) { 401 | int tmp = testHeap.remove(); 402 | try { 403 | assert (tmp <= stack.getLast()); 404 | } catch (AssertionError e) { 405 | System.out.println(testHeap.heap); 406 | throw e; 407 | } 408 | stack.addLast(tmp); 409 | } 410 | } 411 | // 
Test heapify 412 | DWayHeap testHeapDouble = new DWayHeap(3, 413 | Arrays.asList(new Double[] { 0.1, 4.0, 1.321, 3.1415, -7.1 })); 414 | testHeapDouble.checkHeapProperties(); 415 | for (int i = 0; i < 50; i++) { 416 | testHeapDouble.add(Math.random()); 417 | } 418 | testHeapDouble.checkHeapProperties(); 419 | 420 | testHeapDouble = new DWayHeap(4, null); 421 | testHeapDouble.checkHeapProperties(); 422 | for (int i = 0; i < 50; i++) { 423 | testHeapDouble.add(Math.random()); 424 | } 425 | testHeapDouble.checkHeapProperties(); 426 | 427 | testHeapDouble = new DWayHeap(3, false, 428 | Arrays.asList(new Double[] { 0.1, 4.0, 1.321, 3.1415, -7.1 })); 429 | testHeapDouble.checkHeapProperties(); 430 | testHeapDouble.decreasePriority(4.0, 8.0); 431 | for (int i = 0; i < 50; i++) { 432 | testHeapDouble.add(Math.random()); 433 | } 434 | testHeapDouble.checkHeapProperties(); 435 | 436 | testHeap = new DWayHeap(4); 437 | 438 | for (Integer i : new Integer[]{3, 1, 5, 2, 4, 6, 8, 7, 0}){ 439 | testHeap.add(i); 440 | } 441 | 442 | int j = 0; 443 | for (int i = 0; i < 9; i++) { 444 | try { 445 | j = testHeap.remove(); 446 | assert(j == i); 447 | } catch(AssertionError e) { 448 | System.out.println(i + " " + j); 449 | System.out.println(testHeap.heap); 450 | throw e; 451 | } 452 | } 453 | } 454 | 455 | /** 456 | * @param args 457 | */ 458 | public static void main(String[] args) { 459 | test(); 460 | } 461 | 462 | } 463 | -------------------------------------------------------------------------------- /PatriciaTrie.java: -------------------------------------------------------------------------------- 1 | import java.util.ArrayList; 2 | import java.util.HashSet; 3 | 4 | 5 | /** 6 | * 7 | * @author mlarocca 8 | * 9 | * A Patricia Trie (aka Radix Tree) is a prefix trie where common paths are compressed by storing the common substring in a single node. 10 | * This way each node has at list 2 children. 11 | * 12 | * @param : The type of Objects linked to the string prefixes: for example, if the strings are text to be matched, each prefix could be linked to a list of Strings 13 | * that contains it. 14 | */ 15 | public class PatriciaTrie{ 16 | 17 | /** 18 | * root: Root of the RadixTree 19 | */ 20 | private PatriciaTrieNode root = new PatriciaTrieNode(); 21 | 22 | /** 23 | * 24 | * @author mlarocca 25 | * 26 | * A single node of the tree. 27 | * Each node contains 28 | * * A label, i.e. the compressed path represented by the node (a substring of any prefix); 29 | * * A list of Children (empty for leaves); 30 | * * A list of T objects represented by (or connected) to the prefix. 31 | */ 32 | private class PatriciaTrieNode{ 33 | private String label; 34 | private ArrayList children = new ArrayList(); 35 | private HashSet items = new HashSet(); 36 | 37 | /** 38 | * Empty constructor. (Used to create the root of the trie) 39 | * Label defaults to null, children and items list have already been initialized to empty containers. 40 | * 41 | */ 42 | public PatriciaTrieNode(){ 43 | 44 | } 45 | 46 | /** 47 | * Constructor 48 | * @param l: The label of the node; 49 | * @param item: An object of type T connected to the prefix represented by 50 | * the path from the root of the tree to this node. 51 | */ 52 | public PatriciaTrieNode(String label, T item){ 53 | this.label = label; 54 | this.items.add(item); 55 | } 56 | 57 | /** 58 | * 59 | * @param label: The label of the node; 60 | * @param childrenListReference: A reference to a list of nodes that 61 | * must be set as children of the newly 62 | * created node. 
63 | * WARNING: The list passed MUST be already a copy 64 | * of the original one, or MUST NOT be 65 | * referenced elsewhere, 'cause it will 66 | * not be cloned here. 67 | * @param itemsList: A list of T object associated with the prefix 68 | * represented by a path from the root to the current 69 | * node. 70 | * WARNING: The list itself will be cloned, but the 71 | * contained object won't. 72 | */ 73 | @SuppressWarnings("unchecked") 74 | public PatriciaTrieNode(String label, ArrayList childrenListReference, HashSet itemsList){ 75 | this.label = label; 76 | this.children = childrenListReference; //Note: Children are kept in lessicographic order 77 | this.items = (HashSet)(itemsList.clone()); 78 | } 79 | 80 | 81 | /** 82 | * Search a string in the tree starting at the current node. 83 | * 84 | * @param s: The query string; 85 | * @return: - If the string is a substring of the label of any 86 | * children of this node, a reference to this node will 87 | * be returned; 88 | * - If the any of the children's label is a substring of 89 | * the search string, it will search the remaining of the 90 | * string starting from that children, and return the 91 | * result of the recursive call; 92 | * - Otherwise, there is no match for the search string. 93 | */ 94 | public PatriciaTrieNode search(String s){ 95 | int l = 0, r = children.size()-1, pos = 0; 96 | char tmp_c, c; 97 | try{ 98 | //If the search string is empty or null, return null 99 | //Needed to prevent crash when empty string or null are searched 100 | c = s.charAt(0); 101 | }catch(StringIndexOutOfBoundsException e){ 102 | return null; 103 | } 104 | //Binary search on the first character of the string and of the children's label 105 | while (l <= r){ 106 | pos = (l+r)/2; 107 | PatriciaTrieNode child = children.get(pos); 108 | String label = child.label; 109 | int l_len = label.length(); 110 | tmp_c = label.charAt(0); 111 | if (tmp_c == c){ 112 | int i = 1; 113 | int s_len = s.length(); 114 | 115 | int n = Math.min(s_len, l_len); 116 | for (; i < n; i++){ 117 | if (s.charAt(i) != label.charAt(i)){ 118 | break; 119 | } 120 | } 121 | if (i == s_len){ 122 | return child; 123 | }else if (i == l_len){ 124 | return child.search(s.substring(i)); 125 | }else{ 126 | return null; 127 | } 128 | }else if (tmp_c < c){ 129 | l = pos + 1; 130 | }else{ 131 | r = pos - 1; 132 | } 133 | } 134 | 135 | return null; 136 | } 137 | 138 | 139 | /** 140 | * Insert a new string (and all its prefixes) into the subtree rooted in 141 | * this node. 142 | * - If the string is a prefix of any of this node's children's label, 143 | * then it just adds the item associated with it to that node's 144 | * items list. 145 | * - If any of the children's label is a prefix of the new string, adds 146 | * the item to that node's list and then recursively insert the rest 147 | * of the string starting from that same node; 148 | * - If any of the children's label is a partial match to the string 149 | * (meaning a prefix of the search string matches a prefix of the 150 | * node's label) then splits that node at the first difference and 151 | * continues the insertion of the rest of the string from the newly 152 | * created node. 153 | * - Otherwise, creates a new node and adds it to this node's children. 
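     * For example, if a child is labeled "test" and "team" is inserted, that child
     * is split at the first difference: its label becomes "te" and it gets two
     * children, labeled "am" (a new leaf) and "st" (which keeps the old subtree).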
154 | * 155 | * @param s: The string to insert; 156 | * @param item: The T object associated with the string to insert; 157 | * 158 | */ 159 | public void insertChild(String s, T item){ 160 | int size = children.size(), l = 0, r = size - 1, pos = 0; 161 | int s_len = s.length(); 162 | char tmp_c, c; 163 | try{ 164 | //If the search string is empty or null, return null 165 | //Needed to prevent crash when empty string or null are searched 166 | c = s.charAt(0); 167 | }catch(StringIndexOutOfBoundsException e){ 168 | return ; 169 | } 170 | //Binary search on the first character of the new string 171 | while (l <= r){ 172 | pos = (l+r)/2; 173 | PatriciaTrieNode child = children.get(pos); 174 | String label = child.label; 175 | 176 | tmp_c = label.charAt(0); 177 | if (tmp_c == c){ 178 | //The first character of the new string matches the first 179 | //character of one of the children's label. 180 | int l_len = label.length(); 181 | int n = Math.min(l_len, s_len); 182 | int i = 1; 183 | 184 | for (; i < n; i++){ 185 | if (label.charAt(i) != s.charAt(i)){ 186 | break; 187 | } 188 | } 189 | 190 | if (i < l_len){ 191 | //The new string partially matches node's label 192 | String restOfl = label.substring(i); 193 | child.label = s.substring(0, i); 194 | 195 | PatriciaTrieNode new_child = new PatriciaTrieNode(restOfl, child.children, child.items); 196 | child.children = new ArrayList(2); //old var has exausted its life 197 | child.children.add(new_child); 198 | child.items.add(item); 199 | if (i < s_len){ 200 | String restOfs = s.substring(i); 201 | new_child = new PatriciaTrieNode(restOfs, item); 202 | if ( restOfl.compareTo(restOfs) < 0){ 203 | child.children.add(new_child); 204 | }else{ 205 | child.children.add(0, new_child); 206 | } 207 | } 208 | }else if (i < s_len){ 209 | //Node's label is a prefix of the new string 210 | child.items.add(item); 211 | String restOfs = s.substring(i); 212 | child.insertChild(restOfs, item); 213 | }else{ 214 | //The new string is a prefix of node's label 215 | child.items.add(item); 216 | } 217 | return ; 218 | }else if (tmp_c < c){ 219 | l = pos + 1; 220 | }else{ 221 | r = pos - 1; 222 | } 223 | } 224 | 225 | //No path even partially matches the new string: a new node must be added to the tree. 226 | PatriciaTrieNode node = new PatriciaTrieNode(s, item); 227 | this.children.add(l, node); 228 | return ; 229 | } 230 | 231 | 232 | /** 233 | * Removes a new string (and all its prefixes) from the subtree rooted in 234 | * this node, if there's any match. 
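     * Note that only the reference to the item is removed from the nodes along
     * the matching path: the nodes themselves (and their labels) are left in place.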
235 | * 236 | * 237 | * @param s: The string to insert; 238 | * @param item: The T object associated with the string to delete; 239 | * 240 | */ 241 | public void removeItem(String s, T item){ 242 | int l = 0, r = children.size()-1, pos = 0; 243 | char tmp_c, c; 244 | try{ 245 | //If the search string is empty or null, return null 246 | //Needed to prevent crash when empty string or null are searched 247 | c = s.charAt(0); 248 | }catch(StringIndexOutOfBoundsException e){ 249 | return ; 250 | } 251 | //Binary search on the string's first character; 252 | while (l <= r){ 253 | pos = (l+r)/2; 254 | PatriciaTrieNode child = children.get(pos); 255 | String label = child.label; 256 | tmp_c = label.charAt(0); 257 | if (tmp_c == c){ 258 | //At least one prefix of the string matches this child 259 | child.items.remove(item); 260 | 261 | int l_len = label.length(); 262 | int i = 1; 263 | int s_len = s.length(); 264 | 265 | int n = Math.min(s_len, l_len); 266 | for (; i < n; i++){ 267 | if (s.charAt(i) != label.charAt(i)){ 268 | break; 269 | } 270 | } 271 | 272 | if (i == l_len && i < s_len){ 273 | //The node's label is a prefix of the string to remove 274 | child.removeItem(s.substring(i), item); 275 | return ; 276 | }else{ 277 | //There can be no further match 278 | return ; 279 | } 280 | }else if (tmp_c < c){ 281 | l = pos + 1; 282 | }else{ 283 | r = pos - 1; 284 | } 285 | } 286 | 287 | } 288 | } 289 | 290 | /** 291 | * Inserts a new string, and all its prefixes, into the tree. 292 | * 293 | * @param label: The string to add; 294 | * @param item: The item associated with the string. 295 | */ 296 | public void insertString(String label, T item){ 297 | root.insertChild(label, item); 298 | } 299 | 300 | /** 301 | * Removes a string and all its prefixes from the tree. 302 | * It travels from the root of the tree along the path corresponding to the 303 | * string and removes the object associated with the string from the items 304 | * list of each node visited. 305 | * 306 | * @param s: The string to remove; 307 | * @param item: The object associated with the string. 308 | */ 309 | public void removeString(String s, T item){ 310 | root.removeItem(s, item); 311 | } 312 | 313 | /** 314 | * Search a string in the tree and return the set of item stored in the 315 | * node corresponding to the end of the string. 316 | * 317 | * @param s: The string to search; 318 | * @return: A Container (HashSet) filled with the objects associated with 319 | * the entire string (possibly an empty container). 320 | */ 321 | public HashSet search(String s){ 322 | try { 323 | return root.search(s).items; 324 | }catch(NullPointerException e){ 325 | return null; 326 | } 327 | } 328 | 329 | /** 330 | * Search a list of string in the tree and return the set of item stored in 331 | * the node corresponding to the end of the string. 332 | * 333 | * @param sArray: An array of strings to search; 334 | * @return: A Container (HashSet) filled with the objects associated with 335 | * all of the strings in the query array (if no such object exists, 336 | * it returns an empty list. 
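     * In other words, it returns the intersection of the result sets obtained
     * by searching each of the strings in the array separately.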
337 | */ 338 | public HashSet search(String[] sArray){ 339 | HashSet results, tmp_result, tmp; 340 | try{ 341 | results = search(sArray[0]); 342 | }catch(IndexOutOfBoundsException e){ 343 | return null; 344 | } 345 | 346 | int n = sArray.length; 347 | for (int i = 1; i < n; i++){ 348 | try{ 349 | if (results.isEmpty()){ 350 | return results; 351 | } 352 | tmp_result = search(sArray[i]); 353 | tmp = new HashSet(results.size()); 354 | for (T item: tmp_result){ 355 | if (results.contains(item)){ 356 | tmp.add(item); 357 | } 358 | } 359 | results = tmp; 360 | }catch(NullPointerException e){ 361 | return null; 362 | } 363 | } 364 | return results; 365 | } 366 | 367 | 368 | } 369 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Algorithms 2 | ========== 3 | 4 | General purpose implementation of advanced algorithms 5 | 6 | 1. **SS-Tree** and K-Nearest neighbours - Python 7 | 8 | SS Trees are spatial search structures derived from R-Trees, the main difference being that instead of rectangular boxes, the points are partitioned using circle (2D), spheres (3D) or hyperspheres (n dimensional spaces, n>=4). 9 | These implementation deals with 2D space searches, but it can easily be extended for higher dimensional spaces. 10 | Each node of the tree is a sphere: intermediate nodes' spheres are big enough to include inside them the children's ones, so that the plan is partitioned, going from the root to the leaves, in finer and finer grain. 11 | Like R-trees, SS-Trees are derived from B-Trees, so that each node can hold between T and 2*T-1 children (actually between T and 2*T in this implementation), for some T >= 1. 12 | When, on insertion, it becomes necessary to split a node, the split is performed trying to reduce the variance of the 2 new nodes created (i.e. trying to reduce the size of the enclosing spheres) 13 | Trees are assumed to be static (deletion on the tree is not provided - if sporadic removal are needed, it can be performed a lazy deletion, marking the removed elements as "deleted"; otherwise a proper delete method should be added). 14 | 15 | 2. **Trie** (aka Prefix Tree) - Java 16 | 17 | A trie, or prefix tree, is an ordered tree data structure that is used to store a dynamic set or associative array where the keys are usually strings. Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated. All the descendants of a node have a common prefix of the string associated with that node, and the root is associated with the empty string. Values are normally not associated with every node, only with leaves and some inner nodes that correspond to keys of interest. 18 | In this implementation, to each string inserted in the Trie must be associated a single object, stored together with the string, and the presence or absence of this object marks a path in the trie as one of the stored strings or an intermediate path. 19 | Insertion of multiple instances of the same string is not supported: the latter occurrence of a string inserted will overwrite the object previously associated with the same string. 20 | 21 | To support multiple occurrences of the same string, a list of objects might be stored. 
22 | 23 | If no object needs to be associated with a string and it is simply needed to assess whether or not a string has been inserted in the trie, two solutions are possible: 24 | 25 | 1) A Trie can be created, without modifying anything, at the cost of passing a fake non-null value as the second parameter of the insert function; 26 | 27 | 2) The implementation can be slightly changed, removing the generics code and adding a boolean parameter to the Node class. 28 | 29 | Two versions of the remove method are implemented: 30 | 31 | 1) A "lazy" way: it won't actually remove any node from the trie, it will just set to null the object associated with the string to remove. 32 | This approach prevents searching the trie for prefixes of its strings; the lazy approach, however, speeds up deletion significantly at the cost of keeping a bigger tree (since dead edges and paths won't be removed from the tree), so it is particularly useful only when insertions are expected to greatly outnumber deletions. 33 | 34 | 2) A thorough approach, which deletes dead paths when strings are removed from the trie. This is the suggested approach when string removal is expected to be a common operation on the trie (for dynamic tries). 35 | 36 | NOTE: The two methods SHOULD NOT be mixed (once lazy, always lazy...). It is up to the caller to avoid mixing them, so you'd better keep only the method you want to be used when you add this code to your project. 37 | 38 | 3. **PatriciaTrie** (aka RadixTree) - Java 39 | 40 | Radix trees are a space-optimized trie data structure where each node with only one child is merged with its child. The result is that every internal node has at least two children. Unlike in regular tries, edges can be labeled with sequences of characters as well as single characters. This makes them much more efficient for small sets (especially if the strings are long) and for sets of strings that share long prefixes. 41 | (http://en.wikipedia.org/wiki/Radix_tree) 42 | In this implementation, when a string is inserted in the tree, an object is passed along with it; a reference to this object is stored in every node of the tree corresponding to a prefix of the string inserted. 43 | For example, if the strings inserted into the tree are titles of papers, these objects could be the full papers' text. 44 | If one is not interested in such features but only in establishing whether or not a given prefix is stored in the tree, the list of associated objects may be replaced with a counter, for example. 45 | WARNING: The "remove item" operation only removes from the tree a certain item associated with a path, but doesn't remove the path itself. 46 | 47 | 4. **PatriciaTrie** (aka RadixTree) - Python ( *patricia_trie.py* ) 48 | Same algorithms, in a fast Python implementation 49 | 50 | 5. **Horowitz-Sahni algorithm** - Python 51 | 52 | The Horowitz-Sahni algorithm is a branch and bound algorithm that efficiently solves the 0-1 Knapsack problem, provided that the elements to be inserted into the knapsack are sorted according to the ratio p[i]/w[i], from the largest to the smallest, where p[i] is the value of the i-th element and w[i] is its weight. 53 | An iterative version of the algorithm is provided; in the main cycle, it tries to add as many elements to the knapsack as possible according to their scaled value ("forward move") and then, when it finds a critical element (i.e.
one that cannot be added to the knapsack) estimates an upper bound (in particular, Dantzig's upper bound) for the maximum value that it is possible to get with the current elements included in the solution: if this bound is lower than the best value obtained so far, it prunes the recursion and performs a backtracking move, looking for the closest '1' in the subset bit mask (if it exists), and removing the corresponding element from the knapsack. 54 | To improve performance, some features of the Martello-Toth algorithm are 55 | added (for instance, a tighter bound than Dantzig's is computed). 56 | 57 | 6. **Martello-Toth reduction for 0-1 Knapsack** - Python 58 | 59 | Tries to reduce the 0-1 Knapsack problem by finding the elements that must be part of any optimal solution (set J1) and those that can't appear in an optimal solution (set J0). The core is represented by all the elements that belong to neither J1 nor J0, and the exact solution may now be computed on this smaller set rather than on the whole set of elements: the global solution will then be the union of the solution of the core problem and the elements in the set J1. 60 | The critical element (whose index is s) is the only one that might appear in both sets: if that is the case (i.e. the intersection between the two sets is not empty), then the reduction is not valid. 61 | During the reduction process, a value p_star is computed: this is a lower bound to the optimal solution. If the sum of the core problem solution and the value of the elements in J1 is lower than p_star, then p_star is the solution to the problem (it might be worth keeping track of the elements corresponding to the highest value of p_star found, for this reason). 62 | 63 | 7. **Genetic Algorithm Template** - Python 64 | 65 | The class is designed on the Template Pattern: it implements just the sketch of a genetic algorithm, with a random initialization, and then a cycle, with a new __population created at each iteration from the __population at the previous one. 66 | This class specifies only the selection algorithm (round robin selection) and the elitism criteria; the details of the chromosomes' structure, of the crossover and of the mutation algorithms (including the number of different kinds of mutations), together with their ratio of application, are completely left to the specific class that models evolving individuals. 67 | A base class for individuals, on which problem-specific individuals might be modeled (also through inheritance), and a short example of how to use it are also provided. 68 | 69 | 8. **Simulated Annealing Template** - Python 70 | 71 | The class is designed on the Template Pattern: it implements just the sketch of the simulated annealing algorithm, leaving the problem-specific operations for the Solution class to specify. 72 | A base class for individuals, on which problem-specific individuals might be modeled (also through inheritance), and a short example of how to use it are also provided. 73 | 74 | 9. **Queue, PriorityQueue** - JavaScript ( *containers.js* ) 75 | 76 | 10. **Graphs: Depth-First Search, Breadth-First Search, Dijkstra, Prim** - JavaScript (graph.js, requires containers.js) 77 | 78 | 11. **Network Flow** - Python ( *network_flow.py* ) 79 | 80 | Two algorithms are given: 81 | * Edmonds-Karp, which runs in O(|V|*|E|^2) 82 | * Relabel-to-Front, which runs in O(|V|^3) 83 | 84 | Both algorithms take as input the list of the edges of the graph as a dictionary, with pairs of vertices as keys, associated to the edges' capacities.
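For instance, the input could look like the sketch below. This is only an illustrative dictionary: the exact functions exposed by *network_flow.py* are not reproduced here, so the snippet just shows the expected shape of the data.

```python
# Hypothetical capacity dictionary in the input format described above:
# each key is a pair of vertices (an edge), each value is that edge's capacity.
capacity = {
    ('s', 'a'): 10,
    ('s', 'b'): 5,
    ('a', 'b'): 15,
    ('a', 't'): 10,
    ('b', 't'): 10,
}
```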
85 | The only limitations for the input are: 86 | 87 | 1) (Trivially) No two vertex can share the same label 88 | 89 | 2) Vertex can have any label of any hashable type; labels, however, can't be or evaluate to None 90 | 91 | 3) If (u,v) belongs to the graph, (v,u) can't be in it 92 | 93 | 12. **Sudoku Solver** - Python ( *sudoku/* ) 94 | 95 | A fast sudoku solver, nice example of heuristic-driven backtracking. 96 | Includes: 97 | 98 | 1) *sudoku_solver.py* - A very fast sudoku solver 99 | 100 | 2) *sudoku_tester.py* - A tester module that achieves 100% statement and branches coverage (with *coverage.py*) 101 | 102 | 3) *sudoku_profiler.py* - A profiler for the solver module. 103 | 104 | It accepts any valid iterable as input, as long as its size is correct (9x9) and its values are valid (I see no reason not to accept tuples or dictionaries as well as lists). 105 | Please find more in the file comments. 106 | So far it looks like it doesn't break on any input, but... let me know if you manage to crash it. 107 | 108 | 13. **Union-Find** - Python ( *union_find.py* ) 109 | 110 | Union-Find data structure, with weighted trees and path compression 111 | 112 | 14. **D-ary Heap** - Python ( *dway_heap.py* ), Java ( *DWayHeap.java* ) 113 | 114 | Implementation of a priority queue with a [d-way heap](http://en.wikipedia.org/wiki/D-ary_heap) (aka d-ary heap or d-heap) 115 | D-way heaps are pretty useful in practice in the implementation of Dijkstra and Prim algorithms for graphs, among many other things. While Fibonacci's heaps would be theoretically faster, no simple and fast implementation of such data structures is known. In practice, a 4-way heap is the best solution for the priority queues in these algorithms. 116 | 117 | To be used in those algorithms, the data structure supports the operation decrease_priority, if keys in the heap are unique (as it is the case for algorithms on graph without repeated vertices). 118 | 119 | 15. **Karger Randomized Contraction algorithm for finding Minimum Cut in undirected Graphs** - Python ( *karger/karger.py* ) 120 | 121 | Karger's algorithm is a randomized algorithm to compute a minimum cut of a connected graph. It was invented by David Karger and first published in 1993. 122 | 123 | A cut is a set of edges that, if removed, would disconnect the Graph; a minimum cut is the smallest possible set of edges that, when removed, produce a disconnected Graph. 124 | Every minimum cut corresponds to a partitioning of the Graph vertices into two non-empty subsets, such that the edges in the cut only have their endpoints in the two different subsets. 125 | 126 | Karger algorithm builds a cut of the graph by randomly creating this partitions, and in particular by choosing at each iteration a random edge, and contracting the graph around it: basically, merging its two endpoints in a single vertex, and updating the remaining edges, such that the self-loops introduced (like the chosen edge itself) are removed from the new Graph, and storing parallel-edges (if the algorithm chooses an edge (_u_,_v_) and both _u_ and _v_ have edges to a third vertex _w_, then the new Graph will have two edges between the new vertex _z_ and _w_) 127 | After _n-2_ iterations, only two macro-vertex will be left, and the parallel edges between them will form the cut. 128 | 129 | The algorithm is a Montecarlo algorithm, i.e. its running time is deterministic, but it isn't guaranteed that at every iteration the best solution will be found. 
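The sketch below illustrates the contraction procedure just described. It is only a self-contained, simplified version written for this README (it is not the implementation in *karger/karger.py*, which relies on the _union-find_ structure and integer labels):

```python
import random
from copy import deepcopy

def karger_cut_size(adjacency):
    """One contraction pass. `adjacency` maps each vertex to the list of its
    neighbours, with parallel edges stored as repeated entries."""
    adj = deepcopy(adjacency)
    while len(adj) > 2:
        # pick a random edge (u, v) and contract it, merging v into u
        u, v = random.choice([(x, y) for x in adj for y in adj[x]])
        v_neighbours = adj.pop(v)
        adj[u].extend(v_neighbours)
        for w in v_neighbours:
            if w != u:
                adj[w] = [u if x == v else x for x in adj[w]]
        # drop the self-loops introduced by the contraction
        adj[u] = [x for x in adj[u] if x != u and x != v]
    # two macro-vertices are left: the parallel edges between them form the cut
    return len(next(iter(adj.values())))

def karger_min_cut(adjacency, runs=100):
    """Repeat the randomized contraction and keep the smallest cut found."""
    return min(karger_cut_size(adjacency) for _ in range(runs))

# Example: a square with one diagonal; the minimum cut has size 2.
print(karger_min_cut({0: [1, 2], 1: [0, 2, 3], 2: [0, 1, 3], 3: [1, 2]}))
```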
130 | 131 | Actually the probability of finding the minimum cut in one run of the algorithm is pretty low, with an upper bound of 1 over _n_ squared, where _n_ is the number of vertices in the Graph. Nonetheless, by running the algorithm multiple times and storing the best result found, the probability that none of the runs founds the minimum cut becomes very small: 1 over _e_ (Neper) for _n_ squared runs, and 1 over _n_ for _n_ ^2 * log(_n_) runs - for large values of _n_, i.e. for large Graphs, that's a negligible probability. 132 | 133 | The implementation provided is written in Python, assumes the Graph represented with adjacency list (as a Dictionary) and is restricted to having only integer vertices labels (ideally the number from 0 to n-1): this limitation allows to exploit the _union-find_ implementation provided, and can be easily overcome by mapping the original labels to the range [0..n-1]. -------------------------------------------------------------------------------- /Trie.java: -------------------------------------------------------------------------------- 1 | import java.io.IOException; 2 | import java.util.ArrayList; 3 | 4 | 5 | /** 6 | * 7 | * @author mlarocca 8 | * 9 | * A Trie (Prefix Tree) where to each string inserted in the tree can be associated 10 | * one object of a generic type. 11 | * This implementation DOES NOT deal with duplicate keys: every time a string in 12 | * the tree is inserted, the previous value associated with the string is overwritten. 13 | * 14 | * Leaves should correspond to strings in the trie (might not be so because of a lazy 15 | * implementation of the delete method). 16 | * Intermediate nodes can either correspond to string in the trie (if the item field 17 | * is not null) or to intermediate paths (if item == null). 18 | * 19 | * @param - The type of objects this container can hold. 20 | */ 21 | public class Trie{ 22 | 23 | /** 24 | * Trie's root. 25 | */ 26 | private TrieNode root = new TrieNode(); 27 | 28 | private class TrieNode{ 29 | 30 | private char edgeLabel; //Represent the label of the edge from the parent of this node; 31 | private final ArrayList children = new ArrayList(); //children are stored in lexicographic order) 32 | private T item = null; 33 | 34 | /** 35 | * Empty constructor (used to create trie's root). 36 | * The associated item is init to null, and the list of children is initialized to an empty list. 37 | */ 38 | public TrieNode(){ 39 | 40 | } 41 | 42 | /** 43 | * Constructor. 44 | * The associated item is init to null, and the list of children is initialized 45 | * to an empty list. 46 | * @param c: The label of the edge that connects this node to its parent. 47 | */ 48 | public TrieNode(char c){ 49 | this.edgeLabel = c; 50 | } 51 | 52 | 53 | /** 54 | * Search if the current node has a child connected by an edge labeled 55 | * with the given character - i.e., it searches if there is a path from 56 | * these node towards the bottom of the tree which starts with the 57 | * given character. 58 | * 59 | * 60 | * @param c: The character to search 61 | * @return 62 | */ 63 | public TrieNode search(char c){ 64 | int l = 0, r = children.size() - 1, pos = 0; 65 | char tmp_c; 66 | 67 | //Binary search on the edges to the children (they are stored in lexicographic order). 
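            //l and r bound the range of children that can still match: at each step the
            //middle child's edge label is compared with c and half of the range is discarded.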
68 | while (l <= r){ 69 | pos = (l + r) / 2; 70 | tmp_c = children.get(pos).edgeLabel; 71 | if (tmp_c == c){ 72 | return children.get(pos); 73 | }else if (tmp_c < c){ 74 | l = pos + 1; 75 | }else{ 76 | r = pos - 1; 77 | } 78 | } 79 | 80 | return null; 81 | } 82 | 83 | /** 84 | * Insert a new edge leaving this node, if an edge with the same label 85 | * isn't already present. 86 | * 87 | * @param c: The label of the edge to insert. 88 | * @return: The child of this node that is connected to it by an edge 89 | * whose label is the specified character (possibly a newly 90 | * created edge). 91 | */ 92 | public TrieNode insertNode(char c){ 93 | int size = children.size(), l = 0, r = size - 1, pos = 0; 94 | char tmp_c; 95 | //Binary search over the edges to the children (sorted lexicographically) 96 | while (l <= r){ 97 | pos = (l + r) / 2; 98 | TrieNode child = children.get(pos); 99 | tmp_c = child.edgeLabel; 100 | if (tmp_c == c){ 101 | //The edge already exist => returns it 102 | return child; 103 | }else if (tmp_c < c){ 104 | l = pos + 1; 105 | }else{ 106 | r = pos - 1; 107 | } 108 | } 109 | 110 | //The edge doesn't exist: a new node needs to be created 111 | TrieNode node = new TrieNode(c); 112 | this.children.add(l, node); 113 | return node; 114 | } 115 | 116 | /** 117 | * Change the object associated with the string corresponding to the 118 | * path from the root of the trie to this node; 119 | * 120 | * @param item: The new object to store 121 | */ 122 | public void setItem(T item){ 123 | this.item = item; 124 | } 125 | 126 | /** 127 | * Sets the object associated with the path from the root to this node 128 | * to null - i.e., it removes the string corresponding to that path 129 | * from the trie. 130 | * 131 | * @return: True <=> the item has been correctly removed; 132 | */ 133 | public boolean removeItem(){ 134 | if (this.item == null){ 135 | return false; 136 | } 137 | this.item = null; 138 | return true; 139 | } 140 | 141 | 142 | /** 143 | * Checks whether a node is still useful or can be deleted; 144 | * 145 | * @return: True <=> the node can be safely deleted from the tree 146 | * (iff it has no children and no string is 147 | * associated with a path up to this node) 148 | */ 149 | public boolean isEmpty(){ 150 | return this.item == null && this.children.size() == 0; 151 | } 152 | 153 | } 154 | 155 | /** 156 | * Insert a string into the trie, together with an object associated with it. 157 | * 158 | * @param s: The string to insert; 159 | * @param item: The object associated with the string to insert. 160 | */ 161 | public void insertString(String s, T item){ 162 | char[] cArray = s.toCharArray(); 163 | TrieNode node = root; 164 | //Insert the string into the trie, char by char, by adding one edge for each one of its char. 165 | for (char c: cArray){ 166 | node = node.insertNode(c); 167 | } 168 | node.setItem(item); 169 | } 170 | 171 | /** 172 | * Removes a string previously inserted into the trie. 173 | * 174 | * @param s: The string to remove; 175 | * 176 | * @return: True <=> the string was stored in the trie and has been removed correctly. 
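     * Unlike lazyRemoveString, this version also walks back along the visited path
     * and deletes the nodes that have become useless after the removal.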
177 | */ 178 | public boolean removeString(String s){ 179 | char[] cArray = s.toCharArray(); 180 | ArrayList stack = new ArrayList(); 181 | TrieNode node = root; 182 | for (char c: cArray){ 183 | try{ 184 | node = node.search(c); 185 | stack.add(node); 186 | }catch(NullPointerException e){ 187 | //If search returns null, the string isn't in the trie; 188 | return false; 189 | } 190 | } 191 | try{ 192 | if (!node.removeItem()){ 193 | //The string wasn't in the trie 194 | return false; 195 | } 196 | 197 | if (node.isEmpty()){ 198 | char c, tmp_c; 199 | int l, r, pos, n = stack.size() - 1; 200 | ArrayList children; 201 | stack.remove(n--); //Removes the node corresponding to the string deleted from the stack; 202 | 203 | //Deletes nodes of the path from the root to the node corresponding to the deleted string 204 | //until a not empty node is found, or the root is reached 205 | //(removes nodes that has become obsolete) 206 | do{ 207 | c = node.edgeLabel; 208 | node = stack.remove(n--); //Next element on the stack is the parent of the TrieNode previously stored in node. 209 | children = node.children; 210 | 211 | //Binary search on the edges to the children to find the position of the child node. 212 | l = 0; 213 | r = children.size() - 1; 214 | while (l <= r){ 215 | pos = (l + r) / 2; 216 | tmp_c = children.get(pos).edgeLabel; 217 | if (tmp_c == c){ 218 | children.remove(pos); 219 | break; 220 | }else if (tmp_c < c){ 221 | l = pos + 1; 222 | }else{ 223 | r = pos - 1; 224 | } 225 | } 226 | }while (n >= 0 && node.isEmpty()); //n >= 0 <=> !stack.isEmpty() 227 | } 228 | 229 | }catch(NullPointerException e){ 230 | //If node is null, the string isn't in the trie; 231 | return false; 232 | } 233 | //The string has been correctly removed 234 | return true; 235 | } 236 | 237 | 238 | 239 | /** 240 | * Removes a string previously inserted into the trie. 241 | * 242 | * @param s: The string to remove; 243 | * 244 | * @return: True <=> the string was stored in the trie and has been removed correctly. 245 | */ 246 | public boolean lazyRemoveString(String s){ 247 | char[] cArray = s.toCharArray(); 248 | TrieNode node = root; 249 | for (char c: cArray){ 250 | try{ 251 | node = node.search(c); 252 | }catch(NullPointerException e){ 253 | //If search returns null, the string isn't in the trie; 254 | return false; 255 | } 256 | } 257 | try{ 258 | node.removeItem(); 259 | }catch(NullPointerException e){ 260 | //If node is null, the string isn't in the trie; 261 | return false; 262 | } 263 | //The string has been correctly removed 264 | return true; 265 | } 266 | 267 | /** 268 | * Searches a string to test if it belongs to the trie. 269 | * 270 | * @param s: The string to search; 271 | * @return: If the string is stored into the tree, the item associated 272 | * with it is returned; otherwise returns null. 
273 | */ 274 | public T search(String s){ 275 | char[] cArray = s.toCharArray(); 276 | TrieNode node = root, child; 277 | for (char c: cArray){ 278 | try{ 279 | child = node.search(c); 280 | node = child; 281 | }catch(NullPointerException e){ 282 | return null; 283 | } 284 | } 285 | try{ 286 | return node.item; 287 | }catch(NullPointerException e){ 288 | return null; 289 | } 290 | } 291 | 292 | } 293 | -------------------------------------------------------------------------------- /containers.js: -------------------------------------------------------------------------------- 1 | function queue(){ 2 | "use strict"; 3 | 4 | var top = 0, bottom = -1, coda = [], that = {}; 5 | 6 | Object.defineProperty( that, "isEmpty", { 7 | value: function(){ 8 | return (bottom < top); 9 | }, 10 | writable: false, 11 | enumerable: false, 12 | configurable: false 13 | }); 14 | 15 | Object.defineProperty( that, "push", { 16 | value: function(el){ 17 | coda[bottom] = el; 18 | bottom += 1; 19 | }, 20 | writable: false, 21 | enumerable: false, 22 | configurable: false 23 | }); 24 | 25 | Object.defineProperty( that, "top", { 26 | value: function(){ 27 | if (bottom >= top){ 28 | return coda[top]; 29 | } 30 | }, 31 | writable: false, 32 | enumerable: false, 33 | configurable: false 34 | }); 35 | 36 | Object.defineProperty( that, "pop", { 37 | value: function(){ 38 | var el; 39 | if (bottom >= top){ 40 | el = coda[top]; 41 | delete coda[top]; 42 | top += 1; 43 | } 44 | 45 | return el; 46 | }, 47 | writable: false, 48 | enumerable: false, 49 | configurable: false 50 | }); 51 | 52 | Object.preventExtensions(that); 53 | return that; 54 | } 55 | 56 | function priorityQueue(){ 57 | "use strict"; 58 | var top = 1, bottom = 0, coda = [], that = {}; 59 | 60 | Object.defineProperty( that, "isEmpty", { 61 | value: function(){ 62 | return (bottom < top); 63 | }, 64 | writable: false, 65 | enumerable: false, 66 | configurable: false 67 | }); 68 | 69 | Object.defineProperty( that, "push", { 70 | value: function(el){ 71 | var i, parent, tmp; 72 | if ( !el || !el.hasOwnProperty("key") || !el.hasOwnProperty("compareTo") || typeof el.compareTo != 'function'){ 73 | throw "Illegal argument: the element must have a key and be comparable"; 74 | } 75 | bottom += 1; 76 | coda[bottom] = el; 77 | for (i = bottom; i>top; ){ 78 | parent = top + Math.floor( ( i - top ) / 2); 79 | //alert(i + " | " + parent); 80 | if ( coda[parent].compareTo(coda[i]) > 0 ){ 81 | tmp = coda[parent]; 82 | coda[parent] = coda[i]; 83 | coda[i] = tmp; 84 | }else{ 85 | break; 86 | } 87 | i = parent; 88 | } 89 | }, 90 | writable: false, 91 | enumerable: false, 92 | configurable: false 93 | }); 94 | 95 | Object.defineProperty( that, "decrease", { 96 | value: function(key, newVal){ 97 | var i, parent, tmp; 98 | for (i=top; i<=bottom; i++){ 99 | if ( coda[i].key === key ){ 100 | break; 101 | } 102 | } 103 | if (i>bottom){ 104 | return false; //Element not found; 105 | } 106 | 107 | coda[i].val = newVal; 108 | for (; i>top; ){ 109 | parent = top + Math.floor( ( i - top ) / 2); 110 | //alert(i + " | " + parent); 111 | if ( coda[parent].compareTo(coda[i]) > 0 ){ 112 | tmp = coda[parent]; 113 | coda[parent] = coda[i]; 114 | coda[i] = tmp; 115 | }else{ 116 | break; 117 | } 118 | i = parent; 119 | } 120 | return true; //Element found 121 | }, 122 | writable: false, 123 | enumerable: false, 124 | configurable: false 125 | }); 126 | 127 | Object.defineProperty( that, "top", { 128 | value: function(){ 129 | if (bottom >= top){ 130 | return coda[top]; 131 | } 132 | }, 133 | 
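            // Note: "top" only peeks at the highest-priority element; it is removed only by "pop".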
writable: false, 134 | enumerable: false, 135 | configurable: false 136 | }); 137 | 138 | Object.defineProperty( that, "pop", { 139 | value: function(){ 140 | var el, i, j, tmp; 141 | if (bottom >= top){ 142 | el = coda[top]; 143 | coda[top] = coda[bottom]; 144 | delete coda[bottom]; 145 | bottom -= 1; 146 | i = top; 147 | j = top + 1; 148 | while (j <= bottom){ 149 | if (j+1 <= bottom && coda[j].compareTo(coda[j+1]) > 0 ){ 150 | j += 1; 151 | } 152 | if ( coda[i].compareTo(coda[j]) <= 0 ){ 153 | break; 154 | }else{ 155 | tmp = coda[i]; 156 | coda[i] = coda[j]; 157 | coda[j] = tmp; 158 | i = j; 159 | j = top + (j-top) * 2; 160 | } 161 | } 162 | 163 | } 164 | 165 | return el; 166 | }, 167 | writable: false, 168 | enumerable: false, 169 | configurable: false 170 | }); 171 | 172 | Object.defineProperty( that, "toString", { 173 | value: function(){ 174 | var i, arr=[]; 175 | //alert(top + " " + bottom); 176 | for(i=top; i<=bottom; i++){ 177 | arr.push(coda[i].key); 178 | } 179 | return arr.join(); 180 | }, 181 | writable: false, 182 | enumerable: false, 183 | configurable: false 184 | }); 185 | 186 | Object.preventExtensions(that); 187 | return that; 188 | 189 | } 190 | 191 | 192 | /* 193 | var PQ = priorityQueue(); 194 | PQ.push(node(4)); 195 | PQ.push(node(3)); 196 | PQ.push(node(5)); 197 | PQ.push(node(2)); 198 | PQ.push(node(1)); 199 | PQ.push(node(0)); 200 | PQ.push(node(-2)); 201 | 202 | //alert("PQ"); 203 | getConsole().innerHTML += "
" + PQ.toString(); 204 | while(!PQ.isEmpty()){ 205 | alert( PQ.pop().key); 206 | getConsole().innerHTML += "
" + PQ.toString(); 207 | } 208 | */ -------------------------------------------------------------------------------- /dway_heap.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 08/apr/2013 3 | 4 | @author: mlarocca 5 | ''' 6 | 7 | 8 | class dway_heap(object): 9 | 10 | PRIORITY_INDEX = 0 11 | ELEMENT_INDEX = 1 12 | 13 | def __init__(self, d, max_heap=False): 14 | ''' : Constructor : 15 | Create a new d-way heap priority queue. 16 | : param d : The branch-factor of the heap. 17 | (MUST be at least 2) 18 | : type d : int (Other types will be converted to int) 19 | : param max_heap : If passed and truthy, the heap will 20 | be a max heap instead than a min heap. 21 | : type max_heap : boolean 22 | : raise IllegalArgumentException : If d is less than 2. 23 | : return : self, as all constructors. 24 | ''' 25 | self.__d = int(d) 26 | 27 | if self.__d < 2: 28 | raise Exception("IllegalArgumentException: minimum allowed branch-factor is 2") 29 | 30 | #if it is a max queue, uses the reverse ordering 31 | #The function projects the 32 | self.__smaller = ((lambda x, y: x[dway_heap.PRIORITY_INDEX] > 33 | y[dway_heap.PRIORITY_INDEX]) 34 | if max_heap 35 | else (lambda x, y: x[dway_heap.PRIORITY_INDEX] < 36 | y[dway_heap.PRIORITY_INDEX]) 37 | ) 38 | #same function, but without the need to project the priority the tuple 39 | self.__smaller_priority = ((lambda x, y: x > y) if max_heap 40 | else (lambda x, y: x < y)) 41 | 42 | self.__queue = [] 43 | 44 | def empty(self): 45 | ''' Shortcut for heap.size() == 0 46 | : return : True <=> there is no element in the heap 47 | ''' 48 | return len(self.__queue) <= 0 49 | 50 | def size(self): 51 | ''' Return the number of elements in the heap 52 | : return : The number of elements in the heap 53 | ''' 54 | return len(self.__queue) 55 | 56 | def put(self, element, priority): 57 | ''' Insert a new element in the heap 58 | : param element : The element to insert. 59 | : param priority : The priority associated with the element. 60 | : return : self, to allow method chaining. 61 | 62 | ''' 63 | new_item = (priority, element) 64 | self.__queue.append(new_item) #insert a placeholder to enlarge the array 65 | 66 | pos = len(self.__queue) - 1 67 | parent = (pos - 1) / self.__d 68 | 69 | while parent >= 0: 70 | 71 | if self.__smaller(new_item, self.__queue[parent]): 72 | 73 | self.__queue[pos] = self.__queue[parent] 74 | pos = parent 75 | parent = (parent - 1) / self.__d 76 | else: 77 | parent = pos 78 | break 79 | 80 | if parent < 0: 81 | parent = 0 82 | 83 | self.__queue[parent] = new_item 84 | assert self.check() 85 | 86 | return self 87 | 88 | def top(self): 89 | ''' Returns the top element, and removes it from the heap. 90 | 91 | : raise Exception : If the heap is empty. 92 | : return : The top element. 
93 | ''' 94 | size = self.size() 95 | if size == 0: 96 | raise Exception("The Heap is empty") 97 | 98 | #removes the last element from the queue 99 | item = self.__queue.pop() 100 | size -= 1 101 | 102 | if size == 0: 103 | return item[dway_heap.ELEMENT_INDEX] 104 | else: 105 | res = self.__queue[0] 106 | 107 | pos = 0 108 | tmp_pos = pos * self.__d + 1 109 | child_pos = tmp_pos 110 | 111 | while child_pos < size: 112 | #Look for the smallest children 113 | i = 1 114 | while i < self.__d and tmp_pos + i < size: 115 | if self.__smaller(self.__queue[tmp_pos + i], self.__queue[child_pos]): 116 | child_pos = tmp_pos + i 117 | i += 1 118 | if self.__smaller(self.__queue[child_pos], item): 119 | self.__queue[pos] = self.__queue[child_pos] 120 | 121 | pos = child_pos 122 | tmp_pos = pos * self.__d + 1 123 | child_pos = tmp_pos 124 | else: 125 | break 126 | 127 | self.__queue[pos] = item 128 | 129 | assert self.check() 130 | return res[dway_heap.ELEMENT_INDEX] 131 | 132 | def decrease_priority(self, element, priority): 133 | ''' Decrease the priority of a given key 134 | WARNING: duplicates keys aren't handled! 135 | : param element : The "key", aka the element whose priority must be 136 | decreased. 137 | : param priority : The new priority for the element. 138 | : raise IllegalArgumentException : If the new priority for a key is 139 | greater than the the existing one. 140 | : return : True iff the key was in the heap and its priority 141 | has been successfully updated. 142 | ''' 143 | for pos in xrange(self.size()): 144 | item = self.__queue[pos] 145 | if item[dway_heap.ELEMENT_INDEX] == element: 146 | if not self.__smaller_priority(priority, item[dway_heap.PRIORITY_INDEX]): 147 | raise Exception("Existing key priority can only be decreased!") 148 | self.__queue[pos] = new_item = (priority, element) 149 | break 150 | if pos == self.size(): 151 | return False #Key not found 152 | 153 | parent = (pos - 1) / self.__d 154 | 155 | while parent >= 0: 156 | 157 | if self.__smaller(new_item, self.__queue[parent]): 158 | 159 | self.__queue[pos] = self.__queue[parent] 160 | pos = parent 161 | parent = (parent - 1) / self.__d 162 | else: 163 | parent = pos 164 | break 165 | 166 | if parent < 0: 167 | parent = 0 168 | self.__queue[parent] = new_item 169 | 170 | assert self.check() 171 | 172 | def check(self): 173 | ''' Check queue integrity 174 | : raise AssertionError : If the main property of the dway heap is violated 175 | : return : True iff the heap is valid 176 | ''' 177 | pos = 0 178 | child = 1 179 | 180 | while child < self.size(): 181 | i = 0 182 | while i < self.__d and child + i < self.size(): 183 | assert not self.__smaller(self.__queue[child + i], self.__queue[pos]) 184 | i += 1 185 | 186 | pos += 1 187 | child = pos * self.__d + 1 188 | 189 | return True 190 | 191 | def clear(self): 192 | ''' Remove all the elements in the heap. 193 | : return : self, to allow method chaining. 194 | ''' 195 | self.__queue = [] 196 | return self 197 | 198 | def heapsort(self): 199 | ''' Return a sorted array with all the elements in the heap. 200 | WARNING: All the elemens will be removed from the heap! 201 | 202 | : return : An array with the elements in the heap. 
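        : example : Illustrative sketch (not part of the original source):
                        pq = dway_heap(3)
                        for k in [5, 1, 4]:
                            pq.put(k, k)
                        pq.heapsort()    # -> [1, 4, 5], and the heap is now empty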
203 | ''' 204 | if self.empty(): 205 | return [] 206 | else: 207 | res = [] 208 | while not self.empty(): 209 | res.append( self.top()) 210 | 211 | return res 212 | 213 | 214 | def __str__(self): 215 | ''' : override : 216 | ''' 217 | return " ".join(map(lambda x : str(x[dway_heap.ELEMENT_INDEX]), self.__queue)) 218 | 219 | 220 | if __name__ == '__main__': 221 | 222 | from random import randrange 223 | 224 | def test(): 225 | ''' Test the data structure 226 | ''' 227 | print "Test min heap" 228 | 229 | for d in xrange(2,10): 230 | #Test d way 231 | print d 232 | pq = dway_heap(d) 233 | 234 | assert pq.empty() 235 | for i in xrange(d ** randrange(2, 4) + randrange(d)): 236 | k = randrange(1000000) 237 | pq.put(k, k) 238 | #Test insert 239 | assert not pq.empty() 240 | assert pq.size() == i + 1 241 | 242 | print "---------------" 243 | A = pq.heapsort() 244 | 245 | #check that A is sorted (i.e. elements are popped from the queue in the right order 246 | B = sorted(A) 247 | assert(B == A) 248 | 249 | print "Decrease key in min heap" 250 | #Check decrease_key 251 | for d in xrange(2,10): 252 | #Test d way 253 | print d 254 | pq = dway_heap(d) 255 | memo = {} 256 | assert pq.empty() 257 | for i in xrange(d ** randrange(2, 4) + randrange(d)): 258 | k = randrange(1000000) 259 | pq.put(k,k) 260 | memo[(k, k)] = 1 261 | #Test insert 262 | assert not pq.empty() 263 | assert pq.size() == i + 1 264 | memo = memo.keys() 265 | for _ in xrange(min(5, randrange(pq.size()))): 266 | index = randrange(len(memo)) 267 | item = memo.pop(index) 268 | pq.decrease_priority(item[dway_heap.ELEMENT_INDEX], item[dway_heap.PRIORITY_INDEX] / 2) 269 | item = (item[dway_heap.PRIORITY_INDEX] / 2, item[dway_heap.ELEMENT_INDEX]) 270 | memo.append(item) 271 | 272 | print "Test max heap" 273 | for d in xrange(2,10): 274 | #Test d way 275 | print d 276 | pq = dway_heap(d, True) 277 | 278 | assert pq.empty() 279 | for i in xrange(d ** randrange(2, 4) + randrange(d)): 280 | k = randrange(1000000) 281 | pq.put(k, k) 282 | #Test insert 283 | assert not pq.empty() 284 | assert pq.size() == i + 1 285 | 286 | print "---------------" 287 | A = pq.heapsort() 288 | 289 | #check that A is sorted (i.e. 
elements are popped from the queue in the right order 290 | B = sorted(A, reverse=True) 291 | assert(B == A) 292 | 293 | print "Decrease key in max heap" 294 | #Check decrease_key 295 | for d in xrange(2,10): 296 | #Test d way 297 | print d 298 | pq = dway_heap(d, True) 299 | memo = {} 300 | assert pq.empty() 301 | for i in xrange(d ** randrange(2, 4) + randrange(d)): 302 | k = randrange(1000000) 303 | pq.put(k,k) 304 | memo[(k, k)] = 1 305 | #Test insert 306 | assert not pq.empty() 307 | assert pq.size() == i + 1 308 | memo = memo.keys() 309 | for _ in xrange(min(5, randrange(pq.size()))): 310 | index = randrange(len(memo)) 311 | item = memo.pop(index) 312 | pq.decrease_priority(item[dway_heap.ELEMENT_INDEX], item[dway_heap.PRIORITY_INDEX] * 2) 313 | item = (item[dway_heap.PRIORITY_INDEX] * 2, item[dway_heap.ELEMENT_INDEX]) 314 | memo.append(item) 315 | 316 | print "Test OK" 317 | #END of test definition 318 | 319 | test() -------------------------------------------------------------------------------- /dway_heap/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 28/giu/2013 3 | 4 | @author: mlarocca 5 | ''' 6 | __all__ = ["dway_heap", "dway_min_heap"] -------------------------------------------------------------------------------- /dway_heap/dway_heap.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 28/giu/2013 3 | 4 | @author: mlarocca 5 | ''' 6 | def empty(heap): 7 | ''' Shortcut for size(queue) == 0 8 | : param heap : A valid dway min-heap. 9 | : return : True <=> there is no element in the heap 10 | ''' 11 | try: 12 | queue = heap["queue"] 13 | except KeyError: 14 | raise TypeError("Invalid heap object") 15 | except TypeError: 16 | raise TypeError("Invalid heap object") 17 | 18 | return len(queue) <= 0 19 | 20 | 21 | def size(heap): 22 | ''' Return the number of elements in the heap 23 | : param heap : A valid dway min-heap. 24 | : return : The number of elements in the heap 25 | ''' 26 | try: 27 | queue = heap["queue"] 28 | except KeyError: 29 | raise TypeError("Invalid heap object") 30 | except TypeError: 31 | raise TypeError("Invalid heap object") 32 | return len(queue) 33 | 34 | 35 | def peek(heap): 36 | ''' Returns the top element, WITHOUT removing it from the heap. 37 | : param heap : A valid dway min-heap. 38 | : raise Exception : If the heap is empty. 39 | : return : The top element. 40 | ''' 41 | try: 42 | queue = heap["queue"] 43 | except KeyError: 44 | raise TypeError("Invalid heap object") 45 | except TypeError: 46 | raise TypeError("Invalid heap object") 47 | 48 | try: 49 | return queue[0] 50 | except IndexError: 51 | raise IndexError("peek on an empty queue") 52 | 53 | def create_heap(d): 54 | ''' : Constructor : 55 | Create a new d-way heap priority queue. 56 | : param d : The branch-factor of the heap. 57 | (MUST be at least 2) 58 | : type d : int (Other types will be converted to int) 59 | : raise IllegalArgumentException : If d is less than 2. 60 | : return : A new dway-heap pseudo-object. 
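    : example : Illustrative sketch (not part of the original source), assuming
                the sibling module dway_min_heap is importable from this package:
                    heap = create_heap(2)
                    dway_min_heap.put(heap, 7)
                    dway_min_heap.put(heap, 3)
                    peek(heap)    # -> 3
                    size(heap)    # -> 2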
61 | ''' 62 | if d < 2: 63 | raise Exception("IllegalArgumentException: minimum allowed branch-factor is 2") 64 | return {"d": d, "queue": []} -------------------------------------------------------------------------------- /dway_heap/dway_heap_test.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 28/giu/2013 3 | 4 | @author: mlarocca 5 | ''' 6 | import unittest 7 | import dway_min_heap, dway_max_heap 8 | from dway_heap import * 9 | from random import randrange 10 | 11 | class Test(unittest.TestCase): 12 | 13 | def test_min_heap(self): 14 | ''' Test the data structure 15 | ''' 16 | print "Test min heap" 17 | 18 | for BRANCHING_FACTOR in xrange(2,10): 19 | #Test d way 20 | print BRANCHING_FACTOR 21 | pq = create_heap(BRANCHING_FACTOR) 22 | 23 | assert empty(pq) 24 | for i in xrange(BRANCHING_FACTOR ** randrange(2, 4) + randrange(BRANCHING_FACTOR)): 25 | k = randrange(1000000) 26 | dway_min_heap.put(pq, k) 27 | #Test insert 28 | assert not empty(pq) 29 | assert size(pq) == i + 1 30 | 31 | print "---------------" 32 | A = dway_min_heap.heapsort(pq) 33 | 34 | #check that A is sorted (i.e. elements are popped from the queue in the right order 35 | B = sorted(A) 36 | assert(B == A) 37 | 38 | print "Decrease key in min heap" 39 | #Check decrease_key 40 | for BRANCHING_FACTOR in xrange(2,10): 41 | #Test d way 42 | print BRANCHING_FACTOR 43 | pq = create_heap(BRANCHING_FACTOR) 44 | memo = {} 45 | assert empty(pq) 46 | for i in xrange(BRANCHING_FACTOR ** randrange(2, 4) + randrange(BRANCHING_FACTOR)): 47 | k = randrange(1000000) 48 | dway_min_heap.put(pq,k) 49 | memo[k] = 1 50 | #Test insert 51 | assert not empty(pq) 52 | assert size(pq) == i + 1 53 | memo = memo.keys() 54 | for _ in xrange(min(5, randrange(size(pq)))): 55 | index = randrange(len(memo)) 56 | item = memo.pop(index) 57 | dway_min_heap.decrease_priority(pq, item, item / 2) 58 | memo.append(item / 2) 59 | 60 | def test_max_heap(self): 61 | ''' Test the data structure 62 | ''' 63 | print "Test max heap" 64 | 65 | for BRANCHING_FACTOR in xrange(2,10): 66 | #Test d way 67 | print BRANCHING_FACTOR 68 | pq = create_heap(BRANCHING_FACTOR) 69 | 70 | assert empty(pq) 71 | for i in xrange(BRANCHING_FACTOR ** randrange(2, 4) + randrange(BRANCHING_FACTOR)): 72 | k = randrange(1000000) 73 | dway_max_heap.put(pq, k) 74 | #Test insert 75 | assert not empty(pq) 76 | assert size(pq) == i + 1 77 | 78 | print "---------------" 79 | A = dway_max_heap.heapsort(pq) 80 | 81 | #check that A is sorted (i.e. 
elements are popped from the queue in the right order 82 | B = sorted(A, reverse=True) 83 | assert(B == A) 84 | 85 | print "Decrease key in max heap" 86 | #Check decrease_key 87 | for BRANCHING_FACTOR in xrange(2,10): 88 | #Test d way 89 | print BRANCHING_FACTOR 90 | pq = create_heap(BRANCHING_FACTOR) 91 | memo = {} 92 | assert empty(pq) 93 | for i in xrange(BRANCHING_FACTOR ** randrange(2, 4) + randrange(BRANCHING_FACTOR)): 94 | k = randrange(1000000) 95 | dway_max_heap.put(pq,k) 96 | memo[k] = 1 97 | #Test insert 98 | assert not empty(pq) 99 | assert size(pq) == i + 1 100 | memo = memo.keys() 101 | for _ in xrange(min(5, randrange(size(pq)))): 102 | index = randrange(len(memo)) 103 | item = memo.pop(index) 104 | dway_max_heap.increase_priority(pq, item, item * 2) 105 | memo.append(item * 2) 106 | #END of test definition 107 | 108 | 109 | if __name__ == "__main__": 110 | #import sys;sys.argv = ['', 'Test.testName'] 111 | unittest.main() -------------------------------------------------------------------------------- /dway_heap/dway_max_heap.py: -------------------------------------------------------------------------------- 1 | from dway_heap import create_heap, empty, peek, size 2 | 3 | def put(heap, new_item): 4 | ''' Insert a new element in the heap 5 | : param heap : A valid dway min-heap. 6 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 7 | : param new_item : The element to insert. 8 | : return : self, to allow method chaining. 9 | ''' 10 | try: 11 | queue = heap["queue"] 12 | d = heap["d"] 13 | except KeyError: 14 | raise TypeError("Invalid heap object") 15 | except TypeError: 16 | raise TypeError("Invalid heap object") 17 | 18 | queue.append(new_item) #insert a placeholder to enlarge the array 19 | 20 | pos = len(queue) - 1 21 | parent = (pos - 1) / d 22 | 23 | while parent >= 0: 24 | 25 | if new_item > queue[parent]: 26 | 27 | queue[pos] = queue[parent] 28 | pos = parent 29 | parent = (parent - 1) / d 30 | else: 31 | parent = pos 32 | break 33 | 34 | if parent < 0: 35 | parent = 0 36 | 37 | queue[parent] = new_item 38 | assert check(heap) 39 | return 40 | 41 | def top(heap): 42 | ''' Returns the top element, and removes it from the heap. 43 | : param heap : A valid dway min-heap. 44 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 45 | : raise Exception : If the heap is empty. 46 | : return : The top element. 
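    : example : Illustrative sketch (not part of the original source):
                    heap = create_heap(2)
                    put(heap, 2)
                    put(heap, 9)
                    top(heap)    # -> 9 (largest element comes first in a max heap)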
47 | ''' 48 | try: 49 | queue = heap["queue"] 50 | d = heap["d"] 51 | except KeyError: 52 | raise TypeError("Invalid heap object") 53 | except TypeError: 54 | raise TypeError("Invalid heap object") 55 | 56 | 57 | #removes the last element from the queue 58 | try: 59 | item = queue.pop() 60 | except IndexError: 61 | raise IndexError("top of an empty queue") 62 | 63 | size = len(queue) 64 | 65 | if size == 0: 66 | return item 67 | else: 68 | res = queue[0] 69 | 70 | pos = 0 71 | tmp_pos = pos * d + 1 72 | child_pos = tmp_pos 73 | 74 | while child_pos < size: 75 | #Look for the smallest children 76 | i = 1 77 | while i < d and tmp_pos + i < size: 78 | if queue[tmp_pos + i] > queue[child_pos]: 79 | child_pos = tmp_pos + i 80 | i += 1 81 | if queue[child_pos] > item: 82 | queue[pos] = queue[child_pos] 83 | 84 | pos = child_pos 85 | tmp_pos = pos * d + 1 86 | child_pos = tmp_pos 87 | else: 88 | break 89 | 90 | queue[pos] = item 91 | 92 | assert check(heap) 93 | return res 94 | 95 | def increase_priority(heap, old_element, new_element): 96 | ''' Increase the priority of a given key 97 | WARNING: duplicates keys aren't handled! 98 | : param heap : A valid dway min-heap. 99 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 100 | : param old_element : The element to be replaced. 101 | : param new_element : The new value for that element. 102 | : raise IllegalArgumentException : If the new element is smaller than the the existing one. 103 | : return : True iff the old element was in the heap and it is successfully updated. 104 | ''' 105 | if new_element < old_element: 106 | raise Exception("In max-heaps existing elements' priority can only be increased!") 107 | 108 | try: 109 | queue = heap["queue"] 110 | d = heap["d"] 111 | except KeyError: 112 | raise TypeError("Invalid heap object") 113 | except TypeError: 114 | raise TypeError("Invalid heap object") 115 | 116 | 117 | size = len(queue) 118 | 119 | for pos in xrange(size): 120 | item = queue[pos] 121 | if item == old_element: 122 | queue[pos] = new_element 123 | break 124 | if pos == size: 125 | return False #Key not found 126 | 127 | parent = (pos - 1) / d 128 | 129 | while parent >= 0: 130 | 131 | if new_element > queue[parent]: 132 | 133 | queue[pos] = queue[parent] 134 | pos = parent 135 | parent = (parent - 1) / d 136 | else: 137 | parent = pos 138 | break 139 | 140 | if parent < 0: 141 | parent = 0 142 | queue[parent] = new_element 143 | 144 | assert check(heap) 145 | 146 | def heapsort(heap): 147 | ''' Return a sorted array with all the elements in the heap. 148 | WARNING: All the elemens will be removed from the heap! 149 | : param heap : A valid dway min-heap. 150 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 151 | : return : An array with the elements in the heap. 152 | ''' 153 | 154 | if empty(heap): 155 | return [] 156 | else: 157 | try: 158 | queue = heap["queue"] 159 | except KeyError: 160 | raise KeyError("Invalid heap object") 161 | 162 | res = [] 163 | while len(queue) > 0: 164 | res.append(top(heap)) 165 | 166 | return res 167 | 168 | 169 | def check(heap): 170 | ''' Check queue integrity 171 | : param heap : A valid dway min-heap. 172 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 
173 | : raise AssertionError : If the main property of the dway heap is violated 174 | : return : True iff the heap is valid 175 | ''' 176 | try: 177 | queue = heap["queue"] 178 | d = heap["d"] 179 | except KeyError: 180 | raise TypeError("Invalid heap object") 181 | except TypeError: 182 | raise TypeError("Invalid heap object") 183 | 184 | pos = 0 185 | child = 1 186 | size = len(queue) 187 | while child < size: 188 | i = 0 189 | while i < d and child + i < size: 190 | assert queue[child + i] <= queue[pos] 191 | i += 1 192 | 193 | pos += 1 194 | child = pos * d + 1 195 | 196 | return True 197 | 198 | -------------------------------------------------------------------------------- /dway_heap/dway_min_heap.py: -------------------------------------------------------------------------------- 1 | from dway_heap import create_heap, empty, peek, size 2 | 3 | def put(heap, new_item): 4 | ''' Insert a new element in the heap 5 | : param heap : A valid dway min-heap. 6 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 7 | : param new_item : The element to insert. 8 | : return : self, to allow method chaining. 9 | ''' 10 | try: 11 | queue = heap["queue"] 12 | d = heap["d"] 13 | except KeyError: 14 | raise TypeError("Invalid heap object") 15 | except TypeError: 16 | raise TypeError("Invalid heap object") 17 | 18 | queue.append(new_item) #insert a placeholder to enlarge the array 19 | 20 | pos = len(queue) - 1 21 | parent = (pos - 1) / d 22 | 23 | while parent >= 0: 24 | 25 | if new_item < queue[parent]: 26 | 27 | queue[pos] = queue[parent] 28 | pos = parent 29 | parent = (parent - 1) / d 30 | else: 31 | parent = pos 32 | break 33 | 34 | if parent < 0: 35 | parent = 0 36 | 37 | queue[parent] = new_item 38 | assert check(heap) 39 | return 40 | 41 | def top(heap): 42 | ''' Returns the top element, and removes it from the heap. 43 | : param heap : A valid dway min-heap. 44 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 45 | : raise Exception : If the heap is empty. 46 | : return : The top element. 47 | ''' 48 | try: 49 | queue = heap["queue"] 50 | d = heap["d"] 51 | except KeyError: 52 | raise TypeError("Invalid heap object") 53 | except TypeError: 54 | raise TypeError("Invalid heap object") 55 | 56 | 57 | #removes the last element from the queue 58 | try: 59 | item = queue.pop() 60 | except IndexError: 61 | raise IndexError("top of an empty queue") 62 | 63 | size = len(queue) 64 | 65 | if size == 0: 66 | return item 67 | else: 68 | res = queue[0] 69 | 70 | pos = 0 71 | tmp_pos = pos * d + 1 72 | child_pos = tmp_pos 73 | 74 | while child_pos < size: 75 | #Look for the smallest children 76 | i = 1 77 | while i < d and tmp_pos + i < size: 78 | if queue[tmp_pos + i] < queue[child_pos]: 79 | child_pos = tmp_pos + i 80 | i += 1 81 | if queue[child_pos] < item: 82 | queue[pos] = queue[child_pos] 83 | 84 | pos = child_pos 85 | tmp_pos = pos * d + 1 86 | child_pos = tmp_pos 87 | else: 88 | break 89 | 90 | queue[pos] = item 91 | 92 | assert check(heap) 93 | return res 94 | 95 | def decrease_priority(heap, old_element, new_element): 96 | ''' Decrease the priority of a given key 97 | WARNING: duplicates keys aren't handled! 98 | : param heap : A valid dway min-heap. 99 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 100 | : param old_element : The element to be replaced. 101 | : param new_element : The new value for that element. 
102 | : raise IllegalArgumentException : If the new element is greater than the the existing one. 103 | : return : True iff the old element was in the heap and it is successfully updated. 104 | ''' 105 | if new_element > old_element: 106 | raise Exception("In min-heaps existing elements' priority can only be decreased!") 107 | 108 | try: 109 | queue = heap["queue"] 110 | d = heap["d"] 111 | except KeyError: 112 | raise TypeError("Invalid heap object") 113 | except TypeError: 114 | raise TypeError("Invalid heap object") 115 | 116 | 117 | size = len(queue) 118 | 119 | for pos in xrange(size): 120 | item = queue[pos] 121 | if item == old_element: 122 | queue[pos] = new_element 123 | break 124 | if pos == size: 125 | return False #Key not found 126 | 127 | parent = (pos - 1) / d 128 | 129 | while parent >= 0: 130 | 131 | if new_element < queue[parent]: 132 | 133 | queue[pos] = queue[parent] 134 | pos = parent 135 | parent = (parent - 1) / d 136 | else: 137 | parent = pos 138 | break 139 | 140 | if parent < 0: 141 | parent = 0 142 | queue[parent] = new_element 143 | 144 | assert check(heap) 145 | 146 | def heapsort(heap): 147 | ''' Return a sorted array with all the elements in the heap. 148 | WARNING: All the elemens will be removed from the heap! 149 | : param heap : A valid dway min-heap. 150 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 151 | : return : An array with the elements in the heap. 152 | ''' 153 | 154 | if empty(heap): 155 | return [] 156 | else: 157 | try: 158 | queue = heap["queue"] 159 | except KeyError: 160 | raise KeyError("Invalid heap object") 161 | 162 | res = [] 163 | while len(queue) > 0: 164 | res.append(top(heap)) 165 | 166 | return res 167 | 168 | 169 | def check(heap): 170 | ''' Check queue integrity 171 | : param heap : A valid dway min-heap. 172 | : type heap : Heap, a pseudo-object created by dway_heap.create_heap function. 173 | : raise AssertionError : If the main property of the dway heap is violated 174 | : return : True iff the heap is valid 175 | ''' 176 | try: 177 | queue = heap["queue"] 178 | d = heap["d"] 179 | except KeyError: 180 | raise TypeError("Invalid heap object") 181 | except TypeError: 182 | raise TypeError("Invalid heap object") 183 | 184 | pos = 0 185 | child = 1 186 | size = len(queue) 187 | while child < size: 188 | i = 0 189 | while i < d and child + i < size: 190 | assert queue[child + i] >= queue[pos] 191 | i += 1 192 | 193 | pos += 1 194 | child = pos * d + 1 195 | 196 | return True 197 | 198 | -------------------------------------------------------------------------------- /genetic_algorithm.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: mlarocca 3 | ''' 4 | 5 | from time import time 6 | from random import random, randrange, seed 7 | from math import copysign 8 | from copy import deepcopy 9 | 10 | from numpy import mean, std 11 | 12 | 13 | ''' Genetic Algorithm 14 | The class is designed on the Template Pattern: it implements just the sketch 15 | of a genetic algorithm, with a random initialization, and then a cycle, with 16 | a new __population created at each iteration from the __population at the previous 17 | one. 
18 | This class specifies only the selection algorithm (round robin selection) 19 | and the elitism criteria; the details of chromosomes' structure, of the 20 | crossover and of the mutations algorithms (including the number of different 21 | kinds of mutations), together with their ratio of application, are completely 22 | left to the specific class that models evolving individuals. 23 | 24 | Of course the Template pattern isn't fully applied in this case because the 25 | further generalization needed would make the implementation unnecessarily 26 | complicated and would also penalize the performance. 27 | ''' 28 | class genetic_algorithm(): 29 | 30 | 31 | 32 | ''' Constructor 33 | Sets up the parameters 34 | @param individual_initializer: A reference to the constructor function 35 | of the individuals used. 36 | @param population_size: The number of individuals to be included in the 37 | __population to evolve; 38 | WARNING: this value MUST be greater than or 39 | equal to 4 otherwise crossover would 40 | be meaningless (and selection would 41 | raise an exception anyway); 42 | @param time_limit: The maximum time that can be spent on optimizing; 43 | WARNING: the actual time spent will be slightly 44 | larger than this limit, because only 45 | when the limit is already crossed the 46 | main function will return; 47 | ''' 48 | def __init__(self, individual_initializer, population_size, time_limit): 49 | assert(population_size >= 4) 50 | assert(time_limit > 0) 51 | 52 | self.__individual_initializer = individual_initializer 53 | self.__population_size = population_size 54 | self.__time_limit = time_limit 55 | 56 | ''' Genetic Algorithm main 57 | Although it has been created specifically for this challenge, it sketches 58 | a general purpose genetic algorithm, needing just a few adjustments 59 | to be used for different problems. 60 | The application of the Template Design Pattern is limited in order to 61 | achieve clarity, readability and good performance. 62 | 63 | The algorithm goes through the following steps: 64 | 1) Generates randomly an initial population; The details of the 65 | generation of the single individual are left to the 66 | problem-specific class that models individual; 67 | 2) Repeats the following cycle, until the allotted time is over: 68 | 2.a) Let's the best element(s) of the previous generation 69 | pass through to the next one unchanged (elitism); 70 | 2.b) Until the new population hasn't been fully generated: 71 | 2.b.1) Randomly selects couple of elements from the old 72 | generation and let them reproduce (either by 73 | crossover or cloning); 74 | 2.b.2) Applies mutation(s) to the couple of elements produced 75 | by the reproduction routine at the previous step; 76 | 2.b.3) Adds each of the new elements to the new population, 77 | in the right position (the population is kept 78 | in reverse order with respect to the fitness - 79 | higher fitness means better elements); 80 | INVARIANT: after the iteration is completed, the first 81 | element in the population, if it models a valid solution, 82 | is also the best solution found so far. 83 | 3) Returns The solution modeled by the first element of the population 84 | 85 | @param file_log: Optional parameter: the file to which write log info, like 86 | intermediate results. 87 | @return: (best_score, best_element) 88 | A couple whose first element is the score of the best solution found, 89 | and the second one is the solution itself. 
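    @example: Illustrative sketch (not part of the original source), using the
              base Individual class defined further down in this module:
                  initializer = lambda: Individual(30)
                  ga = genetic_algorithm(initializer, 20, 0.5)
                  best_fitness, best_individual = ga.start()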
90 | ''' 91 | def start(self, file_log = None): 92 | #Need to ensure randomization 93 | seed(time()) 94 | 95 | 96 | self.__population = self.__init_population(self.__individual_initializer, 97 | self.__population_size) 98 | 99 | #DEBUG 100 | if file_log != None: 101 | it = 0 102 | 103 | start_time = time() #Doen't count the initialization time, in order to have the main 104 | #cycle executed at least once! 105 | 106 | while time() - start_time < self.__time_limit: 107 | #Elitism: the best element always passes to the next generation 108 | new_population = [self.__population[0]] 109 | #If __population_size is even, then, since new elements are added in pairs, 110 | #to match the size extends elitism to the second best individual 111 | if not self.__population_size % 2: 112 | new_population.append(self.__population[1]) 113 | M = len(new_population) 114 | while M < self.__population_size: 115 | #Select 2 individuals from the previous __population, and then have them reproduced to 116 | #the next one, either by crossover or cloning (see __reproduction function) 117 | (new_individual_1, 118 | new_individual_2) = self.__reproduction( 119 | self.__selection(self.__population, 120 | len(self.__population))) 121 | 122 | for individual in [new_individual_1, new_individual_2]: 123 | #Applies the mutations according to the rates specified by the Individual's class itself 124 | for mutation in individual.MUTATIONS: 125 | self.__apply_mutation(mutation) 126 | 127 | #Tries to insert the new element in the existing __population 128 | for i in range(M): 129 | #Higher __fitness individuals have better rank 130 | if self.__fitness(individual) > self.__fitness(new_population[i]): 131 | new_population.insert(i, individual) 132 | break 133 | if i==M-1: 134 | #Element must be added to the end of the list 135 | new_population.append(individual) 136 | M+=1 137 | 138 | self.__population = new_population 139 | 140 | #DEBUG 141 | if file_log != None: 142 | it += 1 143 | fitnesses = map(lambda ind: self.__fitness(ind), new_population) 144 | file_log.writelines('\tIteration # {} - Fitness: Best={}, mean={}, std={}\n' 145 | .format(it, self.__fitness(new_population[0]), 146 | mean(fitnesses), std(fitnesses))) 147 | 148 | best_fitness = self.__fitness(self.__population[0]) 149 | 150 | return best_fitness, self.__population[0] 151 | 152 | 153 | ''' Creates a population of the specified size of Individual individuals, 154 | using the "constructor" method for the Individuals specified when the 155 | Genetic Algorithm was itself init. 156 | 157 | @param individual_initializer: A reference to the constructor function 158 | of the individuals used. 159 | @param population_size: The desired size for the population set; 160 | @return: The new population created. 161 | ''' 162 | def __init_population(self, individual_initializer, population_size): 163 | new_population = [] 164 | for i in xrange(population_size): 165 | new_population.append(individual_initializer()) 166 | return new_population 167 | 168 | ''' Shortcut to compute any individual's fitness 169 | @param individual: The member of the __population whose fitness must be computed; 170 | @return: The value of the individual's fitness. 
171 | ''' 172 | def __fitness(self, individual): 173 | return individual.computeFitness() 174 | 175 | ''' Shortcut to perform "reproduction" on a couple of individuals; 176 | The crossover reproduction is applied with probability CROSSOVER_PROBABILITY, 177 | otherwise the individuals just clone themselves into the new generation; 178 | 179 | @param individual_1: The first element that is going to reproduct; 180 | @param individual_2: The second element that is going to reproduct; 181 | ''' 182 | def __reproduction(self, (individual_1, individual_2)): 183 | if random() < individual_1.CROSSOVER_PROBABILITY: 184 | #Applies crossover (100*CROSSOVER_PROBABILITY)% of the times... 185 | (new_individual_1, new_individual_2) = individual_1.crossover(individual_2) 186 | else: 187 | #... otherwise the individuals are just copied to next generation 188 | (new_individual_1, new_individual_2) = (individual_1.copy(), individual_2.copy()) 189 | 190 | return (new_individual_1, new_individual_2) 191 | 192 | ''' Shortcut to perform one of the kinds of mutations designed for the 193 | specific problem; 194 | 195 | @param mutation: the function that actually perform the mutation; 196 | @param mutation_probability: the probability that the mutation is actually 197 | applied. 198 | ''' 199 | def __apply_mutation(self, (mutation, mutation_probability)): 200 | if random() < mutation_probability: 201 | mutation() 202 | 203 | 204 | ''' Round robin selection; 205 | The how_many elements are chosen randomly from the __population; 206 | For each element returned, two candidates are taken randomly from a uniform 207 | distribution over the __population set, then with probability SELECT_BEST_PROBABILITY 208 | the best of the two is chosen, and with prob. 1.-SELECT_BEST_PROBABILITY the least 209 | fit one is chosen. 210 | The probability SELECT_BEST_PROBABILITY is left to the specific problem to choose; 211 | If SELECT_BEST_PROBABILITY == 0.5 each element is selected exactly with uniform 212 | probability, otherwise the mean is shifted towards one of the sides in 213 | proportion to the difference SELECT_BEST_PROBABILITY - 0.5, in the same way as 214 | the mean of the minimum between two uniform random numbers in [0,1] 215 | becomes 1/3 and the mean of the maximum becomes 2/3; 216 | 217 | @param __population: The __population from which to choose the individuals; 218 | @param size: The size of the __population from which to choose; 219 | NOTE: size can be lower than len(__population), allowing 220 | to use only a subset of the __population; 221 | @param how_many: The number of elements to be selected; 222 | @return: The list of elements chosen. 
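    @note: Illustrative note (not part of the original source): with the default
           SELECT_BEST_PROBABILITY of 0.7 defined in Individual, each returned
           element is the fitter of its two random candidates about 70% of the
           time, so selection is biased towards better individuals while still
           letting weaker ones reproduce occasionally.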
223 | ''' 224 | def __selection(self, population, size, how_many = 2): 225 | #INVARIANT: len(__population) >= size >= how_many * 2 226 | indices = [i for i in range(size)] 227 | chosen = [] 228 | for i in range(how_many): 229 | #Chooses two individuals randomly 230 | first = indices[randrange(size)] 231 | indices.remove(first) #Doesn't allow repetitions (Every index generated here must be different) 232 | size -= 1 233 | second = indices[randrange(size)] 234 | indices.remove(second) #Doesn't allow repetitions 235 | size -= 1 236 | 237 | if random() < population[first].SELECT_BEST_PROBABILITY: 238 | #The one with better rank is chosen 239 | mul = 1 240 | else: 241 | #The one with worst rank is chosen 242 | mul = -1 243 | 244 | if mul * population[first].computeFitness() > mul * population[second].computeFitness(): 245 | chosen.append(self.__population[first]) 246 | else: 247 | chosen.append(self.__population[second]) 248 | 249 | return chosen 250 | 251 | 252 | #END of class genetic_algorithm 253 | 254 | 255 | 256 | ''' Class Individual 257 | A Individual Object models the solution to the problem in such a way that 258 | it can be used as the basis of the genetic algorithm sketched in the class 259 | genetic_algorithm. 260 | This is a base class from which real individuals should be modeled, 261 | according to the specific problem. 262 | 263 | Chromosomes are represented array of flags. 264 | ''' 265 | class Individual(): 266 | 267 | ''' Constant: alias for a value denoting a valid solution (each 268 | ''' 269 | __VALID_SOLUTION = 1. 270 | ''' Constant: alias for a value denoting an invalid subsed (one that 271 | violate some constraint).''' 272 | __NOT_VALID_SOLUTION = 0. 273 | 274 | ''' The probability that during round robin selection it is chosen the best 275 | individual from the dueling couple .''' 276 | SELECT_BEST_PROBABILITY = .7 277 | ''' The probabilty that crossover is applied during individuals reproduction.''' 278 | CROSSOVER_PROBABILITY = .8; 279 | 280 | 281 | ''' Constructor: define a constant set containing reference to the mutation 282 | methods coupled with the probability with which they should be applied; 283 | The chromosome of the individual can either be passed as a parameter, 284 | or generated at random. 285 | ''' 286 | def __init__(self, chromosome_size, chromosome=None): 287 | assert(chromosome_size > 0) 288 | ''' Only one kind of mutation is applied, and it is stored together with 289 | its ratio of application for this problem. 290 | Subclasses may add mutation methods as well.''' 291 | self.MUTATIONS = [(self.__mutation_1, 0.5)] 292 | 293 | if chromosome != None: 294 | self.__chromosome = deepcopy(chromosome) 295 | else: 296 | self.__chromosome = self.__random_init(chromosome_size) 297 | 298 | self.__changed = True 299 | 300 | 301 | 302 | ''' Initialize the element mask, which denotes the subset of the Universe 303 | of the Stories characterizing a single element. 304 | The probability distribution over the space of the subsets is uniform. 305 | @param N: The size of the Universe 306 | @return: A list of 0 and 1, representing a bit mask that denotes 307 | a subset of the Universe (i.e. the set of all the Stories 308 | in the DB). 309 | ''' 310 | def __random_init(self, N): 311 | 312 | ''' 313 | @return: 0 or 1 with probability 1 over 2. 314 | ''' 315 | def random_bit(): 316 | if random() < 0.5: 317 | return 1 318 | else: 319 | return 0 320 | 321 | return [random_bit() for i in xrange(N)] 322 | 323 | 324 | ''' Shortcut for a deepcopy of the element. 
325 | @return: a deepcopy of the individual. 326 | ''' 327 | def copy(self): 328 | copy_instance = deepcopy(self) 329 | return copy_instance 330 | 331 | 332 | 333 | ''' Computes the fitness associated with this Individual 334 | WARNING: This method MUST be overridden by a problem specific version. 335 | 336 | The base class version returns just the number of ones in the chromosome. 337 | To speed up runtime, the fitness is computed again only when the individual 338 | has been changed since the last time it was computed. 339 | 340 | @return: The indidual's fitness, as the tuple described above. 341 | ''' 342 | def computeFitness(self): 343 | if self.__changed: 344 | self._fitness = 0 345 | for i in xrange(len(self.__chromosome)): 346 | if self.__chromosome[i]: 347 | self._fitness += 1 348 | self.__changed = False 349 | 350 | return self._fitness 351 | 352 | ''' Crossover 353 | Single point Crossover is used for individuals reproduction: it is randomily 354 | chosen one point in the middle of the chromosome, and the 4 halves created 355 | by dividing the two individuals' genomes are mixed together to form 356 | 2 new individuals. 357 | 358 | @param other: The other subset that will be used for reproduction; 359 | @return: A couple of brand new individuals. 360 | ''' 361 | def crossover(self, other): 362 | N = len(self.__chromosome) 363 | if N<3: 364 | return self.copy(), other.copy() 365 | 366 | point = 1 + randrange(N-2) #Crossing point must be non-trivial 367 | new_mask_1 = self.__chromosome[:point] + other.__chromosome[point:] 368 | new_mask_2 = other.__chromosome[:point] + self.__chromosome[point:] 369 | return Individual(N, new_mask_1), Individual(N, new_mask_2) 370 | 371 | ''' Mutation1 372 | One flag, chosen at random, is flipped, so that one gene previously 373 | activated won't be anymore, or viceversa; 374 | 375 | WARNING: Mutation1 changes the modify the object it's called on! 376 | ''' 377 | def __mutation_1(self): 378 | point = randrange(len(self.__chromosome)) 379 | self.__chromosome[point] = int( copysign(self.__chromosome[point]-1, 1) ) 380 | self.__changed = True 381 | 382 | #END of class Individual 383 | 384 | 385 | ''' Simple example: ones counter 386 | ''' 387 | if __name__ == '__main__': 388 | from sys import stdout 389 | chromosome_size = 50 390 | initializer = lambda: Individual(chromosome_size) 391 | ga = genetic_algorithm(initializer, 20, 0.25) #20 individuals, 0.25 seconds time limit. 
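    #Illustrative note (not part of the original source): fitness here just counts
    #the ones in the chromosome, so with a longer time limit the best fitness
    #should approach chromosome_size.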
392 | fitness, solution = ga.start(stdout) #Print log info on stdout -------------------------------------------------------------------------------- /graph.js: -------------------------------------------------------------------------------- 1 | function node(k, v){ 2 | that = { 3 | key:k, 4 | val:v, 5 | 6 | compareTo: function(othernode){ 7 | return compareDistances(v, othernode.val); 8 | } 9 | } 10 | Object.preventExtensions(that); 11 | return that; 12 | } 13 | 14 | function vertex( label ){ 15 | "use strict"; 16 | 17 | var that = {}, col; 18 | 19 | Object.defineProperty( that, "getLabel", { 20 | value: function(){ 21 | return label; 22 | }, 23 | writable: false, 24 | enumerable: false, 25 | configurable: false 26 | }); 27 | Object.defineProperty( that, "setLabel", { 28 | value: function(newLabel){ 29 | label = newLabel; 30 | }, 31 | writable: false, 32 | enumerable: false, 33 | configurable: false 34 | }); 35 | 36 | Object.defineProperty( that, "equals", { 37 | value: function(otherLabel){ 38 | return label === otherLabel; 39 | }, 40 | writable: false, 41 | enumerable: false, 42 | configurable: false 43 | }); 44 | 45 | Object.defineProperty( that, "color", { 46 | get: function(){return col;}, 47 | set: function(c){ 48 | if (!c){ 49 | throw "Invalid color parameter"; 50 | } 51 | c = c.toLowerCase(); 52 | if ( c === "white" || c === "gray" || c === "black" ){ 53 | col = c; 54 | }else{ 55 | throw "Invalid color parameter"; 56 | } 57 | }, 58 | enumerable: false, 59 | configurable: false 60 | }); 61 | 62 | Object.defineProperty( that, "isVertex", { 63 | value: true, 64 | writable: false, 65 | enumerable: false, 66 | configurable: false 67 | }); 68 | 69 | Object.preventExtensions(that); 70 | 71 | return that; 72 | } 73 | 74 | function edge(src, dest, weight){ 75 | "use strict"; 76 | 77 | var that = {}, tipo; 78 | 79 | if (!src || !src.isVertex || !dest || !dest.isVertex){ 80 | throw { 81 | name: "IllegalArgumentException", 82 | message: "Source and destination must be valid vertex" 83 | }; 84 | } 85 | 86 | 87 | Object.defineProperty( that, "getSrc", { 88 | value: function(){ 89 | return src; 90 | }, 91 | writable: false, 92 | enumerable: false, 93 | configurable: false 94 | }); 95 | 96 | Object.defineProperty( that, "getDest", { 97 | value: function(){ 98 | return dest; 99 | }, 100 | writable: false, 101 | enumerable: false, 102 | configurable: false 103 | }); 104 | 105 | Object.defineProperty( that, "getWeight", { 106 | value: function(){ 107 | return weight; 108 | }, 109 | writable: false, 110 | enumerable: false, 111 | configurable: false 112 | }); 113 | 114 | Object.defineProperty( that, "setWeight", { 115 | value: function(w){ 116 | weight = w; 117 | }, 118 | writable: false, 119 | enumerable: false, 120 | configurable: false 121 | }); 122 | 123 | Object.defineProperty( that, "type", { 124 | get: function(){return tipo;}, 125 | set: function(t){ 126 | if (!t){ 127 | throw "Invalid edge type parameter"; 128 | } 129 | t = t.toLowerCase(); 130 | if ( t === "tree" || t === "forward" || t === "back" || t === "cross" ){ 131 | tipo = t; 132 | }else{ 133 | throw "Invalid edge type parameter"; 134 | } 135 | }, 136 | enumerable: false, 137 | configurable: false 138 | }); 139 | 140 | Object.defineProperty( that, "isTreeEdge", { 141 | value: function(){ return (tipo === "edge");}, 142 | writable: false, 143 | enumerable: false, 144 | configurable: false 145 | }); 146 | 147 | Object.defineProperty( that, "isForwardEdge", { 148 | value: function(){ return (tipo === "forward");}, 149 | writable: false, 
150 | enumerable: false, 151 | configurable: false 152 | }); 153 | 154 | 155 | Object.defineProperty( that, "isBackEdge", { 156 | value: function(){ return (tipo === "back");}, 157 | writable: false, 158 | enumerable: false, 159 | configurable: false 160 | }); 161 | 162 | Object.defineProperty( that, "isCrossEdge", { 163 | value: function(){ return (tipo === "cross");}, 164 | writable: false, 165 | enumerable: false, 166 | configurable: false 167 | }); 168 | 169 | Object.defineProperty( that, "isEdge", { 170 | value: true, 171 | writable: false, 172 | enumerable: false, 173 | configurable: false 174 | }); 175 | 176 | Object.preventExtensions(that); 177 | 178 | return that; 179 | } 180 | 181 | function graph(vertices, directed){ 182 | "use strict"; 183 | 184 | var i, 185 | that = {}, 186 | printGraph, addVertex, findVertex, findVertexIndex, findEdgeIndex, removeVertex, addEdge, removeEdge, DFS, BFS, Dijkstra, Prim, 187 | vList = [], 188 | edges = {}; 189 | 190 | findVertex = function(vLabel){ 191 | var i; 192 | //alert("Looking for " + vLabel); 193 | for (i=0; i enter_time[parentLabel] ){ 316 | e.type = "forward"; 317 | }else{ 318 | e.type = "cross"; 319 | } 320 | return; 321 | default : 322 | throw "Error"; 323 | } 324 | }//else => vertex is a start one for DFS => color must be white (it is checked in the DS for loop) 325 | vertex.color = "gray"; 326 | enter_time[vLabel] = ++time; 327 | predecessors[vLabel] = parentLabel; 328 | archi = edges[vLabel]; 329 | for (i=0; i 346 | for(i=0; i
" + vList[i].getLabel() + " <= " + predecessors[vList[i].getLabel()] ); 351 | for (j = 0; j " + edges[vList[i].getLabel()][j].getDest().getLabel().toString() + ", " ); 354 | if ( edges[vList[i].getLabel()][j].type ){ 355 | vs.push( "(" + edges[vList[i].getLabel()][j].type + ")" ); 356 | } 357 | } 358 | } 359 | 360 | } 361 | // 362 | return vs.join(""); 363 | } 364 | 365 | BFS = function(sourceLabel){ 366 | var s, v, u, i, vq, archi, 367 | predecessors = [], 368 | distances = [], 369 | vs = []; 370 | 371 | s = findVertex(sourceLabel); 372 | 373 | if (!s || !s.isVertex){ 374 | throw "Illegal Argument: vertex not in graph"; 375 | } 376 | 377 | for(i=0; i 414 | vs.push("

Source = " + s.getLabel()); 415 | for(i=0; i" + v + " <= " + predecessors[v] + " d: "+ distances[v] ); 421 | } 422 | vs.push("
"); 423 | // 424 | 425 | return vs.join(""); 426 | 427 | } 428 | 429 | Dijkstra = function(sourceLabel){ 430 | var s, v, u, i, vq, archi, 431 | predecessors = [], 432 | distances = [], 433 | vs = []; 434 | 435 | s = findVertex(sourceLabel); 436 | 437 | if (!s || !s.isVertex){ 438 | throw "Illegal Argument: vertex not in graph"; 439 | } 440 | 441 | for(i=0; i 478 | vs.push("
|Dijkstra| Source = " + s.getLabel()); 479 | for(i=0; i" + v + " <= " + predecessors[v] + " d: "+ distances[v] ); 485 | } 486 | vs.push("
"); 487 | // 488 | 489 | return vs.join(""); 490 | 491 | } 492 | 493 | Prim = function(sourceLabel){ 494 | var i, j , archi, srcLabel, destLabel, e, 495 | treeEdges = [], predecessors = [], treeVertex = [], vs = [], 496 | eq; 497 | 498 | if (vList.length<=0){ 499 | throw "Graph is empty" ; 500 | } 501 | if (sourceLabel === undefined || sourceLabel === null || !findVertex(sourceLabel) ){ 502 | do{ 503 | i = Math.floor( vList.length * Math.random() ); 504 | }while ( !vList.hasOwnProperty(i) ); 505 | sourceLabel = vList[i].getLabel(); 506 | } 507 | treeVertex[sourceLabel] = true; 508 | archi = edges[sourceLabel]; 509 | 510 | eq = priorityQueue(); 511 | for (i = 0; i < archi.length; i++){ 512 | eq.push( node( archi[i], archi[i].getWeight() ) ); 513 | } 514 | 515 | while (!eq.isEmpty()){ 516 | e = eq.pop().key; 517 | srcLabel = e.getSrc().getLabel(); 518 | destLabel = e.getDest().getLabel(); 519 | //treeVertex[srcLabel] ought to be true because of the way edges are added to the queue 520 | if ( !treeVertex[destLabel] ){ 521 | //Add dest vertex to the tree 522 | treeVertex[destLabel] = true; 523 | predecessors[destLabel] = srcLabel; 524 | //Add the edge to the tree; 525 | treeEdges.push(e); 526 | //Add all edges from destLabel to non-tree vertices to the queue; 527 | archi = edges[destLabel]; 528 | for (i = 0; i < archi.length; i++){ 529 | if (!treeVertex[archi[i].getDest().getLabel()] ){ 530 | eq.push( node( archi[i], archi[i].getWeight() ) ); 531 | } 532 | } 533 | } 534 | } 535 | 536 | // 537 | vs.push( "
|PRIM| Source: " + sourceLabel); 538 | for(i=0; i" + vList[i].getLabel() + " <= " + predecessors[vList[i].getLabel()] ); 543 | } 544 | vs.push("
--- Edges:"); 545 | for (i = 0; i " + e.getSrc().getLabel() + " -> " + e.getDest().getLabel() + " (" + e.getWeight() + ")" + ", " ); 549 | } 550 | } 551 | // 552 | return vs.join(""); 553 | 554 | } 555 | 556 | printGraph = function(node){ 557 | var vs = ["Vertices: "], 558 | es = ["Edges: "], 559 | i,j, srcLabel; 560 | for (i = 0; i " + edges[srcLabel][j].getDest().getLabel().toString() ) ); 566 | } 567 | } 568 | node.innerHTML += "
" + vs.join(", ") + "
" + es.join(", "); 569 | }; 570 | 571 | for (i=0; i c - weight: 61 | break 62 | else: 63 | size += 1 64 | mask[pos] = 1 65 | value += p[pos] 66 | weight += w[pos] 67 | pos += 1 68 | 69 | if pos >= N: 70 | #Completed one "depth first search" visit in the solution 71 | #space tree: now must break off the while cycle 72 | break 73 | 74 | upper_bound = value + (int)(e[pos] * (c - weight)) 75 | 76 | if upper_bound < best_solution_value: 77 | #The forward move would not led us to a better solution, 78 | #so it performs backtracking 79 | 80 | #Brings the situation back at before the forward move 81 | for k in xrange(j,N): 82 | mask[k] = 0 83 | 84 | value = initial_value 85 | weight = initial_heigh 86 | size = initial_size 87 | 88 | #Looks for a possible backtracking move 89 | pos = j - 1 90 | while True: 91 | try: 92 | while mask[pos] == 0: 93 | pos -= 1 94 | except IndexError: 95 | #pos < 0: No more backtracking possible 96 | return best_solution_value, best_solution_weight, best_solution_mask 97 | else: 98 | #Exclude the element from the knapsack 99 | mask[pos] = 0 100 | size -= 1 101 | 102 | value -= p[pos] 103 | weight -= w[pos] 104 | j = pos + 1 105 | 106 | #Computes the upper bound on the score (According to the elements 107 | #that can be added to the knapsack) 108 | bound_height = weight 109 | value_bound = 0 110 | for i in xrange(j, N): 111 | if w[i] > c - bound_height: 112 | break 113 | 114 | value_bound += p[i] 115 | bound_height += w[i] 116 | 117 | try: 118 | value_bound += (int)(e[i] * (c - bound_height)) 119 | except IndexError: 120 | pass 121 | 122 | upper_bound = value + value_bound 123 | 124 | if upper_bound > best_solution_value: 125 | break 126 | else: 127 | #The forward move was successful: discards the next element 128 | #(which couldn't have been added because violates the 129 | #knapsack capacity) and tries to perform more f. moves. 130 | j = pos + 1 131 | 132 | #INVARIANT: j == N: 133 | #Completed one "depth first search" visit in the solution space tree. 
134 | if value > best_solution_value: 135 | #Checks current solution 136 | best_solution_mask = mask[:] 137 | best_solution_size = size 138 | best_solution_weight = weight 139 | best_solution_value = value 140 | 141 | if best_solution_size == N: #best_solution_value == U or 142 | return best_solution_value, best_solution_weight, best_solution_mask 143 | 144 | try: 145 | if mask[N-1] == 1: 146 | mask[N-1] = 0 147 | size -= 1 148 | value -= p[N-1] 149 | weight -= w[N-1] 150 | except IndexError: 151 | pass 152 | 153 | #Tries a backtracking move 154 | pos = N - 2 155 | while True: 156 | try: 157 | while mask[pos] == 0: 158 | pos -= 1 159 | except IndexError: 160 | #pos < 0: No more backtracking possible 161 | return best_solution_value, best_solution_weight, best_solution_mask 162 | else: 163 | #Exclude the element from the knapsack 164 | mask[pos] = 0 165 | size -= 1 166 | value -= p[pos] 167 | weight -= w[pos] 168 | j = pos + 1 169 | 170 | #Computes the upper bound on the score (According to the elements 171 | #that can be added to the knapsack) 172 | bound_height = weight 173 | value_bound = 0 174 | for i in xrange(j, N): 175 | if w[i] > c - bound_height: 176 | break 177 | 178 | value_bound += p[i] 179 | bound_height += w[i] 180 | 181 | try: #if i < N: 182 | value_bound += (int)(e[i] * (c - bound_height)) 183 | except IndexError: 184 | pass 185 | 186 | upper_bound = value + value_bound 187 | 188 | if upper_bound > best_solution_value: 189 | break 190 | -------------------------------------------------------------------------------- /karger/karger.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | from random import randrange 3 | from copy import copy, deepcopy 4 | from union_find import UnionFind 5 | 6 | def contract(G, edges, cuts, edges_index): 7 | m = len(edges) 8 | while True: 9 | u, v = edges[edges_index] 10 | edges_index += 1 11 | v = cuts.find_root(v) 12 | u = cuts.find_root(u) 13 | if not cuts.connected(u, v): 14 | break 15 | cuts.union(u, v) 16 | 17 | w = cuts.find_root(u) #find the common root of the two vertices 18 | 19 | if w != v: 20 | for k, z in G.get(v, []): 21 | #print z, v 22 | if not cuts.connected(z, v): 23 | G[w].append((k, z)) 24 | del G[v] 25 | 26 | if w != u: 27 | for k, z in G.get(u, []): 28 | #print z, u 29 | if not cuts.connected(z, u): 30 | G[w].append((k, z)) 31 | del G[u] 32 | 33 | #G[w] = filter(lambda (k, z): not cuts.connected(w, z), G[w]) 34 | 35 | return edges_index 36 | 37 | def karger_min_cut(G, edges): 38 | 39 | n = max(G.keys()) 40 | cuts = UnionFind(n+1) 41 | 42 | edges_map = {} 43 | edges_index = 0 44 | for _ in xrange(n-2): 45 | edges_index = contract(G, edges, cuts, edges_index) 46 | #print G, edges 47 | 48 | assert(len(G) == 2) 49 | u, v = G.keys() 50 | #assert(len(G[u]) == len(G[v])) 51 | 52 | #before returning the cuts list, we must remove the self loops from the adjacency list 53 | return filter(lambda (k, z): not cuts.connected(u, z), G[u]) #each edge is stored twice, so we can just return one of the two vertices' adj list 54 | 55 | 56 | def montecarlo_karger(G, edges, N): 57 | min_cut_len = float('inf') 58 | min_cut = [] 59 | 60 | m = len(edges) 61 | for _ in xrange(N): 62 | G_1 = deepcopy(G) 63 | #shuffle edges, to mimic random edge selection, but in a faster way 64 | for i in xrange(m): 65 | j = randrange(i, m) 66 | tmp = edges[j] 67 | edges[j] = edges[i] 68 | edges[i] = tmp 69 | 70 | mc = karger_min_cut(G_1, edges) 71 | if len(mc) < min_cut_len: 72 | min_cut_len = len(mc) 73 | 
min_cut = mc 74 | print min_cut 75 | return min_cut 76 | 77 | def read_input(f): 78 | G = {} 79 | edges = [] 80 | for line in f: 81 | line = map(int, line.strip().split(" ")) 82 | v = line[0] 83 | G[v] = map(lambda u: (v,u), line[1:]) 84 | for i in xrange(1, len(line)): 85 | edges.append((v, line[i])) 86 | f.close() 87 | return G, edges 88 | 89 | if __name__ == "__main__": 90 | G, edges = read_input(open(argv[1], 'r')) 91 | 92 | min_cut = montecarlo_karger(G, edges, len(G) ) 93 | print len(min_cut), min_cut -------------------------------------------------------------------------------- /karger/karger_profile.py: -------------------------------------------------------------------------------- 1 | import profile 2 | from karger import read_input, montecarlo_karger 3 | 4 | pr = profile.Profile() 5 | for i in range(5): 6 | print pr.calibrate(10000) 7 | 8 | G, edges = read_input(open('kargerMinCut.txt', 'r')) 9 | 10 | n = len(G) 11 | 12 | profile.run('montecarlo_karger(G, edges, n)', 'karger_profile.txt') -------------------------------------------------------------------------------- /karger/union_find.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 06/apr/2013 3 | 4 | @author: mlarocca 5 | ''' 6 | 7 | class UnionFind: 8 | 9 | def __init__(self, size): 10 | ''' : Constructor : 11 | Create a new union-find data structure 12 | : param size : The initial size of the union-find; 13 | The size can be later increased, but not decreased. 14 | : type size : int (Other types will be converted to int) 15 | : raise IllegalArgumentException : If size is less than 1. 16 | : return : self, as all constructors. 17 | ''' 18 | self.n = int(size) 19 | if self.n <= 0: 20 | raise Exception("IllegalArgumentException: size must be positive") 21 | 22 | self.set = range(size) 23 | self.set_size = [1] * size 24 | 25 | 26 | def find_root(self, i): 27 | ''' 28 | Implement find with path compression 29 | : param i : The element whose root has to be found. 30 | : type i : int (Other types will be converted to int) 31 | ''' 32 | #makes sure i is an integer 33 | i = int(i) 34 | 35 | if self.set[i] != i: 36 | self.set[i] = self.find_root(self.set[i]) 37 | 38 | return self.set[i] 39 | 40 | 41 | def connected(self, i, j): 42 | ''' Are elements i and j connected? 43 | : param i : The first element to check. 44 | : param j : The second element to check. 45 | : return : True <=> i and j belongs to the same component. 46 | : raise IllegalArgumentException : Raise an exception if either element is not in the union set 47 | ''' 48 | 49 | if i == j: 50 | if 0 <= i < self.n: 51 | return True 52 | else: 53 | raise Exception("IllegalArgumentException") 54 | 55 | root_i = self.find_root(i) 56 | root_j = self.find_root(j) 57 | 58 | return root_i == root_j 59 | 60 | 61 | def union(self, i, j): 62 | ''' Perform the union of two components, if they aren't unified yet. 63 | : param i : The first element. 64 | : param j : The second element, to be unified with i's component. 65 | : raise Exception: Raise an exception if either element is not in the 66 | union set (through find_root). 
67 | : return : The size of the newly created component 68 | ''' 69 | 70 | root_i = self.find_root(i) 71 | root_j = self.find_root(j) 72 | if root_i == root_j: 73 | return self.set_size[root_i] 74 | 75 | if self.set_size[root_i] <= self.set_size[root_j]: 76 | self.set[root_i] = root_j 77 | self.set_size[root_j] += self.set_size[root_i] 78 | return self.set_size[root_i] 79 | else: 80 | self.set[root_j] = root_i 81 | self.set_size[root_i] += self.set_size[root_j] 82 | return self.set_size[root_j] 83 | 84 | def __str__(self): 85 | ''' : override : 86 | ''' 87 | res = [str(range(self.n)), str(self.set), str(self.set_size)] 88 | return "\n".join(res) 89 | 90 | -------------------------------------------------------------------------------- /martello_toth_reduction.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: mlarocca 3 | 4 | Tries to reduce the 0-1 Knapsack problem by finding the elements that must 5 | be part of any optimal solution (set J1) and those that can't appear in an 6 | optimal solution (set J0). The core is represented by all the elements that 7 | neither belongs to J1 or J0, and the exact solution may now be computed on 8 | this smaller set rather than on the whole set of elements: the global 9 | solution will then be the union of the solution on the core problem and 10 | the elements in the set J1. 11 | 12 | The critical element (whose index is s) is the only one that might appear 13 | in both sets: if it is so and the intersection between the two sets is not 14 | empty, then the reduction is not valid. 15 | 16 | During the reduction process, a value p_star is computed: this is a lower 17 | bound to the optimal solution. If the sum of the core problem solution and 18 | the value of the elements in J1 is lower than p_star, then p_star is the 19 | solution to the problem (it might be worth keeping track of the elements 20 | corresponding to the highest value of p_star found, for this reason). 21 | 22 | @param p: List of elements' values; 23 | @param w: List of elements' weights; 24 | @param e: List of elements' scaled values: e[i] = p[i]/w[i] 25 | The elements available are sorted according to the 'e' vector. 26 | The i-th element has value p[i], weight w[i]. 27 | @param N: The number of elements available; 28 | @param c: Total capacity of the knapsack; 29 | @return: The sets of indices J1 and J0, as described above. 
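@example: Illustrative sketch (not part of the original source); the items must
          already be sorted by decreasing scaled value e[i] = p[i]/w[i]:
              p = [60, 60, 40, 10]
              w = [30, 40, 30, 10]
              e = [float(p[i]) / w[i] for i in range(4)]
              J1, J0 = martello_toth_reduction(p, w, e, 4, 60)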
30 | ''' 31 | 32 | def martello_toth_reduction(p, w, e, N, c): 33 | p_bar = [0] 34 | w_bar = [0] 35 | 36 | 37 | def binary_search(vec, size, value): 38 | l = 0 39 | r = size 40 | while l <= r: 41 | s = (l+r)/2 42 | if s == 0: 43 | if value <= vec[0]: 44 | break 45 | else: 46 | l = 1 47 | elif s == size: 48 | if vec[s-1] <= value: 49 | break 50 | else: 51 | r = s - 1 52 | else: 53 | if vec[s-1] <= value and value < vec[s]: 54 | break 55 | elif value < vec[s-1]: 56 | r = s - 1 57 | else: #w_bar[s] <= c 58 | l = s + 1 59 | return s 60 | 61 | value = 0 62 | weight = 0 63 | #INVARIANT: Stories must be considered in descending _scaled_value order 64 | for i in xrange(N): 65 | value += p[i] 66 | weight += w[i] 67 | p_bar.append(value) 68 | w_bar.append(weight) 69 | 70 | #DEBUG print p_bar 71 | #DEBUG print w_bar 72 | 73 | 74 | u_zero = [0] 75 | u_one = [] 76 | 77 | 78 | s = binary_search(w_bar, N+1, c) 79 | 80 | p_star = p_bar[s-1] #value of the set J1 81 | c_bar = c - w_bar[s-1] 82 | 83 | #DEBUG print s, p_star, c_bar 84 | 85 | for j in range(s, N): #Lists start from index '0', so indices are shifted by 1 86 | if w[j] <= c_bar: 87 | p_star += p[j] 88 | c_bar -= w[j] 89 | j += 1 90 | #DEBUG print p_star, c_bar 91 | 92 | for j in range(s): 93 | c_bar = c + w[j] 94 | s_bar = binary_search(w_bar, N+1, c_bar) 95 | 96 | c_bar -= w_bar[s_bar-1] 97 | 98 | if s_bar < N: 99 | scaled_value_plus_one = e[s_bar] #Indices are shifted by 1 100 | else: 101 | scaled_value_plus_one = 2e63-2 102 | 103 | if s_bar > 1: 104 | scaled_value_minus_one = e[s_bar - 2] #Indices are shifted by 1 105 | else: 106 | scaled_value_minus_one = 2e63-2 107 | 108 | u_zero.append(p_bar[s_bar-1] - p[j] + 109 | max((int)(c_bar*scaled_value_plus_one), 110 | (int)(p[s_bar-1] 111 | - float(w[s_bar-1] - c_bar) * scaled_value_minus_one) 112 | ) 113 | ) 114 | p_star = max(p_star, 115 | p_bar[s_bar-1] - p[j]) 116 | 117 | #DEBUG print j, s_bar, c_bar, u_zero[j+1], p_star 118 | for j in range(s-1, N): #Indices are shifted by 1 119 | 120 | c_bar = c - w[j] #Indices are shifted by 1 121 | s_bar = binary_search(w_bar, N+1, c_bar) 122 | 123 | c_bar -= w_bar[s_bar-1] 124 | 125 | if s_bar < N: 126 | scaled_value_plus_one = e[s_bar] #Indices are shifted by 1 127 | else: 128 | scaled_value_plus_one = 2e63-2 129 | 130 | if s_bar > 1: 131 | scaled_value_minus_one = e[s_bar-2] #Indices are shifted by 1 132 | else: 133 | scaled_value_minus_one = 2e63-2 134 | 135 | u_one.append(p_bar[s_bar-1] + p[j] + 136 | max((int)(c_bar * scaled_value_plus_one), 137 | (int)(p[s_bar-1] 138 | - (w[s_bar-1] - c_bar) * scaled_value_minus_one) 139 | ) 140 | ) 141 | p_star = max(p_star, 142 | p_bar[s_bar-1] + p[j]) 143 | 144 | #DEBUG print j, s_bar, c_bar, u_one[j-s+1], p_star 145 | 146 | J1 = [j-1 for j in range(1, s + 1) if u_zero[j] <= p_star] 147 | J0 = [j-1 for j in range(s, N + 1) if u_one[j-s] <= p_star] 148 | 149 | return J1, J0 -------------------------------------------------------------------------------- /network_flow.py: -------------------------------------------------------------------------------- 1 | '''Edmonds-Karp algorithm 2 | Computes the maximum flow from source to sink 3 | @param edges: A dict whose keys are edge tuples, while the values are the capacity associated with each edge 4 | Restrictions: if (u,v) is in edges, (v,u) can't be 5 | Every value is OK as vertex label, but None 6 | @return: the value of maximum flow, and the effective flow matrix 7 | ''' 8 | def edmonds_karp(edges, source, sink): 9 | 10 | adj = {} #Adjacency matrix 11 | capacity = {} 
#Edges capacity 12 | flow = {} #Flow for graph's edges and residual edges 13 | 14 | for edge in edges: 15 | (u,v) = edge 16 | 17 | try: 18 | adj[u].append(v) 19 | except KeyError: 20 | adj[u] = [v] 21 | flow[edge] = 0 22 | capacity[edge] = edges[edge] 23 | 24 | 25 | #also consider the residual edge, with capacity and flow initially null 26 | edge = (v,u) 27 | try: 28 | adj[v].append(u) 29 | except KeyError: 30 | adj[v] = [u] 31 | 32 | flow[edge] = 0 33 | capacity[edge] = 0 34 | 35 | n = len(adj) 36 | queue = [None] * n 37 | 38 | 39 | 40 | '''Find a path from source to sink, if any (using BFS) 41 | ''' 42 | def find_path_BFS(): 43 | head = 0 44 | queue[head] = source 45 | tail = 1 46 | 47 | parents = {source : None} 48 | M = {source: float('inf')} #Not needed in this cause flow is either -1, 0 or 1 49 | 50 | while head < tail: 51 | u = queue[head] 52 | head += 1 53 | for v in adj[u]: 54 | if (not v in parents): 55 | edge = (u,v) 56 | residual = capacity[edge] - flow[edge] 57 | if residual > 0: 58 | M[v] = min(M[u], residual) 59 | if v == sink: 60 | path_flow = M[v] 61 | flow[edge] += path_flow 62 | flow[(v,u)] -= path_flow 63 | v = parents[u] 64 | while v != None: 65 | flow[(v,u)] += path_flow 66 | flow[(u,v)] -= path_flow 67 | u = v 68 | v = parents[v] 69 | return path_flow 70 | else: 71 | parents[v] = u 72 | queue[tail] = v 73 | tail += 1 74 | #No path to sink node 75 | return 0 76 | 77 | partial_flow = find_path_BFS() 78 | while partial_flow != 0: 79 | partial_flow = find_path_BFS() 80 | 81 | return sum(flow[(source, dest)] for dest in adj[source]), flow 82 | 83 | 84 | 85 | '''Relabel-to-Front algorithm 86 | Computes the maximum flow from source to sink 87 | @param edges: A dict whose keys are edge tuples, while the values are the capacity associated with each edge; 88 | Restrictions: if (u,v) is in edges, (v,u) can't be 89 | Every value is OK as vertex label, but None 90 | @return: the value of maximum flow, and the effective flow matrix 91 | ''' 92 | def relabel_to_front(edges, source, sink): 93 | adj = {} #Adjacency matrix 94 | capacity = {} #Edges capacity 95 | flow = {} #Flow for graph's edges and residual edges 96 | 97 | for edge in edges: 98 | (u,v) = edge 99 | 100 | try: 101 | adj[u].append(v) 102 | except KeyError: 103 | adj[u] = [v] 104 | flow[edge] = 0 105 | capacity[edge] = edges[edge] 106 | 107 | 108 | #also consider the residual edge, with capacity and flow initially null 109 | edge = (v,u) 110 | try: 111 | adj[v].append(u) 112 | except KeyError: 113 | adj[v] = [u] 114 | 115 | flow[edge] = 0 116 | capacity[edge] = 0 117 | 118 | n = len(adj) 119 | 120 | height = {} # height of node 121 | excess = {} # flow into node minus flow from node 122 | current = {} #next neighbour to be evaluated 123 | neighbours = {} #next neighbour to be evaluated 124 | 125 | for u in adj: 126 | height[u] = excess[u] = current[u] = 0 127 | neighbours[u] = len(adj[u]) 128 | 129 | # node "queue" 130 | v_list = [u for u in adj if u != source and u != sink] 131 | 132 | def push(u, v): 133 | send = min(excess[u], capacity[(u,v)] - flow[(u,v)]) 134 | flow[(u,v)] += send 135 | flow[(v,u)] -= send 136 | excess[u] -= send 137 | excess[v] += send 138 | 139 | def relabel(u): 140 | # find smallest new height making a push possible, 141 | # if such a push is possible at all 142 | try: 143 | height[u] = min([height[v] for v in adj[u] if capacity[(u,v)] > flow[(u,v)]]) + 1 144 | except: #except ValueError 145 | return #height[u] = n 146 | 147 | def discharge(u): 148 | while excess[u] > 0: 149 | try: 150 | # check 
next neighbour 151 | v = adj[u][current[u]] 152 | except IndexError: 153 | # we have checked all neighbours. must relabel 154 | relabel(u) 155 | current[u] = 0 156 | v = adj[u][0] 157 | 158 | if height[u] > height[v] and capacity[(u,v)] > flow[(u,v)]: 159 | push(u, v) 160 | else: 161 | current[u] += 1 162 | 163 | height[source] = n # longest path from source to sink is less than n long 164 | excess[source] = float('inf') # send as much flow as possible to neighbours of source 165 | for u in adj[source]: 166 | push(source, u) 167 | 168 | p = 0 169 | k = len(v_list) 170 | while p < k: 171 | u = v_list[p] 172 | old_height = height[u] 173 | discharge(u) 174 | if height[u] > old_height: 175 | v_list.insert(0, v_list.pop(p)) # move to front of v_list 176 | p = 0 # start from front of v_list 177 | else: 178 | p += 1 179 | 180 | return sum(flow[(source, dest)] for dest in adj[source]), flow 181 | 182 | 183 | if __name__ == "__main__": 184 | #Example 185 | example_graph = {("s",1): 16, ("s",2): 40, (1,2): 10, (1,3): 12, (3,2): 9, (2,4): 14, (4,3): 7, (3,"t"): 20, (4,"t"): 4} 186 | print edmonds_karp(example_graph, "s", "t") 187 | print relabel_to_front(example_graph, "s", "t") -------------------------------------------------------------------------------- /patricia_trie.py: -------------------------------------------------------------------------------- 1 | from sets import Set 2 | '''Node constructor 3 | @param l: The label of the node; 4 | @param item: An item connected to the prefix represented by 5 | the path from the root of the tree to this node. 6 | ''' 7 | def __create_leaf(label="", item=None): 8 | if item: 9 | return {"label": label, "items": Set([item]), "children":[]} 10 | else: 11 | return {"label": label, "items": Set(), "children":[]} 12 | 13 | 14 | '''Node constructor 15 | @param l: The label of the node; 16 | @param item: An item connected to the prefix represented by 17 | the path from the root of the tree to this node. 18 | ''' 19 | def __create_inner_node(label, children, items): 20 | return {"label": label, "items": items, "children": children} 21 | 22 | 23 | 24 | 25 | '''Applies binary search to look for the string s in the subtree rooted in node 26 | ''' 27 | def __binary_search(node, s): 28 | 29 | l = pos = 0 30 | size = len(node["children"]) 31 | 32 | if size > 0: 33 | r = size - 1 34 | s_len = len(s) 35 | 36 | try : #try/except used instead of if as a speedup 37 | #If the search string is empty or null, return null 38 | #Needed to prevent crash when empty string or null are searched 39 | c = s[0] 40 | except: 41 | return None, None, None, None, l 42 | 43 | #Binary search on the first character of the string and of the children's label 44 | while l <= r: 45 | pos = (l+r)/2 46 | child = node["children"][pos] 47 | label = child["label"] 48 | l_len = len(label) 49 | 50 | tmp_c = label[0] 51 | 52 | 53 | if tmp_c == c: 54 | s_len = len(s) 55 | 56 | n = min(s_len, l_len) 57 | i = 1 58 | while i < n: 59 | if s[i] != label[i]: 60 | break 61 | else: 62 | i += 1 63 | 64 | #Arriving here means the two strings are equals for the common parts 65 | return child, i, s_len, l_len, l 66 | elif tmp_c < c: 67 | l = pos + 1 68 | else: 69 | r = pos - 1 70 | 71 | return None, None, None, None, l 72 | 73 | ''' 74 | Search a string in the tree starting at the current node. 
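For example (mirroring the unit test at the bottom of this module), using the
public wrappers defined below:

    trie = create_trie()
    trie_insert(trie, "test", "test str")
    trie_insert(trie, "tempo", "tempo str")
    print trie_search(trie, "te")    # a Set containing 'test str' and 'tempo str'
    print trie_search(trie, "tet")   # an empty Set: no node matches "tet"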
75 | 76 | @param s: The query string; 77 | @return: - If the string is a substring of the label of any 78 | children of this node, a reference to this node will 79 | be returned; 80 | - If the any of the children's label is a substring of 81 | the search string, it will search the remaining of the 82 | string starting from that children, and return the 83 | result of the recursive call; 84 | - Otherwise, there is no match for the search string. 85 | ''' 86 | def __node_search(node, s): 87 | 88 | child, i, s_len, l_len, l = __binary_search(node, s) 89 | 90 | if child == None: 91 | return None 92 | elif i == s_len: 93 | return child 94 | elif i == l_len: 95 | return __node_search(child, s[i:]) 96 | else: 97 | return None 98 | 99 | ''' 100 | Insert a new string (and all its prefixes) into the subtree rooted in 101 | this node. 102 | - If the string is a prefix of any of this node's children's label, 103 | then it just adds the item associated with it to that node's 104 | items list. 105 | - If any of the children's label is a prefix of the new string, adds 106 | the item to that node's list and then recursively insert the rest 107 | of the string starting from that same node; 108 | - If any of the children's label is a partial match to the string 109 | (meaning a prefix of the search string matches a prefix of the 110 | node's label) then splits that node at the first difference and 111 | continues the insertion of the rest of the string from the newly 112 | created node. 113 | - Otherwise, creates a new node and adds it to this node's children. 114 | 115 | @param s: The string to insert; 116 | @param item: The T object associated with the string to insert; 117 | ''' 118 | def __node_insert(node, s, item): 119 | #DEBUG print "node ", s, item 120 | child, i, s_len, l_len, new_child_pos = __binary_search(node, s) 121 | 122 | if child == None: 123 | #No path even partially matches the new string: a new node must be added to the tree. 124 | new_node = __create_leaf(s, item) 125 | node["children"].insert(new_child_pos, new_node) 126 | elif i < l_len: 127 | #DEBUG print child["label"], s, i, s_len, l_len, l 128 | #The new string partially matches node's label 129 | rest_of_label = child["label"][i:] 130 | child["label"] = child["label"][:i] 131 | new_child = __create_inner_node(rest_of_label, child["children"], child["items"].copy()) 132 | child["children"] = [] 133 | child["children"].append(new_child) 134 | child["items"].add(item) 135 | if i < s_len: 136 | rest_of_s = s[i:] 137 | new_child = __create_leaf(rest_of_s, item) 138 | 139 | if rest_of_label < rest_of_s : 140 | child["children"].append(new_child) 141 | else: 142 | child["children"].insert(0, new_child) 143 | elif i < s_len: 144 | #Node's label is a prefix of the new string 145 | child["items"].add(item) 146 | rest_of_s = s[i:] 147 | __node_insert(child, rest_of_s, item) 148 | else: 149 | #The new string is a prefix of node's label 150 | child["items"].add(item) 151 | 152 | 153 | ''' 154 | Removes a new string (and all its prefixes) from the subtree rooted in 155 | this node, if there's any match. 
156 | 157 | WARNING: The path is NOT removed from the tree, only the item associated 158 | with it is 159 | 160 | @param node: Current node 161 | @param s: The string to remove; 162 | @param item: The T object associated with the string to delete 163 | 164 | @return: True <=> the deletion was completely successfull 165 | False <=> item isn't found among the stored items, or s isn't found in the tree path 166 | ''' 167 | def __node_remove_item(node, s, item): 168 | 169 | child, i, s_len, l_len, l = __binary_search(node, s) 170 | success = True 171 | 172 | if child != None: 173 | #At one prefix of the string matches this child 174 | try: 175 | child["items"].remove(item) 176 | except KeyError: 177 | success = False 178 | 179 | if i == l_len and i < s_len: 180 | #The node's label is a prefix of the string to remove 181 | return success and __node_remove_item(child, s[i:], item); 182 | else: 183 | #There can be no further match 184 | return success 185 | else: 186 | return False 187 | 188 | 189 | ''' 190 | Removes a string and all its prefixes from the tree. 191 | It travels from the root of the tree along the path corresponding to the 192 | string and removes the object associated with the string from the items 193 | list of each node visited. 194 | 195 | WARNING: The path is NOT removed from the tree, only the item associated 196 | with it is 197 | 198 | @param s: The string to remove; 199 | @param item: The object associated with the string. 200 | 201 | @return: True <=> the deletion was completely successfull 202 | False <=> item isn't found among the stored items, or s isn't found in the tree path 203 | ''' 204 | def trie_remove_item(root, label, item): 205 | return __node_remove_item(root, label, item) 206 | #DEBUG print root 207 | 208 | ''' 209 | Inserts a new string, and all its prefixes, into the trie. 210 | @param root: The root of the trie 211 | @param label: The string to add; 212 | @param item: The item associated with the string. 213 | ''' 214 | def trie_insert(root, label, item): 215 | __node_insert(root, label, item) 216 | #DEBUG print root 217 | 218 | ''' 219 | Search a string in the tree and return the set of item stored in the 220 | node corresponding to the end of the string. 221 | @param root: The root of the trie 222 | @param s: The string to search; 223 | @return: A set filled with the objects associated with 224 | the entire string (possibly an empty set). 225 | ''' 226 | def trie_search(root, s): 227 | try: 228 | return __node_search(root, s)["items"] 229 | except TypeError: 230 | #If the search return None, then it has no items field. 
231 | #Try/except is used to speed up happy cases 232 | return Set() 233 | 234 | 235 | '''Creates the root of the tree (an empty node) 236 | ''' 237 | def create_trie(): 238 | return __create_leaf() 239 | 240 | if __name__ == "__main__": 241 | #Unit Testing 242 | def test(): 243 | trie = create_trie() 244 | trie_insert(trie, "test", "test str") 245 | #print trie_search(trie, "test") 246 | trie_insert(trie, "alfa", "alfa str") 247 | 248 | 249 | assert( "test str" in trie_search(trie, "test") ) 250 | assert( "alfa str" in trie_search(trie, "alfa") ) 251 | assert( len(trie_search(trie, "testo")) == 0 ) 252 | trie_insert(trie, "tempo", "tempo str") 253 | print trie_search(trie, "test") 254 | assert( "test str" in trie_search(trie, "test") ) 255 | assert( "alfa str" in trie_search(trie, "alfa") ) 256 | assert( "tempo str" in trie_search(trie, "tem") ) 257 | assert( "tempo str" in trie_search(trie, "tempo") ) 258 | search = trie_search(trie, "te") 259 | assert( "tempo str" in search and "test str" in search and not "alfa str" in search ) 260 | search = trie_search(trie, "t") 261 | assert( "tempo str" in search and "test str" in search and not "alfa str" in search ) 262 | #print trie_search(trie, "test") 263 | trie_insert(trie, "test", "test str2") 264 | print trie_search(trie, "test") 265 | assert( "test str2" in trie_search(trie, "test") ) 266 | print trie 267 | print trie_search(trie, "tet") 268 | assert( len(trie_search(trie, "tet")) == 0 ) 269 | 270 | def trie_properties_test(node, father_label): 271 | label = father_label + node["label"] 272 | l_len = len(label) 273 | print node["items"] 274 | for item in node["items"]: 275 | try: 276 | assert(item[:l_len] == label) 277 | except: 278 | print "ERROR ", label, item 279 | print node 280 | for child in node['children']: 281 | trie_properties_test(child, label) 282 | 283 | trie_properties_test(trie, "") 284 | 285 | 286 | 287 | print trie_remove_item(trie, "test", "teststr") 288 | 289 | assert("test str" in trie_search(trie, "test") ) 290 | print trie_remove_item(trie, "test", "test str") 291 | print trie 292 | assert(not "test str" in trie_search(trie, "test") ) 293 | trie_insert(trie, "test", "test str") 294 | 295 | assert( "test str" in trie_search(trie, "test") ) 296 | 297 | 298 | trie_properties_test(trie, "") 299 | 300 | print trie_remove_item(trie, "test", "test str") 301 | print trie 302 | assert(not "test str" in trie_search(trie, "test") ) 303 | trie_properties_test(trie, "") 304 | 305 | test() -------------------------------------------------------------------------------- /simulated_annealing.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from random import randrange 3 | 4 | 5 | ''' Defines the base class that models a generic solution found by Simulated Annealing 6 | to a generic problem. 7 | Solution objects MUST have: 8 | * A 'score' method (returning either a float or an int, must express how well 9 | a solution solves the problem; 10 | * One or more 'mutation' methods: methods that enhance the solution in a problem specific way 11 | * A 'mutation' field: a list of all the mutation methods supported 12 | 13 | Real solution classes can inherit from this stub class. 
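A rough sketch of how the annealing driver below consumes this interface
(names follow this module; OnesSolution is the example subclass defined
further down):

    sol = OnesSolution(20)
    current_value = sol.score()
    for mutate in sol.mutations:
        candidate = mutate(sol)                 # mutations return a modified copy
        delta = candidate.score() - current_value
        # delta > 0 is always accepted; delta < 0 is accepted only with a
        # temperature-dependent probability (see annealing() below)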
14 | ''' 15 | class Solution(object): 16 | 17 | def __init__(self): 18 | #Stub: implement the real constructor here 19 | self.mutations = [self.mutation_1] 20 | 21 | ''' WARNING: 22 | The returned value MUST be strictly positive 23 | ''' 24 | def score(self): 25 | #Stub: problem specific method, define it here or override 26 | return 0. 27 | 28 | @staticmethod 29 | def mutation_1(solution): 30 | #Stub: real mutation methods should return a (mutated) copy of the object 31 | return deepcopy(solution) 32 | 33 | 34 | '''Example class''' 35 | class OnesSolution(Solution): 36 | def __init__(self, length): 37 | assert(length > 0) 38 | self.length = length 39 | self.string = [False] * length 40 | super(OnesSolution, self).__init__() 41 | pass 42 | 43 | ''' WARNING: 44 | The returned value MUST be strictly positive 45 | ''' 46 | def score(self): 47 | score = 1 + len([el for el in self.string if el]) 48 | assert(score > 0) 49 | return score 50 | 51 | @staticmethod 52 | def mutation_1(solution): 53 | temp_solution = deepcopy(solution) 54 | i = randrange(solution.length) 55 | solution.string[i] = not solution.string[i] 56 | return temp_solution 57 | 58 | 59 | '''Simulated annealing main: until all the allotted time has been used, keeps restarting 60 | the annealing procedure and saves its result 61 | @param max_time: the maximum (indicative) execution time for the annealing, in seconds; 62 | @return: (best_score, best_solution) 63 | The best solution found by simulated annealing, and its score. 64 | ''' 65 | def simulated_annealing(max_time): 66 | from random import random 67 | from math import e 68 | from time import time 69 | ''' Start temperature''' 70 | INITIAL_TEMPERATURE = 1. 71 | 72 | ''' How many times do we cool''' 73 | COOLING_STEPS = 100 #500 74 | 75 | ''' How much to cool each time''' 76 | COOLING_FRACTION = 0.97 77 | 78 | ''' Number of mutations cycles for each temperature cooling step - lower makes it faster, higher makes it potentially better. ''' 79 | STEPS_PER_TEMP = 50 #1000 80 | 81 | ''''Problem specific Boltzman's constant''' 82 | K = 0.1 83 | 84 | 85 | ''' Stub for a method that builds a solution to the problem, 86 | randomly 87 | ''' 88 | def initial_solution(): 89 | #Stub 90 | return OnesSolution(20) #Defer initialization to the Solution class 91 | 92 | 93 | 94 | ''' Single iteration of simulated annealing 95 | @return: (best_value, best_solution) 96 | best_solution is the the best solution to the problem that this cycle of simulated annealing could find, and best_value 97 | is its score according to the problem's own metric. 
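The acceptance rule used below is Metropolis-style: an improving move is always
kept, while a worsening move (delta < 0) survives only if

    random() < e ** (float(new_value) / delta * K / temperature)

so accepting losses becomes increasingly unlikely as the temperature cools.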
98 | 99 | ''' 100 | def annealing(): 101 | temperature = INITIAL_TEMPERATURE 102 | 103 | solution = initial_solution() 104 | best_solution = deepcopy(solution) 105 | best_value = current_value = solution.score() 106 | 107 | for i in xrange(COOLING_STEPS): 108 | temperature *= COOLING_FRACTION 109 | start_value = current_value 110 | 111 | for j in xrange(STEPS_PER_TEMP): 112 | for mutation in solution.mutations: 113 | new_solution = mutation(solution) 114 | 115 | new_value = new_solution.score() 116 | delta = new_value - current_value 117 | 118 | if delta==0: #No change to solution's score 119 | continue 120 | 121 | flip = random() 122 | exponent = float(new_value) / delta * K / temperature 123 | merit = e ** exponent 124 | 125 | if delta > 0 : # ACCEPT-WIN 126 | solution = deepcopy(new_solution) 127 | current_value = new_value 128 | if current_value > best_value: 129 | best_value = current_value 130 | best_solution = deepcopy(solution) 131 | 132 | elif merit > flip : #ACCEPT-LOSS 133 | solution = deepcopy(new_solution) 134 | current_value = new_value 135 | 136 | if (current_value-start_value) > 0.0 : # rerun at this same temperature 137 | temperature /= COOLING_FRACTION 138 | 139 | return (best_value, best_solution) 140 | 141 | start_time = time() 142 | best_solution = None 143 | best_score = 0 144 | 145 | #Continues until the execution exceeded the allotted time 146 | while time() < start_time + max_time: 147 | (score, solution) = annealing() 148 | if score > best_score: 149 | best_solution = deepcopy(solution) 150 | best_score = score 151 | 152 | return (best_score, best_solution) 153 | 154 | if __name__ == '__main__': 155 | print simulated_annealing(0.5) -------------------------------------------------------------------------------- /ss_tree.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | from array import array 3 | class ss_tree(object): 4 | 5 | ''' Creates the empty root of an SS-tree. 6 | @param max_elements_per_cluster: The max number of children for each intermediate node, 7 | and the max number of points for each leaf. 8 | ''' 9 | def __init__(self, max_elements_per_cluster=2): 10 | self.__root = {'points': [], 'leaf':True, 'parent': None, 'x':0., 'y':0.} 11 | assert(max_elements_per_cluster > 1) 12 | 13 | self.__max_elements_per_cluster = max_elements_per_cluster 14 | self.__split_size = self.__max_elements_per_cluster / 2 15 | 16 | ''' Inserts a point (topic) in a SS-tree; If necessary, splits the tree node in which the point was inserted 17 | and fixes the tree structure from that node up to the root. 18 | 19 | @param new_point: The point to be inserted; 20 | The point must be a dictionary with at least 3 fields: 21 | - 'x': Point's x coordinate; 22 | - 'y': Point's y coordinate; 23 | - 'data': The data associated with the point. 24 | ''' 25 | def insert(self, new_point): 26 | x_new_point = new_point['x'] 27 | y_new_point = new_point['y'] 28 | 29 | #Looks for the right leaf (the one with the closest centroid) to which the new_point should be added. 30 | #INVARIANT: The empty tree's root is a (empty) leaf. 
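        #Sketch of the descent performed by the loop below: starting from the
        #root, move at each level to the child whose centroid ('x', 'y') is
        #closest (in squared euclidean distance) to the new point, until a leaf
        #is reached; the point is then added to that leaf, splitting it if full.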
31 | node = self.__root 32 | while not node['leaf']: 33 | children = node['children'] 34 | child = children[0] 35 | min_dist = (child['x'] - x_new_point) ** 2 + (child['y'] - y_new_point) ** 2 36 | min_index = 0 37 | for i in range(1,len(children)): 38 | child = children[i] 39 | dist = (child['x'] - x_new_point) ** 2 + (child['y'] - y_new_point) ** 2 40 | if dist < min_dist: 41 | min_index = i 42 | min_dist = dist 43 | node = children[min_index] 44 | 45 | 46 | #Now adds the new point to the leaf it has found. 47 | 48 | #INVARIANT: node is a leaf 49 | points = node['points'] 50 | if len(points) < self.__max_elements_per_cluster: 51 | #No split neeeded to add the point to this node 52 | 53 | #Can add the new_point to this node 54 | old_x_node = x_node = node['x'] 55 | old_y_node = y_node = node['y'] 56 | 57 | #Compute the new centroid for the node 58 | n_p = len(points) 59 | x_node *= n_p 60 | y_node *= n_p 61 | x_node += x_new_point 62 | y_node += y_new_point 63 | points.append(new_point) 64 | n_p += 1 65 | x_node /= n_p 66 | y_node /= n_p 67 | node['x'] = x_node 68 | node['y'] = y_node 69 | 70 | #Compute node's radius and variance 71 | radius = 0. 72 | x_var = y_var = 0. 73 | for point in points: 74 | #INVARIANT: points don't have radius 75 | x_dist = (x_node - point['x']) ** 2 76 | y_dist = (y_node - point['y']) ** 2 77 | radius = max(radius, x_dist + y_dist) 78 | #We don't need the exact variance, we can do fine with an estimate based on max distance form the centroid 79 | x_var = max(x_var, x_dist) 80 | y_var = max(y_var, y_dist) 81 | node['radius'] = sqrt(radius) 82 | node['x_var'] = x_var 83 | node['y_var'] = y_var 84 | 85 | #Propagates the change all the way to the root 86 | node_parent = node['parent'] 87 | while node_parent != None: 88 | tmp_x = x_node_parent = node_parent['x'] 89 | tmp_y = y_node_parent = node_parent['y'] 90 | n_p = len(node_parent['children']) 91 | x_node_parent *= n_p 92 | y_node_parent *= n_p 93 | x_node_parent += x_node - old_x_node 94 | y_node_parent += y_node - old_y_node 95 | old_x_node = tmp_x 96 | old_y_node = tmp_y 97 | x_node_parent /= n_p 98 | y_node_parent /= n_p 99 | node_parent['x'] = x_node_parent 100 | node_parent['y'] = y_node_parent 101 | 102 | radius = 0. 103 | x_var = y_var = 0. 
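                #Recompute this ancestor's bounding radius and per-axis spread
                #from its children's centroids and radii (an estimate, as the
                #comment inside the loop notes).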
104 | for child_node in node_parent['children']: 105 | x_dist = (x_node_parent - child_node['x']) ** 2 106 | y_dist = (y_node_parent - child_node['y']) ** 2 107 | radius = max(radius, sqrt(x_dist + y_dist) + child_node['radius']) 108 | #We don't need the exact variance, we can do fine with an estimate based on max distance form the centroid 109 | x_var = max(x_var, x_dist + child_node['radius'] ** 2) 110 | y_var = max(y_var, y_dist + child_node['radius'] ** 2) 111 | 112 | node_parent['radius'] = radius 113 | node_parent['x_var'] = x_var 114 | node_parent['y_var'] = y_var 115 | 116 | node = node_parent 117 | node_parent = node['parent'] 118 | else: 119 | #len(children) == max_elements_per_cluster => The leaf must be split 120 | 121 | #Splits along the direction with highest variance 122 | if node['x_var'] >= node['y_var']: 123 | points.sort(key=lambda p: p['x']) 124 | else: 125 | points.sort(key=lambda p: p['y']) 126 | 127 | #The new nodes have exactly half the elements of the old one 128 | new_node_1 = {'points': points[:self.__split_size], 'leaf': True} 129 | new_node_2 = {'points': points[self.__split_size:], 'leaf': True} 130 | 131 | 132 | #Compute the centroids for the new nodes 133 | for new_node in [new_node_1, new_node_2]: 134 | points = new_node['points'] 135 | x_node = 0. 136 | y_node = 0. 137 | for point in points: 138 | x_node += point['x'] 139 | y_node += point['y'] 140 | n_p = len(points) 141 | x_node /= n_p 142 | y_node /= n_p 143 | 144 | new_node['x'] = x_node 145 | new_node['y'] = y_node 146 | 147 | #Adds the new point to the one of the two new nodes that is closest to the old centroid 148 | x_node = node['x'] 149 | y_node = node['y'] 150 | dist_1 = (x_node - new_node_1['x']) ** 2 + (y_node - new_node_1['y']) ** 2 151 | dist_2 = (x_node - new_node_2['x']) ** 2 + (y_node - new_node_2['y']) ** 2 152 | 153 | if (dist_1 > dist_2): 154 | new_node = new_node_2 155 | new_node_2 = new_node_1 156 | new_node_1 = new_node 157 | 158 | #INVARIANT: at this point new_node_1 is the one of the two new nodes closest to the old node's centroid 159 | #Adds the new point to new_node_1 160 | points = new_node_1['points'] 161 | n_p = len(points) 162 | #Updates new_node_1's centroid 163 | x_node = new_node_1['x'] 164 | y_node = new_node_1['y'] 165 | x_node *= n_p 166 | y_node *= n_p 167 | x_node += new_point['x'] 168 | y_node += new_point['y'] 169 | points.append(new_point) 170 | n_p += 1 171 | new_node_1['x'] = x_node / n_p 172 | new_node_1['y'] = y_node / n_p 173 | 174 | #Compute the radius of the new nodes 175 | for new_node in [new_node_1, new_node_2]: 176 | 177 | x_node = new_node['x'] 178 | y_node = new_node['y'] 179 | 180 | radius = 0. 181 | x_var = y_var = 0. 182 | for point in new_node['points']: 183 | #INVARIANT: point don't have radius 184 | x_dist = (x_node - point['x']) ** 2 185 | y_dist = (y_node - point['y']) ** 2 186 | radius = max(radius, x_dist + y_dist) 187 | #We don't need the exact variance, we can do fine with an estimate based on max distance form the centroid 188 | x_var = max(x_var, x_dist) 189 | y_var = max(y_var, y_dist) 190 | 191 | new_node['radius'] = sqrt(radius) 192 | new_node['x_var'] = x_var 193 | new_node['y_var'] = y_var 194 | 195 | 196 | #INVARIANT: at this new_point new_node_1 is the closest to the centroid of node, so it takes its place among the 197 | #childrens of its parent 198 | node_parent = node['parent'] 199 | 200 | if node_parent == None: 201 | #The node that has just been split was the root: so it must create a new root... 
202 | self.__root = {'children': [new_node_1, new_node_2], 'leaf':False, 'parent': None, 203 | 'x': (new_node_1['x'] + new_node_2['x'])/2, 204 | 'y': (new_node_1['y'] + new_node_2['y'])/2} 205 | x_dist_1 = (new_node_1['x'] - self.__root['x']) ** 2 206 | x_dist_2 = (new_node_2['x'] - self.__root['x']) ** 2 207 | y_dist_1 = (new_node_1['y'] - self.__root['y']) ** 2 208 | y_dist_2 = (new_node_2['y'] - self.__root['y']) ** 2 209 | self.__root['radius'] = max(sqrt(x_dist_1 + y_dist_1) + new_node_1['radius'], 210 | sqrt(x_dist_2 + y_dist_2) + new_node_2['radius']) 211 | self.__root['x_var'] = max(x_dist_1 + new_node_1['radius'] ** 2, 212 | x_dist_2 + new_node_2['radius'] ** 2) 213 | self.__root['y_var'] = max(y_dist_1 + new_node_1['radius'] ** 2, 214 | y_dist_2 + new_node_2['radius'] ** 2) 215 | 216 | new_node_1['parent'] = new_node_2['parent'] = self.__root 217 | 218 | #... and return 219 | return 220 | else: 221 | #Replaces the old node (the one just split) with the closest of the newly created 222 | new_node_1['parent'] = node_parent 223 | 224 | node_parent['children'].remove(node) 225 | node_parent['children'].append(new_node_1) 226 | 227 | 228 | while node_parent != None: 229 | node = node_parent 230 | children = node['children'] 231 | 232 | #Checks if there is still a node resulting from the split of one of its children 233 | #INVARIANT: new_node_2 is the farthest of the two resulting node from the split 234 | if new_node_2: 235 | 236 | if len(children) < self.__max_elements_per_cluster: 237 | #No need for farther splits: just append the new node 238 | children.append(new_node_2) 239 | new_node_2['parent'] = node 240 | new_node_2 = None 241 | else: 242 | #Must split this node too 243 | old_node = new_node_2 244 | 245 | #Split the children along the axes with the biggest variance 246 | if node['x_var'] >= node['y_var']: 247 | children.sort(key=lambda p: p['x']) 248 | else: 249 | children.sort(key=lambda p: p['y']) 250 | 251 | new_children = children[:self.__split_size] 252 | new_node_1 = {'children': new_children, 'leaf': node['leaf']} 253 | for child in new_children: 254 | child['parent'] = new_node_1 255 | 256 | new_children = children[self.__split_size:] 257 | new_node_2 = {'children': new_children, 'leaf': node['leaf']} 258 | for child in new_children: 259 | child['parent'] = new_node_2 260 | 261 | #Compute the centroids 262 | for new_node in [new_node_1, new_node_2]: 263 | x_node = 0. 264 | y_node = 0. 
265 | for child in new_node['children']: 266 | x_node += child['x'] 267 | y_node += child['y'] 268 | n_p = len(new_node['children']) 269 | new_node['x'] = x_node / n_p 270 | new_node['y'] = y_node / n_p 271 | 272 | #Finds the one of the new nodes closest to the original centroid 273 | dist_1 = (node['x'] - new_node_1['x']) ** 2 + (node['y'] - new_node_1['y']) ** 2 274 | dist_2 = (node['x'] - new_node_2['x']) ** 2 + (node['y'] - new_node_2['y']) ** 2 275 | 276 | if (dist_1 > dist_2): 277 | new_node = new_node_2 278 | new_node_2 = new_node_1 279 | new_node_1 = new_node 280 | 281 | #INVARIANT: At this point new_node_1 is the one of two nodes resulting from the split 282 | # closest to the orginal centroid 283 | n_p = len(new_node_1['children']) 284 | new_node_1['children'].append(old_node) 285 | old_node['parent'] = new_node_1 286 | 287 | x_node = new_node_1['x'] 288 | y_node = new_node_1['y'] 289 | x_node *= n_p 290 | y_node *= n_p 291 | x_node += old_node['x'] 292 | y_node += old_node['y'] 293 | n_p += 1 294 | new_node_1['x'] = x_node / n_p 295 | new_node_1['y'] = y_node / n_p 296 | 297 | #Compute the radiuses and the variances 298 | for new_node in [new_node_1, new_node_2]: 299 | 300 | x_node = new_node['x'] 301 | y_node = new_node['y'] 302 | 303 | radius = 0. 304 | x_var = y_var = 0. 305 | 306 | for child_node in new_node['children']: 307 | x_dist = (x_node - child_node['x']) ** 2 308 | y_dist = (y_node - child_node['y']) ** 2 309 | radius = max(radius, sqrt(x_dist + y_dist) + child_node['radius']) 310 | #We don't need the exact variance, we can do fine with an estimate based on max distance form the centroid 311 | x_var = max(x_var, x_dist + child_node['radius'] ** 2) 312 | y_var = max(y_var, y_dist + child_node['radius'] ** 2) 313 | 314 | new_node['radius'] = radius 315 | new_node['x_var'] = x_var 316 | new_node['y_var'] = y_var 317 | 318 | #Checks whether the root has been split 319 | node_parent = node['parent'] 320 | if node_parent == None: 321 | #Has just split the root 322 | self.__root = {'children': [new_node_1, new_node_2], 'leaf':False, 'parent': None, 323 | 'x': (new_node_1['x'] + new_node_2['x'])/2, 324 | 'y': (new_node_1['y'] + new_node_2['y'])/2} 325 | x_dist_1 = (new_node_1['x'] - self.__root['x']) ** 2 326 | x_dist_2 = (new_node_2['x'] - self.__root['x']) ** 2 327 | y_dist_1 = (new_node_1['y'] - self.__root['y']) ** 2 328 | y_dist_2 = (new_node_2['y'] - self.__root['y']) ** 2 329 | self.__root['radius'] = max(sqrt(x_dist_1 + y_dist_1) + new_node_1['radius'], 330 | sqrt(x_dist_2 + y_dist_2) + new_node_2['radius']) 331 | self.__root['x_var'] = max(x_dist_1 + new_node_1['radius'] ** 2, x_dist_2 + new_node_2['radius'] ** 2) 332 | self.__root['y_var'] = max(y_dist_1 + new_node_1['radius'] ** 2, y_dist_2 + new_node_2['radius'] ** 2) 333 | new_node_1['parent'] = new_node_2['parent'] = self.__root 334 | return 335 | else: 336 | new_node_1['parent'] = node_parent 337 | 338 | node_parent['children'].remove(node) 339 | node_parent['children'].append(new_node_1) 340 | 341 | #node doesn't exist anymore, and for new_node_1 and new_node_2 everything has been computed 342 | #and therefore can go to the next iteration 343 | continue 344 | 345 | #Updates node's centroid, radius and variances 346 | x_node = 0. 347 | y_node = 0. 348 | 349 | for child_node in children: 350 | x_node += child_node['x'] 351 | y_node += child_node['y'] 352 | 353 | n_p = len(children) 354 | x_node /= n_p 355 | y_node /= n_p 356 | node['x'] = x_node 357 | node['y'] = y_node 358 | 359 | radius = 0. 
360 | x_var = y_var = 0. 361 | for child_node in children: 362 | x_dist = (x_node - child_node['x']) ** 2 363 | y_dist = (y_node - child_node['y']) ** 2 364 | radius = max(radius, sqrt(x_dist + y_dist) + child_node['radius']) 365 | x_var = max(x_var, x_dist + child_node['radius'] ** 2) 366 | y_var = max(y_var, y_dist + child_node['radius'] ** 2) 367 | 368 | node['radius'] = radius 369 | node['x_var'] = x_var 370 | node['y_var'] = y_var 371 | 372 | node_parent = node['parent'] 373 | 374 | return 375 | 376 | 377 | ''' Finds the k nearest points to the query point taking advantage of the 378 | SS Tree structure; 379 | A heap whose size is bounded to k is used to store the k closest 380 | distances to the query point (if at least k are found). 381 | To speed up performance, the heap is implemented as a static array 382 | of doubles to store just the distances of the points, while 383 | another dynamic list will hold couples (distance, point data) so 384 | that, once the traversal of the tree is ended, the k closest points 385 | can be filtered from this list using the distance of the k-th nearest 386 | neighbour (stored in heap[0]). 387 | 388 | @param (x0,y0): Coordinates of the query point; 389 | @param k: How many neighbours must be retrieved. 390 | @return: The list of the data field of the k nearest neighbours, 391 | sorted by proximity to the query point. 392 | ''' 393 | def k_nearest_neighbours(self, (x0, y0), k): 394 | assert(k > 0) 395 | heap = array('d', [0] * (k+1)) #INVARIANT: no more than k results are needed 396 | #Init the heap to an empty max-heap 397 | heap_size = 0 398 | #Keeps track of the candidates to nearest neighbours found 399 | heap_elements = [] 400 | 401 | 402 | #Starts a search in the topics SS-tree; 403 | #All the topics are pushed in a bounded max-heap which holds at most k distances 404 | #(the k smallest ones) so that, once the heap is full, its first element is 405 | #the kth distance discovered so far, and this value can be used to prune the search 406 | #on the SS-tree. 407 | 408 | if self.__root['leaf']: 409 | #The tree has only one node, the root: so every point must be examined 410 | points = self.__root['points'] 411 | for p in points: 412 | data = p['data'] 413 | x = p['x'] 414 | y = p['y'] 415 | 416 | new_dist = sqrt((x - x0) ** 2 + (y - y0) ** 2) 417 | 418 | if heap_size == k: 419 | if new_dist > heap[0]: 420 | #The heap is full: if the new value is greather than the kth distance, 421 | #then it can't be one of the k nearest neighbour's distances 422 | continue 423 | 424 | heap_elements.append((new_dist, data)) 425 | pos = 0 426 | # Bubble up the greater child until hitting a leaf. 427 | child_pos = 2 * pos + 1 # leftmost child position 428 | while child_pos < heap_size: 429 | # Set childpos to index of greater child. 430 | right_pos = child_pos + 1 431 | if right_pos < heap_size and heap[child_pos] < heap[right_pos]: 432 | child_pos = right_pos 433 | # Move the greater child up. 434 | if heap[child_pos] <= new_dist: 435 | break 436 | heap[pos] = heap[child_pos] 437 | pos = child_pos 438 | child_pos = 2*pos + 1 439 | heap[pos] = new_dist 440 | else: 441 | heap_elements.append((new_dist, data)) 442 | heap[heap_size] = new_dist 443 | pos = heap_size 444 | heap_size += 1 445 | # Follow the path to the root, moving parents down until finding a place 446 | # newitem fits. 
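                    # (This is a hand-rolled sift-up on a bounded max-heap of
                    # candidate distances: heap[0] always holds the largest of
                    # the at most k distances kept, i.e. the current k-th
                    # nearest distance once the heap is full.)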
447 | while pos > 0: 448 | parent_pos = (pos - 1) >> 1 449 | parent = heap[parent_pos] 450 | if new_dist > parent: 451 | heap[pos] = parent 452 | pos = parent_pos 453 | else: 454 | break 455 | heap[pos] = new_dist 456 | else: 457 | queue = [] 458 | #Adds all the root's children to the queue, and examines them in order of increasing distance 459 | #of their border from the query point 460 | children = self.__root['children'] 461 | for child in children: 462 | dist = sqrt((child['x'] - x0) ** 2 + (child['y'] - y0) ** 2) 463 | radius = child['radius'] 464 | if dist <= radius: 465 | dist = 0 466 | else: 467 | dist -= radius 468 | queue.append((dist, radius, child)) 469 | 470 | queue.sort(key=lambda q:q[0], reverse=True) 471 | 472 | while len(queue) > 0: 473 | (d, r, node) = queue.pop() 474 | 475 | if node['leaf']: 476 | points = node['points'] 477 | for p in points: 478 | data = p['data'] 479 | x = p['x'] 480 | y = p['y'] 481 | 482 | new_dist = sqrt((x - x0) ** 2 + (y - y0) ** 2) 483 | 484 | if heap_size == k: 485 | #The heap is full: if the new value is greather than the kth distance, 486 | #then it can't be one of the k nearest neighbour's distances 487 | if new_dist > heap[0]: 488 | continue 489 | 490 | heap_elements.append((new_dist, data)) 491 | #heap[0] = new_dist 492 | pos = 0 493 | # Bubble up the greater child until hitting a leaf. 494 | child_pos = 2 * pos + 1 # leftmost child position 495 | while child_pos < heap_size: 496 | # Set childpos to index of greater child. 497 | right_pos = child_pos + 1 498 | if right_pos < heap_size and heap[child_pos] < heap[right_pos]: 499 | child_pos = right_pos 500 | # Move the greater child up. 501 | if heap[child_pos] <= new_dist: 502 | break 503 | heap[pos] = heap[child_pos] 504 | pos = child_pos 505 | child_pos = 2*pos + 1 506 | heap[pos] = new_dist 507 | else: 508 | heap_elements.append((new_dist, data)) 509 | heap[heap_size] = new_dist 510 | pos = heap_size 511 | heap_size += 1 512 | # Follow the path to the root, moving parents down until it finds a place 513 | #where new_item fits. 
514 | while pos > 0: 515 | parent_pos = (pos - 1) >> 1 516 | parent = heap[parent_pos] 517 | if new_dist > parent: 518 | heap[pos] = parent 519 | pos = parent_pos 520 | else: 521 | break 522 | heap[pos] = new_dist 523 | 524 | #Checks if now the queue is full 525 | if heap_size == k: 526 | #If it is so, filters the queue 527 | #The heap is full: if the distance of the border of the node from the query point 528 | #is greather than the kth distance then no point in that node can be one of the 529 | #k nearest neighbour's 530 | d_max = heap[0] 531 | queue = [(d, r, n) for (d, r, n) in queue if d <= d_max] 532 | else: 533 | if heap_size < k: 534 | for child in node['children']: 535 | dist = sqrt((child['x'] - x0) ** 2 + (child['y'] - y0) ** 2) 536 | radius = child['radius'] 537 | if dist <= radius: 538 | dist = 0 539 | else: 540 | dist -= radius 541 | queue.append((dist, radius, child)) 542 | 543 | queue.sort(key=lambda q:q[0], reverse=True) 544 | else: 545 | d_max = heap[0] 546 | queue = [(d, r, n) for (d, r, n) in queue if d <= d_max] 547 | for child in node['children']: 548 | dist = sqrt((child['x'] - x0) ** 2 + (child['y'] - y0) ** 2) 549 | radius = child['radius'] 550 | if dist <= radius: 551 | dist = 0 552 | else: 553 | dist -= radius 554 | 555 | if dist <= d_max: 556 | #The heap is full: if the distance of the border of the node from the query point 557 | #is greather than the kth distance then no point in that node can be one of the 558 | #k nearest neighbour's 559 | queue.append((dist, radius, child)) 560 | 561 | queue = sorted([(d, r, n) for (d, r, n) in queue if d <= d_max], 562 | key=lambda q:q[0], reverse=True) 563 | 564 | #Filters the possible nearest neighbours such that their distance is not greater than the the distance of the kth 565 | #nearest neighbour 566 | return [data for (d, data) in 567 | sorted([(d, data) for (d, data) in heap_elements if d <= heap[0]])] -------------------------------------------------------------------------------- /sudoku/sudoku_profiler.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 15/dic/2012 3 | 4 | @author: mlarocca 5 | ''' 6 | from sudoku_solver import solve_sudoku 7 | super_hard = [[0,0,3,0,0,5,4,1,0], 8 | [0,0,0,1,0,0,0,8,5], 9 | [0,0,0,3,0,0,6,0,0], 10 | [0,0,0,0,3,0,0,6,0], 11 | [2,0,0,7,0,9,0,0,8], 12 | [0,6,0,0,5,0,0,0,0], 13 | [0,0,8,0,0,3,0,0,0], 14 | [9,3,0,0,0,6,0,0,0], 15 | [0,5,7,4,0,0,9,0,0]] 16 | 17 | blank = [[0]*9 for i in xrange(9)] 18 | 19 | if __name__ == '__main__': 20 | import cProfile 21 | 22 | cProfile.run('solve_sudoku(super_hard)', 's_profile.txt') 23 | 24 | import pstats 25 | p = pstats.Stats('s_profile.txt') 26 | p.sort_stats('time').print_stats(20) -------------------------------------------------------------------------------- /sudoku/sudoku_solver.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 15/dic/2012 3 | 4 | @author: mlarocca 5 | ''' 6 | 7 | # CHALLENGE PROBLEM: 8 | # 9 | # Use your check_sudoku function as the basis for solve_sudoku(): a 10 | # function that takes a partially-completed Sudoku grid and replaces 11 | # each 0 cell with an integer in the range 1..9 in such a way that the 12 | # final grid is valid. 13 | # 14 | # There are many ways to cleverly solve a partially-completed Sudoku 15 | # puzzle, but a brute-force recursive solution with backtracking is a 16 | # perfectly good option. 
The solver should return None for broken 17 | # input, False for inputs that have no valid solutions, and a valid 18 | # 9x9 Sudoku grid containing no 0 elements otherwise. In general, a 19 | # partially-completed Sudoku grid does not have a unique solution. You 20 | # should just return some member of the set of solutions. 21 | # 22 | 23 | '''Checks if the grid is a valid sudoku (partial) solution 24 | ASSUMES that the grid is well formed 25 | Checks ONLY the distribution of the values inside a valid grid 26 | DO NOT check that all values are valid (for performance reasons it is checked 27 | only once when the input is read) 28 | @param grid: The grid to be checked 29 | @return: True <=> the grid is a valid (possibly partial) solution 30 | False <-> Otherwise 31 | ''' 32 | def check_sudoku(grid): 33 | 34 | #checks rows and cols values 35 | row_i = {} 36 | col_i = {} 37 | for i in xrange(9): 38 | row_i.clear() 39 | col_i.clear() 40 | for j in xrange(9): 41 | #For every values it gets on the grid, 42 | #Increments a counter that keeps track 43 | #Of how many times that value appear in the ith col and row 44 | try: 45 | row_i[grid[i][j]] += 1 46 | except KeyError: 47 | row_i[grid[i][j]] = 1 48 | try: 49 | col_i[grid[j][i]] += 1 50 | except KeyError: 51 | col_i[grid[j][i]] = 1 52 | #Discards values relative to zeros (wildcards) 53 | try: 54 | del row_i[0] 55 | except KeyError: 56 | pass 57 | try: 58 | del col_i[0] 59 | except KeyError: 60 | pass 61 | 62 | #If any value (excluding 0) appears more than once in a single 63 | #row or column, then the sudoku assignment isn't valid 64 | row_i_v = row_i.values() 65 | col_i_v = col_i.values() 66 | if ((len(row_i_v) > 0 and max(row_i_v) > 1) or 67 | (len(col_i_v) > 0 and max(col_i_v) > 1)): 68 | return False 69 | 70 | #now checks the 3x3 cells 71 | 72 | cell = {} 73 | for cell_row in xrange(3): 74 | for cell_col in xrange(3): 75 | #For each cell... 76 | cell.clear() 77 | for row in xrange(3 * cell_row, 3 * (cell_row + 1)): 78 | for col in xrange(3 * cell_col, 3 * (cell_col + 1)): 79 | #...for each value found in a single cell 80 | #Increments a counter that keeps track 81 | #Of how many times that value appear in the cell 82 | try: 83 | cell[grid[row][col]] += 1 84 | except KeyError: 85 | cell[grid[row][col]] = 1 86 | #Discards values relative to zeros (wildcards) 87 | try: 88 | del cell[0] 89 | except KeyError: 90 | pass 91 | 92 | #If any value (excluding 0) appears more than once in a single 93 | #cell, then the sudoku assignment isn't valid 94 | cell_v = cell.values() 95 | if len(cell_v) > 0 and max(cell_v) > 1: 96 | return False 97 | 98 | #If it has made it so far, the assignment is valid 99 | return True 100 | 101 | '''A sudoku solver function 102 | If the grid is well formed (any iterable containing 9 iterables each of 103 | which contains 9 integers between 0 and 9 included is accepted as valid) 104 | and if it is a valid partial solution for the sudoku puzzle, 105 | it tries to solve it, if possible. 
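Typical use, with the grids defined in sudoku_tester.py (0 marks an empty cell):

    solve_sudoku(easy)        # a completed, valid 9x9 grid
    solve_sudoku(invalid)     # False: well formed, but no valid completion exists
    solve_sudoku(ill_formed)  # None: malformed input (one row has only 8 values)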
106 | 107 | @param grid: The grid representing the specific sudoku puzzle to solve; 108 | @return: The grid properly filled <=> The grid is properly formatted and 109 | the puzzle is solvable 110 | False <=> The grid is properly formatted but there is no 111 | solution to the puzzle 112 | None <=> The grid violates the sudoku contraints 113 | (wrong size or wrong types or values) 114 | ''' 115 | def solve_sudoku(grid): 116 | 117 | 118 | 119 | ''' For a partial solution grid and a cell (the cell at the 120 | crossing of ith row and jth column) enumerates all the possible 121 | values that can be assigned to that cell; 122 | @param grid: A partial solution grid 123 | @param i: Row of the cell under evaluation 124 | @param j: Column of the cell under evaluation 125 | @return: A list of the possible values for the cell. 126 | ''' 127 | def get_valid_values_for_cell(grid, i, j): 128 | valid_values = {i : 0 for i in xrange(10) } #Initially, 1 to 9 are supposed to be valid 129 | for k in xrange(9): 130 | #For every values it gets on the grid, 131 | #removes it from the valid ones, if it's still included 132 | 133 | valid_values[grid[i][k]] = 1 134 | valid_values[grid[k][j]] = 1 135 | 136 | #Now checks the 3x3 cell 137 | cell_i = i - i%3 138 | cell_j = j - j%3 139 | for i in xrange(cell_i, cell_i + 3): 140 | for j in xrange(cell_j, cell_j + 3): 141 | valid_values[grid[i][j]] = 1 142 | 143 | #If the call is issued correctly grid[i][j] == 0 and therefore valid_values[0] == 1 => 0 in valid_values 144 | del valid_values[0] 145 | 146 | return [k for (k,v) in valid_values.items() if v == 0] 147 | 148 | ''' Given a partial solution grid and the list of its unassigned cells 149 | chooses the next move (i.e. the next cell to be assigned a value) 150 | according to the most constrained one first criterium 151 | 152 | @param grid: A partial solution grid 153 | @param free_cells: A list of the free cells remaining in the grid 154 | @return: A couple containing: 155 | - The list of possible values that can be assigned to the chosen cell 156 | - A couple of indices for row and column of the chosen cell 157 | ''' 158 | def pick_next_move(grid, free_cells): 159 | best_choice_len = float("inf") 160 | 161 | for (i,j) in free_cells: 162 | values = get_valid_values_for_cell(grid, i, j) 163 | n = len(values) 164 | if n == 0: 165 | return None, (None, None) #This cell has no possible choice, so we can as well backtrack 166 | elif n == 1: 167 | #This is the most possible constrained situation 168 | #so we can as well choose this 169 | return values, (i, j) 170 | elif n < best_choice_len: 171 | best_choice_len = n 172 | best_choice = (values, (i,j)) 173 | assert(best_choice) 174 | return best_choice 175 | 176 | 177 | ''' Given a partial solution grid and the list of its unassigned cells 178 | tries to solve the puzzle by choosing the best possible next cell 179 | to assign a value to and then trying to assign it all the possible 180 | values, for each one recursively calls itself in a DFS search. 
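If every candidate value for the chosen cell fails, the cell is reset to 0 and
False is propagated to the caller, which then tries its own next candidate
(classic backtracking).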
181 | 182 | @param grid: A partial solution grid 183 | @param free_cells: A list of the free cells remaining in the grid 184 | @return: The grid properly filled <=> The grid is properly formatted and 185 | the puzzle is solvable 186 | False <=> The grid is properly formatted but there is no 187 | solution to the puzzle 188 | ''' 189 | def recursive_solver(grid, free_cells): 190 | #Grid completed 191 | if len(free_cells) == 0: 192 | assert(check_sudoku(grid)) 193 | return grid 194 | 195 | 196 | #looks for the best next move 197 | values, (i,j) = pick_next_move(grid, free_cells) 198 | if values is None: 199 | return False #Must backtrack 200 | #else 201 | free_cells.remove((i,j)) 202 | #free cell! 203 | 204 | for val in values: 205 | grid[i][j] = val 206 | sol = recursive_solver(grid, free_cells[:]) 207 | if sol != False: 208 | return sol 209 | 210 | #If none of the attempted substitution worked, then must backtrack 211 | grid[i][j] = 0 212 | return False 213 | 214 | ''' solve_sudoku BODY ''' 215 | #checks that the grid is well formed 216 | try: 217 | rows = len(grid) 218 | except TypeError: 219 | return None 220 | 221 | if rows != 9: 222 | return None 223 | 224 | free_cells = [] 225 | for i in xrange(9): #INVARIANT: assert(len(grid) == 9) 226 | #check that the single rows are well formed 227 | try: 228 | cols = len(grid[i]) 229 | except TypeError: 230 | return None 231 | 232 | if cols != 9: 233 | return None 234 | 235 | for j in xrange(9): #INVARIANT: assert(len(grid[i]) == 9) 236 | val = grid[i][j] 237 | if val == 0: 238 | free_cells.append((i,j)) 239 | elif type(val) != int or val < 0 or val > 9: 240 | #The cell contains an invalid value 241 | return None 242 | 243 | check = check_sudoku(grid) 244 | if check != True: 245 | return False #The input is already an invalid solution 246 | #else, try to solve the puzzle 247 | return recursive_solver(grid, free_cells) -------------------------------------------------------------------------------- /sudoku/sudoku_tester.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 15/dic/2012 3 | 4 | @author: mlarocca 5 | ''' 6 | 7 | from sudoku_solver import * 8 | 9 | # solve_sudoku should return None 10 | ill_formed = [[5,3,4,6,7,8,9,1,2], 11 | [6,7,2,1,9,5,3,4,8], 12 | [1,9,8,3,4,2,5,6,7], 13 | [8,5,9,7,6,1,4,2,3], 14 | [4,2,6,8,5,3,7,9], # <--- 15 | [7,1,3,9,2,4,8,5,6], 16 | [9,6,1,5,3,7,2,8,4], 17 | [2,8,7,4,1,9,6,3,5], 18 | [3,4,5,2,8,6,1,7,9]] 19 | 20 | ill_formed_2 = [(5,3,4,6,7,8,9,1,2), 21 | [6,"7",2,1,9,5,3,4,8], 22 | [1,9,8,3,4,2,5,6,7], 23 | [8,5,9,7,6,1,4,2,3], 24 | [4,2,6,8,5,3,7,9,10], # <--- 25 | [7,1,3,9,2,4,8,5,6], 26 | [9,6,1,5,3,7,2,8,4], 27 | [2,8,7,4,1,9,6,3,5], 28 | [3,4,5,2,8,6,1,7,9]] 29 | 30 | # solve_sudoku should return valid unchanged 31 | valid = [[5,3,4,6,7,8,9,1,2], 32 | [6,7,2,1,9,5,3,4,8], 33 | [1,9,8,3,4,2,5,6,7], 34 | [8,5,9,7,6,1,4,2,3], 35 | [4,2,6,8,5,3,7,9,1], 36 | [7,1,3,9,2,4,8,5,6], 37 | [9,6,1,5,3,7,2,8,4], 38 | [2,8,7,4,1,9,6,3,5], 39 | [3,4,5,2,8,6,1,7,9]] 40 | 41 | # solve_sudoku should return False 42 | invalid = [[5,3,4,6,7,8,9,1,2], 43 | [6,7,2,1,9,5,3,4,8], 44 | [1,9,8,3,8,2,5,6,7], 45 | [8,5,9,7,6,1,4,2,3], 46 | [4,2,6,8,5,3,7,9,1], 47 | [7,1,3,9,2,4,8,5,6], 48 | [9,6,1,5,3,7,2,8,4], 49 | [2,8,7,4,1,9,6,3,5], 50 | [3,4,5,2,8,6,1,7,9]] 51 | 52 | # solve_sudoku should return False 53 | invalid_2 = [[1,0,0,0,0,7,0,9,0], 54 | [0,3,0,0,2,0,0,0,8], 55 | [0,0,3,6,0,0,5,0,0], 56 | [0,0,5,3,0,0,9,0,0], 57 | [0,1,0,0,8,0,0,0,2], 58 | [6,0,0,0,0,4,0,0,0], 59 | 
[3,0,0,0,0,0,0,1,0], 60 | [0,4,0,0,0,0,0,0,7], 61 | [0,0,7,0,0,0,3,0,0]] 62 | 63 | 64 | # solve_sudoku should return a 65 | # sudoku grid which passes a 66 | # sudoku checker. There may be 67 | # multiple correct grids which 68 | # can be made from this starting 69 | # grid. 70 | easy = [[2,9,0,0,0,0,0,7,0], 71 | [3,0,6,0,0,8,4,0,0], 72 | [8,0,0,0,4,0,0,0,2], 73 | [0,2,0,0,3,1,0,0,7], 74 | [0,0,0,0,8,0,0,0,0], 75 | [1,0,0,9,5,0,0,6,0], 76 | [7,0,0,0,9,0,0,0,1], 77 | [0,0,1,2,0,0,3,0,6], 78 | [0,3,0,0,0,0,0,5,9]] 79 | 80 | # Note: this may timeout 81 | # in the Udacity IDE! Try running 82 | # it locally if you'd like to test 83 | # your solution with it. 84 | # 85 | hard = [[1,0,0,0,0,7,0,9,0], 86 | [0,3,0,0,2,0,0,0,8], 87 | [0,0,9,6,0,0,5,0,0], 88 | [0,0,5,3,0,0,9,0,0], 89 | [0,1,0,0,8,0,0,0,2], 90 | [6,0,0,0,0,4,0,0,0], 91 | [3,0,0,0,0,0,0,1,0], 92 | [0,4,0,0,0,0,0,0,7], 93 | [0,0,7,0,0,0,3,0,0]] 94 | 95 | super_hard = [[0,0,3,0,0,5,4,1,0], 96 | [0,0,0,1,0,0,0,8,5], 97 | [0,0,0,3,0,0,6,0,0], 98 | [0,0,0,0,3,0,0,6,0], 99 | [2,0,0,7,0,9,0,0,8], 100 | [0,6,0,0,5,0,0,0,0], 101 | [0,0,8,0,0,3,0,0,0], 102 | [9,3,0,0,0,6,0,0,0], 103 | [0,5,7,4,0,0,9,0,0]] 104 | 105 | blank = [[0]*9 for i in xrange(9)] 106 | 107 | print solve_sudoku(ill_formed) # --> None 108 | print solve_sudoku(ill_formed_2) # --> None 109 | print solve_sudoku(3) # --> None 110 | print solve_sudoku([3]) # --> None 111 | print solve_sudoku([3 for i in xrange(9)]) # --> None 112 | print solve_sudoku(valid) 113 | print solve_sudoku(invalid) # --> False 114 | print solve_sudoku(invalid_2) # --> False 115 | print solve_sudoku(easy) 116 | print solve_sudoku(hard) 117 | print solve_sudoku(super_hard) 118 | print solve_sudoku(blank) #edge case -------------------------------------------------------------------------------- /union_find.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 06/apr/2013 3 | 4 | @author: mlarocca 5 | ''' 6 | 7 | class UnionFind: 8 | 9 | def __init__(self, size): 10 | ''' : Constructor : 11 | Create a new union-find data structure 12 | : param size : The initial size of the union-find; 13 | The size can be later increased, but not decreased. 14 | : type size : int (Other types will be converted to int) 15 | : raise IllegalArgumentException : If size is less than 1. 16 | : return : self, as all constructors. 17 | ''' 18 | self.n = int(size) 19 | if self.n <= 0: 20 | raise Exception("IllegalArgumentException: size must be positive") 21 | 22 | self.set = range(size) 23 | self.set_size = [1 for _ in xrange(size)] 24 | 25 | def add_element(self): 26 | ''' Add a new element to the union-find; the new element 27 | will be assigned to its own component. 28 | : return : self, to allow method chaining. 29 | ''' 30 | self.set.append(self.n) 31 | self.set_size.append(1) 32 | self.n += 1 33 | return self 34 | 35 | def find_root(self, i): 36 | ''' 37 | Implement find with path compression 38 | : param i : The element whose root has to be found. 39 | : type i : int (Other types will be converted to int) 40 | ''' 41 | #makes sure i is an integer 42 | i = int(i) 43 | 44 | if self.set[i] != i: 45 | self.set[i] = self.find_root(self.set[i]) 46 | 47 | return self.set[i] 48 | 49 | 50 | def connected(self, i, j): 51 | ''' Are elements i and j connected? 52 | : param i : The first element to check. 53 | : param j : The second element to check. 54 | : return : True <=> i and j belongs to the same component. 
55 | : raise IllegalArgumentException : Raise an exception if either element is not in the union set 56 | ''' 57 | 58 | if i == j: 59 | if 0 <= i < self.n: 60 | return True 61 | else: 62 | raise Exception("IllegalArgumentException") 63 | 64 | root_i = self.find_root(i) 65 | root_j = self.find_root(j) 66 | 67 | return root_i == root_j 68 | 69 | def union(self, i, j): 70 | ''' Perform the union of two components, if they aren't unified yet. 71 | : param i : The first element. 72 | : param j : The second element, to be unified with i's component. 73 | : raise Exception: Raise an exception if either element is not in the 74 | union set (through find_root). 75 | : return : The size of the newly created component 76 | ''' 77 | 78 | root_i = self.find_root(i) 79 | root_j = self.find_root(j) 80 | if root_i == root_j: 81 | return self.set_size[root_i] 82 | 83 | if self.set_size[root_i] <= self.set_size[root_j]: 84 | self.set[root_i] = root_j 85 | self.set_size[root_j] += self.set_size[root_i] 86 | return self.set_size[root_i] 87 | else: 88 | self.set[root_j] = root_i 89 | self.set_size[root_i] += self.set_size[root_j] 90 | return self.set_size[root_j] 91 | 92 | def __str__(self): 93 | ''' : override : 94 | ''' 95 | res = [str(range(self.n)), str(self.set), str(self.set_size)] 96 | return "\n".join(res) 97 | 98 | 99 | 100 | if __name__ == '__main__': 101 | 102 | def test_UF(): 103 | ''' Test the structure ''' 104 | u = UnionFind(4) 105 | #print u 106 | 107 | assert not u.connected(2, 3) 108 | u.union(2,3) 109 | assert u.connected(2, 3) 110 | u.union(2,1) 111 | assert u.connected(2, 3) 112 | #print u 113 | u.add_element() 114 | assert not u.connected(2,4) 115 | u.union(0,4) 116 | assert u.connected(4,0) 117 | assert not u.connected(2,4) 118 | #print u 119 | #end of test_UF definition 120 | test_UF() --------------------------------------------------------------------------------
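A small usage sketch complementing test_UF() above (the edge list is
illustrative): UnionFind can be used to count the connected components of an
undirected graph given as a list of edges.

    edges = [(0, 1), (1, 2), (4, 5)]
    uf = UnionFind(6)
    for (a, b) in edges:
        uf.union(a, b)
    roots = set(uf.find_root(v) for v in range(6))
    print len(roots)    # 3 components: {0, 1, 2}, {3} and {4, 5}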