├── images
├── README.txt
└── cover.png
├── ch01
├── __init__.py
├── largest.py
└── timing.py
├── ch02
├── __init__.py
├── bas.py
├── random_sort.py
├── mult.py
├── test.py
└── timing.py
├── ch03
├── __init__.py
├── perfect
│ ├── __init__.py
│ ├── README.txt
│ ├── book.py
│ ├── shakespeare.py
│ └── perfect_example.py
├── hashtable.py
├── hashtable_open_perfect.py
├── entry.py
├── base26.py
├── months.py
└── hashtable_linked.py
├── ch04
├── __init__.py
├── entry.py
├── linked_entry.py
├── list_queue.py
├── builtin.py
├── circular_queue.py
├── array.py
├── linked.py
├── ordered_list.py
├── ordered.py
├── heap.py
├── dynamic_heap.py
├── factorial_heap.py
└── timing.py
├── ch05
├── __init__.py
├── recursion.py
├── merge.py
├── heapsort.py
├── sorting.py
├── timing.py
└── timsort.py
├── ch06
├── __init__.py
├── recursive_lists.py
├── speaking.py
├── avl.py
├── tree.py
├── pq.py
├── balanced.py
├── symbol.py
└── expression.py
├── ch07
├── __init__.py
├── dependencies.py
├── list_stack.py
├── snapshot.py
├── xlsx_example.py
├── fibonacci_example.py
├── plot_map.py
├── all_pairs_sp.py
├── solver_bfs.py
├── single_source_sp.py
├── viewer.py
├── solver_guided.py
├── solver_dfs.py
├── indexed_pq.py
├── timing.py
├── xlsx_loader.py
├── digraph_search.py
├── tmg_load.py
├── search.py
└── spreadsheet.py
├── ch08
└── __init__.py
├── __init__.py
├── algs
├── __init__.py
├── sorting.py
├── node.py
├── counting.py
├── output.py
├── test.py
└── timing.py
├── resources
├── __init__.py
├── ch07-fibonacci-example.xlsx
├── test.py
├── highway.py
└── english.py
├── .coveragerc
├── .project
├── .pydevproject
├── launch.sh
├── launch.bat
├── LICENSE
├── coverage.bat
├── book.py
└── .gitignore
/images/README.txt:
--------------------------------------------------------------------------------
1 | Images will be generated into this directory.
2 |
--------------------------------------------------------------------------------
/ch01/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 1.
3 | """
4 |
--------------------------------------------------------------------------------
/ch02/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 2.
3 | """
4 |
--------------------------------------------------------------------------------
/ch03/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 3.
3 | """
4 |
--------------------------------------------------------------------------------
/ch04/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 4.
3 | """
4 |
--------------------------------------------------------------------------------
/ch05/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 5.
3 | """
4 |
--------------------------------------------------------------------------------
/ch06/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 6.
3 | """
4 |
--------------------------------------------------------------------------------
/ch07/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 7.
3 | """
4 |
--------------------------------------------------------------------------------
/ch08/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Chapter 8.
3 | """
4 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Learning Algorithms
3 | """
4 |
--------------------------------------------------------------------------------
/algs/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code to support the book.
3 | """
4 |
--------------------------------------------------------------------------------
/resources/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code to support the book.
3 | """
4 |
--------------------------------------------------------------------------------
/images/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heineman/LearningAlgorithms/HEAD/images/cover.png
--------------------------------------------------------------------------------
/ch03/perfect/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing Python code for Perfect Hashing as part of Chapter 3.
3 | """
4 |
--------------------------------------------------------------------------------
/resources/ch07-fibonacci-example.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heineman/LearningAlgorithms/HEAD/resources/ch07-fibonacci-example.xlsx
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | # c:\Python37\Scripts\coverage-3.7.exe run -m unittest discover
2 | #
3 | # c:\Python37\Scripts\coverage-3.7.exe html
4 |
5 | [run]
6 | omit =
7 | # Anything outside of Learning Algorithms code
8 | */AppData/*
--------------------------------------------------------------------------------
/ch03/perfect/README.txt:
--------------------------------------------------------------------------------
1 | Note that hashFile.py contained the full output from perfect-hash,
2 | which includes a self-check at the end with the actual words used to
3 | generate the perfect hash.
4 |
5 | generated_dictionary.py contains only the G, S1 and S2 lists together
6 | with the hash_f and perfect_hash functions.
7 |
8 |
--------------------------------------------------------------------------------
/ch04/entry.py:
--------------------------------------------------------------------------------
1 | """Represents an entry in a Priority Queue."""
class Entry:
    """Pairs a value with its priority for storage in a priority queue."""
    def __init__(self, v, p):
        self.priority = p
        self.value = v

    def __str__(self):
        """Render the entry as '[value p=priority]'."""
        return f'[{self.value} p={self.priority}]'
10 |
--------------------------------------------------------------------------------
/ch04/linked_entry.py:
--------------------------------------------------------------------------------
1 | """
2 | Represents an entry for a Priority Queue stored within a Linked List.
3 | """
class LinkedEntry:
    """A (value, priority) pair that also links to the next entry in a list."""
    def __init__(self, v, p, nxt=None):
        self.next = nxt
        self.priority = p
        self.value = v

    def __str__(self):
        """Render the entry as '[value p=priority]'; the link is not shown."""
        return f'[{self.value} p={self.priority}]'
13 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | LearningAlgorithms
4 |
5 |
6 |
7 |
8 |
9 | org.python.pydev.PyDevBuilder
10 |
11 |
12 |
13 |
14 |
15 | org.python.pydev.pythonNature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
1 |
2 |
3 | Default
4 | python interpreter
5 |
6 | /${PROJECT_DIR_NAME}
7 |
8 |
9 |
--------------------------------------------------------------------------------
/ch07/dependencies.py:
--------------------------------------------------------------------------------
1 | """
2 | Trims away those test cases and book code that rely on tkinter or matplotlib.
3 | """
4 |
# Each list below stays empty when its library imports cleanly and receives a
# single marker when the import fails, so a truthy list means "unavailable".
tkinter_error = []
try:
    import tkinter
except ImportError:
    if not tkinter_error:
        print('trying to continue without tkinter')
    tkinter_error.append(1)

plt_error = []
try:
    import matplotlib.pyplot as plt
except ImportError:
    if not plt_error:
        print('trying to continue without matplotlib.pyplot')
    plt_error.append(1)
20 |
--------------------------------------------------------------------------------
/resources/test.py:
--------------------------------------------------------------------------------
1 | """
2 | Test Resources.
3 | """
4 | import unittest
5 |
class TestHashing(unittest.TestCase):
    """Confirm the bundled data resources load with the expected sizes."""

    def test_dictionary_present(self):
        """The English word list must report a length of 321129."""
        from resources.english import english_words

        words = english_words()
        self.assertEqual(321129, len(words))

    def test_highway_present(self):
        """The highway map must report a length of 2305 + 2826 + 2."""
        from resources.highway import highway_map

        loaded = highway_map()
        self.assertEqual(2305 + 2826 + 2, len(loaded))
19 |
20 | if __name__ == '__main__':
21 | unittest.main()
22 |
--------------------------------------------------------------------------------
/ch03/perfect/book.py:
--------------------------------------------------------------------------------
1 | """
2 | How many unused indices in G are there for the perfect hash?
3 | """
4 |
5 | from ch03.perfect.generated_dictionary import G
6 | from algs.table import comma
7 |
def count_unused():
    """Print how many entries of G are zero, with their share as a percentage."""
    total = len(G)
    zeros = sum(1 for val in G if val == 0)
    print('From G which has', comma(total), 'entries', comma(zeros),
          'of them are zero ({:.2f}%)'.format(100*zeros/total))
16 |
17 | #######################################################################
18 | if __name__ == '__main__':
19 | count_unused()
20 |
--------------------------------------------------------------------------------
/ch02/bas.py:
--------------------------------------------------------------------------------
1 | """Binary Array Search implementation"""
2 |
def binary_array_search(A, target):
    """
    Search ordered list A for target by repeatedly halving the search range.

    Returns the index of target when it is present. Otherwise returns a
    negative number x such that -x-1 is the index at which target would
    have to be inserted to keep A ordered.
    """
    low, high = 0, len(A) - 1

    while low <= high:
        middle = (low + high) // 2

        if target < A[middle]:
            high = middle - 1
            continue
        if target > A[middle]:
            low = middle + 1
            continue
        return middle

    # low has converged on the insertion point; encode it as a negative value.
    return -(low + 1)
24 |
--------------------------------------------------------------------------------
/algs/sorting.py:
--------------------------------------------------------------------------------
1 | """Utility methods for validating sorting algorithms."""
2 |
def is_sorted(a):
    """Raise ValueError('Not sorted!') unless check_sorted(a) reports success."""
    if check_sorted(a):
        return
    raise ValueError('Not sorted!')
7 |
def check_sorted(a):
    """Return True when no value in a is smaller than the value just before it."""
    # Stops at the first out-of-order pair; position 0 has no predecessor.
    return not any(i > 0 and val < a[i-1] for i, val in enumerate(a))
14 |
def unique(A):
    """Return True when no value appears more than once in A, False otherwise."""
    ordered = sorted(A)

    # After sorting, equal values sit next to each other.
    return not any(left == right for left, right in zip(ordered, ordered[1:]))
24 |
--------------------------------------------------------------------------------
/algs/node.py:
--------------------------------------------------------------------------------
1 | """Class represents a node in a linked list."""
2 |
class Node:
    """
    Node structure to use in linked list.

    Attributes:
        value: the value stored in this node.
        next: the node that follows this one, or None at the end of the list.
    """
    def __init__(self, val, rest=None):
        self.value = val
        self.next = rest

    def __str__(self):
        """Render just this node's value as '[value]'."""
        return '[{}]'.format(self.value)

    def __iter__(self):
        """
        Generator to retrieve values in linked list in order.

        Enables Python code like following, where alist is a Node.

           for v in alist:
               print(v)

        The list is walked with a loop rather than by delegating to the next
        node's generator, so lists longer than the interpreter's recursion
        limit can be iterated and each value is yielded only once.
        """
        node = self
        while True:
            yield node.value
            node = node.next
            if not node:
                return
29 |
--------------------------------------------------------------------------------
/ch07/list_stack.py:
--------------------------------------------------------------------------------
1 | """
2 | Stack Data Type implemented using linked lists.
3 | """
4 | from algs.node import Node
5 |
class Stack:
    """
    Last-in, first-out stack whose items are held in a chain of Node objects.
    """
    def __init__(self):
        self.top = None

    def is_empty(self):
        """Return True when the stack holds no items."""
        return self.top is None

    def push(self, val):
        """Place val on top of the stack, linking it to the previous top."""
        previous_top = self.top
        self.top = Node(val, previous_top)

    def pop(self):
        """Remove the top item and return its value; RuntimeError when empty."""
        if self.is_empty():
            raise RuntimeError('Stack is empty')

        removed = self.top
        self.top = removed.next
        return removed.value
29 |
--------------------------------------------------------------------------------
/ch02/random_sort.py:
--------------------------------------------------------------------------------
1 | """Unusually poor sorting algorithms that work (eventually)."""
2 | from random import shuffle
3 | from itertools import permutations
4 | from algs.sorting import check_sorted
5 |
def random_sort(A):
    """
    Keep shuffling A in place until check_sorted reports it is in order.

    There is no bound on how long this takes; each shuffle merely has a
    non-zero chance of producing the sorted arrangement.
    """
    while True:
        if check_sorted(A):
            return
        shuffle(A)
15 |
def permutation_sort(A):
    """
    Try the permutations of A one after another until a sorted one appears,
    then copy that arrangement back into A. Guaranteed to sort A.
    """
    for candidate in permutations(A):
        if not check_sorted(candidate):
            continue
        A[:] = candidate        # overwrite A in place with the sorted order
        break
25 |
--------------------------------------------------------------------------------
/ch07/snapshot.py:
--------------------------------------------------------------------------------
1 | """
2 | Convenience function for taking a snapshot of a canvas into
3 | designated file AND terminating tkinter application.
4 | """
5 | from algs.output import image_file
6 |
def tkinter_register_snapshot(root, canvas, file_name):
    """
    Schedule a snapshot of canvas shortly after the application launches.

    Registers tkinter_to_file through root.after() with a delay of 1000,
    which saves the contents of canvas to postscript under file_name. Use
    any of the available conversion tools to create PNG or JPG images from
    that file. The delay gives the canvas time to draw; one second is more
    than enough.
    """
    delay = 1000
    root.after(delay, tkinter_to_file, root, canvas, file_name)
15 |
def tkinter_to_file(root, canvas, ps_file):
    """
    Save canvas as color postscript to image_file(ps_file), then shut root
    down cleanly so tkinter can be run again.
    """
    destination = image_file(ps_file)
    canvas.postscript(file=destination, colormode='color')

    # Shutdown sequence, kept in this order: withdraw, destroy, quit.
    root.withdraw()
    root.destroy()
    root.quit()
22 |
--------------------------------------------------------------------------------
/ch03/perfect/shakespeare.py:
--------------------------------------------------------------------------------
1 | """Generated from perfect-hash"""
2 | # =======================================================================
3 | # ================= Python code for perfect hash function ===============
4 | # =======================================================================
5 |
# G is the 12-slot table indexed by the two intermediate hash values.
G = [0, 8, 1, 4, 7, 10, 2, 0, 9, 11, 1, 5]

# S1 and S2 are the two weight lists handed to hash_f; both hold 5 weights.
S1 = [9, 4, 8, 6, 6]
S2 = [2, 10, 6, 3, 5]
assert len(S1) == 5 and len(S2) == 5
11 |
def hash_f(key, T):
    """
    Generated helper: weighted sum of the character codes of key, modulo 12.

    The weight for the character at position i is T[i % 5].
    """
    total = 0
    for position, char in enumerate(key):
        total += T[position % 5] * ord(char)
    return total % 12
15 |
def perfect_hash(key):
    """
    Perfect hash for words in K: combine the two G entries selected by
    hashing key with S1 and with S2, modulo 12.
    """
    first = G[hash_f(key, S1)]
    second = G[hash_f(key, S2)]
    return (first + second) % 12
19 |
20 | # ============================ Sanity check =============================
21 |
K = ['a', 'rose', 'by', 'any', 'other', 'name', 'would', 'smell', 'as', 'sweet']
assert len(K) == 10

# Every word must hash to its own position in K.
for h, k in enumerate(K):
    expected, actual = h, perfect_hash(k)
    assert actual == expected
27 |
--------------------------------------------------------------------------------
/ch04/list_queue.py:
--------------------------------------------------------------------------------
1 | """
2 | A queue implemented using linked Lists, storing values
3 | to be retrieved in First-in, First-out fashion.
4 | """
5 | from algs.node import Node
6 |
class Queue:
    """
    Implementation of a Queue using linked lists.

    Attributes:
        first: node holding the next value to dequeue, or None when empty.
        last: node holding the most recently enqueued value, or None when empty.
    """
    def __init__(self):
        self.first = None
        self.last = None

    def is_empty(self):
        """Determine if queue is empty."""
        return self.first is None

    def enqueue(self, val):
        """Enqueue new item to end of queue."""
        node = Node(val)
        if self.first is None:
            self.first = self.last = node
        else:
            self.last.next = node
            self.last = node

    def dequeue(self):
        """
        Remove and return first item from queue.

        Raises RuntimeError if the queue is empty.
        """
        if self.is_empty():
            raise RuntimeError('Queue is empty')

        val = self.first.value
        self.first = self.first.next
        if self.first is None:
            # Queue just became empty: drop the stale reference so the
            # removed node (and its value) is not kept alive by self.last.
            self.last = None
        return val
35 |
--------------------------------------------------------------------------------
/ch02/mult.py:
--------------------------------------------------------------------------------
1 | """
2 | Multiply two n-digit numbers (where n is quite large) of the following form
3 |
4 | 1234567891234567 x 9876543219876543
5 | """
6 | import random
7 |
def create_pair(n):
    """
    Build two n-digit integers: one whose digits cycle upward 1..9 and one
    whose digits cycle downward 9..1. Returned as a two-element list.
    """
    ascending = 0
    descending = 0
    position = 0
    while position < n:
        step = position % 9              # 0..8, repeating every nine digits
        ascending = 10*ascending + (step + 1)
        descending = 10*descending + (9 - step)
        position += 1

    return [ascending, descending]
25 |
def create_random_pair(n):
    """
    Build two n-digit integers from random digits in 1..9 (never zero).

    Digits are drawn alternately: first number, then second number.
    """
    pair = [0, 0]
    for _ in range(n):
        for slot in (0, 1):
            pair[slot] = 10*pair[slot] + random.randint(1, 9)

    return pair
35 |
def mult_pair(pair):
    """Multiply the first two elements of pair, however large, and return the result."""
    first, second = pair[0], pair[1]
    return first * second
39 |
--------------------------------------------------------------------------------
/launch.sh:
--------------------------------------------------------------------------------
#!/bin/bash

export PYTHONPATH=.

# update, as needed, to point to your own Python installation or
# just eliminate if you already have python executables in your path
PYTHON3=python3

echo "Running test cases -- should take about 15 minutes."
$PYTHON3 -m unittest discover > tests.txt
$PYTHON3 ch07/replacement.py >> tests.txt

echo "Generating figures and tables for the book. This will take about six hours"
$PYTHON3 book.py > book.txt

echo "Generating Timing results for each chapter. Should take about two hours"
$PYTHON3 algs/timing.py > algs.txt
$PYTHON3 ch02/timing.py > ch02.txt
$PYTHON3 ch03/timing.py > ch03.txt
$PYTHON3 ch04/timing.py > ch04.txt
$PYTHON3 ch05/timing.py > ch05.txt
$PYTHON3 ch07/timing.py > ch07.txt

echo "Challenge Exercise at end of each chapter. Might take up to fifteen hours"
# Chapters 2-5 and 7 already wrote timing results to their chNN.txt file
# above, so their challenge output is appended (>>) instead of overwriting
# it. Chapters 1 and 6 have no timing run and start a fresh file (>).
$PYTHON3 ch01/challenge.py > ch01.txt
$PYTHON3 ch02/challenge.py >> ch02.txt
$PYTHON3 ch03/challenge.py >> ch03.txt
$PYTHON3 ch04/challenge.py >> ch04.txt
$PYTHON3 ch05/challenge.py >> ch05.txt
$PYTHON3 ch06/challenge.py > ch06.txt
$PYTHON3 ch07/challenge.py >> ch07.txt
32 |
--------------------------------------------------------------------------------
/ch03/perfect/perfect_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate the files containing the perfect hash for the words in
3 | the English dictionary. Typically the 'perfect-hash' executable
4 | is installed in the $PYTHON_HOME/Scripts directory as perfect-hash.
5 |
6 | You can also directly execute this file.
7 |
8 | I executed this file with the following arguments:
9 |
10 | ..\\..\\resources\\words.english.txt --hft=2 -o hashFile.py
11 |
12 | Required several minutes to execute because of the size of the
13 | dictionary. Also I used the option (--hft=2) to create two
14 | supplementary lists, S1 and S2, to probe into primary list, G.
15 |
16 | Once the file is generated, you can safely delete the second
17 | half of 'hashFile.py' which has a copy of the words in the
18 | dictionary as a sanity check. Then I renamed the file as
19 | generated_dictionary.py
20 |
21 | generated_dictionary only contains the actual G, S1, S2, hash_f and
22 | perfect_hash functions
23 |
24 | Requires `perfect-hash` library: install using
25 |
26 | pip install perfect-hash
27 |
28 | """
29 | from perfect_hash import main
30 |
31 | #######################################################################
32 | if __name__ == '__main__':
33 | main()
34 |
--------------------------------------------------------------------------------
/launch.bat:
--------------------------------------------------------------------------------
set PYTHONPATH=.

REM Update, as needed, to point to your own Python installation or
REM just eliminate if you already have Python executables in your path
REM ------------------------------------------------------------------
set PYTHON3=python3

echo "Running test cases -- should take about 15 minutes."
%PYTHON3% -m unittest discover > tests.txt

echo "Generating figures and tables for the book. This will take about six hours."
%PYTHON3% book.py > book.txt

echo "Generating Timing results for each chapter. Should take about one hour."
%PYTHON3% algs\timing.py > algs.txt
%PYTHON3% ch02\timing.py > ch02.txt
%PYTHON3% ch03\timing.py > ch03.txt
%PYTHON3% ch04\timing.py > ch04.txt
%PYTHON3% ch05\timing.py > ch05.txt
%PYTHON3% ch07\timing.py > ch07.txt

echo "Generating challenge exercises for each chapter. Should take about two hours."
REM Chapters 2-5 and 7 already wrote timing results to their chNN.txt file
REM above, so their challenge output is appended instead of overwriting it.
REM Chapters 1 and 6 have no timing run and start a fresh file.
%PYTHON3% ch01\challenge.py > ch01.txt
%PYTHON3% ch02\challenge.py >> ch02.txt
%PYTHON3% ch03\challenge.py >> ch03.txt
%PYTHON3% ch04\challenge.py >> ch04.txt
%PYTHON3% ch05\challenge.py >> ch05.txt
%PYTHON3% ch06\challenge.py > ch06.txt
%PYTHON3% ch07\challenge.py >> ch07.txt
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 George T. Heineman
4 |
5 | Highway file resources/MA-region-simple.tmg graciously provided by James Teresco
6 | from https://travelmapping.net/graphs/
7 |
8 | Permission is hereby granted, free of charge, to any person obtaining a copy
9 | of this software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 |
15 | The above copyright notice and this permission notice shall be included in all
16 | copies or substantial portions of the Software.
17 |
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | SOFTWARE.
25 |
--------------------------------------------------------------------------------
/ch04/builtin.py:
--------------------------------------------------------------------------------
1 | """
2 | Array of unordered values.
3 | """
4 | from ch04.entry import Entry
5 |
def by_priority(entry):
    """Sort/selection key: return the priority attribute of entry."""
    priority = entry.priority
    return priority
9 |
class PQ:
    """Bounded priority queue that keeps its entries, unordered, in a built-in list."""
    def __init__(self, size):
        self.size = size
        self.storage = []

    def __len__(self):
        """Number of entries currently stored."""
        return len(self.storage)

    def is_full(self):
        """Return True once the number of stored entries equals size."""
        return self.size == len(self.storage)

    def enqueue(self, v, p):
        """Append a new Entry(v, p); RuntimeError when no capacity remains."""
        if len(self.storage) == self.size:
            raise RuntimeError('Priority Queue is Full!')
        entry = Entry(v, p)
        self.storage.append(entry)

    def dequeue(self):
        """Remove the entry with the highest priority and return its value."""
        if not self.storage:
            raise RuntimeError('PriorityQueue is empty!')

        best = max(self.storage, key=by_priority)   # left-most maximum
        self.storage.remove(best)                   # left-most match is removed
        return best.value
38 |
--------------------------------------------------------------------------------
/ch04/circular_queue.py:
--------------------------------------------------------------------------------
1 | """A fixed-capacity queue implemented as circular queue.
2 |
3 | Queue can become full.
4 |
5 | * enqueue is O(1)
6 | * dequeue is O(1)
7 |
8 | """
9 |
class Queue:
    """
    Fixed-capacity first-in, first-out queue stored in a circular buffer.

    first is the index of the oldest item, last is the index where the next
    item will be written, and N is the number of items currently held.
    """
    def __init__(self, size):
        self.size = size
        self.storage = [None] * size
        self.first = 0
        self.last = 0
        self.N = 0

    def is_empty(self):
        """Return True when the queue holds no items."""
        return self.N == 0

    def is_full(self):
        """Return True when every slot of the buffer is in use."""
        return self.N == self.size

    def enqueue(self, item):
        """Store item at the end of the queue; RuntimeError when full."""
        if self.is_full():
            raise RuntimeError('Queue is full')

        slot = self.last
        self.storage[slot] = item
        self.last = (slot + 1) % self.size     # wrap around the buffer
        self.N += 1

    def dequeue(self):
        """Remove and return the oldest item; RuntimeError when empty."""
        if self.is_empty():
            raise RuntimeError('Queue is empty')

        slot = self.first
        oldest = self.storage[slot]
        self.first = (slot + 1) % self.size    # wrap around the buffer
        self.N -= 1
        return oldest
47 |
--------------------------------------------------------------------------------
/ch07/xlsx_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Load up XLSX Fibonacci file.
3 |
4 | This is only a proof of concept and not meant to work for arbitrary XLSX files!
5 | """
6 | import os
7 | from ch07.xlsx_loader import load_xlsx
8 |
9 | try:
10 | import networkx as nx
11 | except ImportError:
12 | import ch07.replacement as nx
13 |
14 | from ch07.dependencies import tkinter_error
15 |
def load_fibonacci_from_resource(ss):
    """
    Load up sample XLSX Microsoft Excel file as a Spreadsheet.

    Every key k in the entries returned by load_xlsx is stored into ss with
    ss.set(k, entries[k]).

    The workbook is located relative to this module's own directory rather
    than the current working directory, so the function also works when the
    program is launched from somewhere other than this module's directory.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    xlsx_path = os.path.join(here, '..', 'resources', 'ch07-fibonacci-example.xlsx')
    entries = load_xlsx(xlsx_path)
    for k in entries:
        ss.set(k, entries[k])
21 |
22 | #######################################################################
if __name__ == '__main__':

    # The spreadsheet application is launched only when tkinter imported cleanly.
    if not tkinter_error:
        import tkinter
        from ch07.spreadsheet import Spreadsheet

        root = tkinter.Tk()
        ss = Spreadsheet(root, nx.DiGraph())
        load_fibonacci_from_resource(ss)

        # Might not be necessary IF entries are loaded in proper topological order!
        from ch07.digraph_search import topological_sort
        ordering = list(topological_sort(ss.digraph))
        print(ordering)
        root.mainloop()
    else:
        print('tkinter is not installed so unable to launch spreadsheet application')
39 |
--------------------------------------------------------------------------------
/ch03/hashtable.py:
--------------------------------------------------------------------------------
1 | """
2 | Hashtable to store (key, value) pairs in a fixed hashtable using
3 | hash() % N as hash code. This table can replace values associated
4 | with a given key, but if two keys attempt to use the same
5 | location, a RuntimeError is raised.
6 |
7 | Not able to handle collisions. Do not use this for production code!
8 | Only here as an example demonstrating that something needs fixing.
9 | """
10 |
11 | from ch03.entry import Entry
12 |
class Hashtable:
    """
    Weak Hashtable implementation with no collision strategy.

    Each of the M buckets holds at most one entry, located by hash(k) % M.
    """
    def __init__(self, M=10):
        if M < 1:
            raise ValueError('Hashtable storage must be at least 1.')

        self.table = [None] * M
        self.M = M

    def get(self, k):
        """
        Retrieve value associated with key, k.

        Returns None when the bucket for k is empty or is occupied by a
        different key that happens to share the same hash code.
        """
        hc = hash(k) % self.M
        entry = self.table[hc]
        if entry and entry.key == k:
            return entry.value
        return None

    def put(self, k, v):
        """
        Associate value, v, with the key, k.

        Replaces the value when k is already stored. Raises RuntimeError
        when the bucket for k is already occupied by a different key.
        """
        hc = hash(k) % self.M
        entry = self.table[hc]
        if entry:
            if entry.key == k:
                entry.value = v
            else:
                raise RuntimeError('Key Collision between {} and {}'.format(k, entry.key))
        else:
            self.table[hc] = Entry(k, v)
38 |
--------------------------------------------------------------------------------
/ch03/hashtable_open_perfect.py:
--------------------------------------------------------------------------------
1 | """
2 | Uses perfect hash generated from English dictionary. Use each designated bucket
3 | to hold a single Entry. If a non-English word somehow hashes to a bucket, the
4 | code checks against the key to be sure it is the right one.
5 |
6 | As long as you only use keys from the English dictionary used to construct the
7 | perfect hash, there will be no collisions.
8 | """
9 |
10 | from ch03.entry import Entry
11 | from ch03.perfect.generated_dictionary import perfect_hash
12 |
class Hashtable:
    """
    Hashtable using perfect hashing from 321,129 English word dictionary.

    Attributes:
        table: one bucket per possible perfect_hash value, each None or an Entry.
        N: number of occupied buckets.
    """
    def __init__(self):
        self.table = [None] * 321129
        self.N = 0

    def get(self, k):
        """
        Retrieve value associated with key, k.

        Returns None when the bucket for k is empty or holds another key.
        """
        hc = perfect_hash(k)
        if self.table[hc] and self.table[hc].key == k:
            return self.table[hc].value
        return None

    def put(self, k, v):
        """
        Associate value, v, with the key, k.

        Whatever the bucket previously held is overwritten. N grows only
        when an empty bucket becomes occupied, so storing to an occupied
        bucket again does not inflate the count.
        """
        hc = perfect_hash(k)
        if self.table[hc] is None:
            self.N += 1
        self.table[hc] = Entry(k, v)

    def __iter__(self):
        """Generate all (k, v) tuples for entries in the table."""
        for entry in self.table:
            if entry is not None:
                yield (entry.key, entry.value)
37 |
--------------------------------------------------------------------------------
/ch05/recursion.py:
--------------------------------------------------------------------------------
1 | """Recursive implementations."""
2 |
def find_max(A):
    """
    Invoke recursive function to find maximum value in A.

    Raises ValueError when A is empty, since an empty range has no maximum
    (the recursion would otherwise never reach its base case).
    """
    if len(A) == 0:
        raise ValueError('find_max() requires a non-empty sequence')

    def rmax(lo, hi):
        """Use recursion to find maximum value in A[lo:hi+1]."""
        if lo == hi: return A[lo]

        mid = (lo+hi) // 2
        L = rmax(lo, mid)
        R = rmax(mid+1, hi)
        return max(L, R)

    return rmax(0, len(A)-1)
16 |
def find_max_with_count(A):
    """
    Find the maximum value in A and count the comparisons used.

    Returns a tuple (number of comparisons, maximum value). Raises
    ValueError when A is empty, since an empty range has no maximum (the
    recursion would otherwise never reach its base case).
    """
    if len(A) == 0:
        raise ValueError('find_max_with_count() requires a non-empty sequence')

    def frmax(lo, hi):
        """Use recursion to find maximum value in A[lo:hi+1] incl. count"""
        if lo == hi: return (0, A[lo])

        mid = (lo+hi)//2
        ctleft,left = frmax(lo, mid)
        ctright,right = frmax(mid+1, hi)
        # One comparison to combine the halves, plus those made inside each.
        return (1+ctleft+ctright, max(left, right))

    return frmax(0, len(A)-1)
30 |
def count(A, target):
    """
    Invoke recursive function to return number of times target appears in A.

    An empty A contains no occurrences, so 0 is returned.
    """
    # Without this guard, rcount(0, -1, target) would never reach its base case.
    if len(A) == 0:
        return 0

    def rcount(lo, hi, target):
        """Use recursion to count occurrences of target in A[lo:hi+1]."""
        if lo == hi:
            return 1 if A[lo] == target else 0

        # Occurrences in the whole range are the sum of those in each half.
        mid = (lo+hi)//2
        left = rcount(lo, mid, target)
        right = rcount(mid+1, hi, target)
        return left + right

    return rcount(0, len(A)-1, target)
45 |
--------------------------------------------------------------------------------
/algs/counting.py:
--------------------------------------------------------------------------------
1 | """An Item that can be used to count the number of times <, = or > is invoked.
2 |
3 | Clear the statistics by invoking RecordedItem.clear()
4 | """
class RecordedItem:
    """
    Wrapper around a value that tallies how often instances are compared.

    The three counters below are class attributes, so every ==, < and >
    between RecordedItem objects adds to the same shared totals.
    """
    num_eq = 0
    num_lt = 0
    num_gt = 0

    def __init__(self, val):
        self.val = val

    @classmethod
    def range(cls, n):
        """Return list of RecordedItem(i) for i from 0 to n-1."""
        return list(map(RecordedItem, range(n)))

    @classmethod
    def header(cls):
        """Column names matching the order of values from report()."""
        return ('eq', 'lt', 'gt')

    @classmethod
    def clear(cls):
        """Set all three shared counters back to zero."""
        RecordedItem.num_eq = 0
        RecordedItem.num_lt = 0
        RecordedItem.num_gt = 0

    @classmethod
    def report(cls):
        """Return the current totals as the tuple (EQ, LT, GT)."""
        totals = (RecordedItem.num_eq, RecordedItem.num_lt, RecordedItem.num_gt)
        return totals

    def __eq__(self, other):
        RecordedItem.num_eq = RecordedItem.num_eq + 1
        return self.val == other.val

    def __lt__(self, other):
        RecordedItem.num_lt = RecordedItem.num_lt + 1
        return self.val < other.val

    def __gt__(self, other):
        RecordedItem.num_gt = RecordedItem.num_gt + 1
        return self.val > other.val
48 |
--------------------------------------------------------------------------------
/coverage.bat:
--------------------------------------------------------------------------------
1 | set PYTHONPATH=.
2 |
3 | REM Update, as needed, to point to your own Python installation or
4 | REM just eliminate if you already have Python executables in your path
5 | REM ------------------------------------------------------------------
6 | set COVERAGE3=c:\python37\Scripts\coverage-3.7.exe
7 |
8 | echo "Running test cases -- should take about 15 minutes."
9 | %COVERAGE3% run -m unittest discover > tests.txt
10 | %COVERAGE3% run -a ch07\replacement.py >> tests.txt
11 |
12 | echo "Generating figures and tables for the book. This will take about six hours."
13 | %COVERAGE3% run -a book.py > book.txt
14 |
15 | echo "Generating Timing results for each chapter. Should take about 2.5 hours"
16 | %COVERAGE3% run -a algs\timing.py > tim_algs.txt
17 | %COVERAGE3% run -a ch02\timing.py > tim_ch02.txt
18 | %COVERAGE3% run -a ch03\timing.py > tim_ch03.txt
19 | %COVERAGE3% run -a ch04\timing.py > tim_ch04.txt
20 | %COVERAGE3% run -a ch05\timing.py > tim_ch05.txt
21 | %COVERAGE3% run -a ch07\timing.py > tim_ch07.txt
22 |
23 | echo "Generating challenge exercises for each chapter. Should take about an hour."
24 | %COVERAGE3% run -a ch01\challenge.py > ch01.txt
25 | %COVERAGE3% run -a ch02\challenge.py > ch02.txt
26 | %COVERAGE3% run -a ch03\challenge.py > ch03.txt
27 | %COVERAGE3% run -a ch04\challenge.py > ch04.txt
28 | %COVERAGE3% run -a ch05\challenge.py > ch05.txt
29 | %COVERAGE3% run -a ch06\challenge.py > ch06.txt
30 | %COVERAGE3% run -a ch07\challenge.py > ch07.txt
31 |
32 | echo "Generating coverage report in htmlcov\index.html"
33 | %COVERAGE3% html
34 |
--------------------------------------------------------------------------------
/ch04/array.py:
--------------------------------------------------------------------------------
1 | """A priority queue using an array of unordered values.
2 |
3 | Unordered array is a fixed-length size, so it can become full.
4 |
5 | * enqueue is O(1)
6 | * dequeue is O(N)
7 |
8 | """
9 |
10 | from ch04.entry import Entry
11 |
class PQ:
    """
    Priority queue that keeps its entries, unordered, in a fixed-size array.

    Enqueue appends to the used portion of the array; dequeue scans the used
    portion for the entry with the largest priority.
    """
    def __init__(self, size):
        self.size = size
        self.storage = [None] * size
        self.N = 0

    def __len__(self):
        """Number of entries currently stored."""
        return self.N

    def is_full(self):
        """Return True once every slot of the array is in use."""
        return self.N == self.size

    def enqueue(self, v, p):
        """Store value v with priority p in the next unused slot."""
        if self.size == self.N:
            raise RuntimeError('Priority Queue is full!')
        slot = self.N
        self.storage[slot] = Entry(v, p)
        self.N = slot + 1

    def dequeue(self):
        """Remove the entry with highest priority and return its value."""
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        last = self.N - 1
        # On ties max() keeps the earliest position, like a strict '>' scan.
        best = max(range(self.N), key=lambda idx: self.storage[idx].priority)
        chosen = self.storage[best]

        # Fill the hole with the final entry so the used portion stays contiguous.
        self.storage[best] = self.storage[last]
        self.storage[last] = None
        self.N = last
        return chosen.value
50 |
--------------------------------------------------------------------------------
/ch01/largest.py:
--------------------------------------------------------------------------------
1 | """ Algorithms to find largest value in unordered array.
2 | """
3 |
def largest(A):
    """
    Return largest value in A, which must contain at least one element.

    Uses exactly N-1 less-than comparisons for N elements.
    """
    best = A[0]
    n = len(A)
    idx = 1
    while idx < n:
        candidate = A[idx]
        if best < candidate:
            best = candidate
        idx += 1
    return best
13 |
def native_largest(A):
    """Return largest value in A by delegating to Python's built-in max()."""
    biggest = max(A)
    return biggest
17 |
def alternate(A):
    """
    Return the first value in A that is not smaller than any value in A,
    or None when A is empty.

    In worst case, requires (1/2)*(N^2 + 3N - 2) invocations of less-than.
    In best case requires N.
    """
    for candidate in A:
        for other in A:
            if candidate < other:
                # Something is bigger, so candidate cannot be the largest.
                break
        else:
            # Inner loop finished without finding anything bigger.
            return candidate

    return None
33 |
def just_three(A):
    """
    Return largest value in A, which must hold exactly three elements.

    Raises ValueError for any other length, so this does not work on all
    problem instances.
    """
    if len(A) != 3:
        raise ValueError('I only work on lists of size 3.')

    first, second, third = A[0], A[1], A[2]

    if second < first:
        # first beats second, so third only wins if it is smaller than neither.
        if third < second or third < first:
            return first
        return third

    # second is at least as large as first; compare it against third.
    return third if second < third else second
51 |
--------------------------------------------------------------------------------
/resources/highway.py:
--------------------------------------------------------------------------------
1 | """
2 | Provide access to Highway map stored in TMG file
3 |
4 | Highway file graciously provided by James Teresco from https://travelmapping.net/graphs/
5 |
6 | If this code just doesn't work for you, then simply hard-code something like this:
7 |
8 | def highway_map():
9 | data_file = open(TMG_FILE, 'r')
10 | information = data_file.read().splitlines()
11 | data_file.close()
12 | return information
13 | """
14 |
_highway_data = []

def highway_map():
    """
    Return contents of the TMG highway file as a list of lines.

    The lines are loaded on first use and cached in _highway_data; later
    calls return that same list. Three ways of locating the file are tried
    in turn: importlib.resources, pkg_resources, and finally a path relative
    to the current working directory. An OSError from the final attempt is
    propagated to the caller.
    """
    if _highway_data:
        return _highway_data

    # Try to load up using the standard library resource API. A failure to
    # import or to find the resource moves on to the next strategy.
    try:
        import importlib.resources as pkg_resources
        contents = pkg_resources.read_text('resources', 'MA-region-simple.tmg')
        _highway_data.extend(contents.splitlines())
        return _highway_data
    except (ImportError, OSError):
        pass

    # Next try the setuptools resource API.
    try:
        import pkg_resources

        contents = pkg_resources.resource_string('resources', 'MA-region-simple.tmg').decode('utf-8')
        _highway_data.extend(contents.splitlines())
        return _highway_data
    except (ImportError, OSError):
        pass

    # if still cannot access, then you will have to hard-code to
    # change the following path name to the location of the
    # TMG file
    import os
    with open(os.path.join('resources', 'MA-region-simple.tmg'), encoding='utf-8') as tmg_file:
        # splitlines() drops line endings without eating the final character
        # of a last line that has no trailing newline.
        _highway_data.extend(tmg_file.read().splitlines())
    return _highway_data
48 |
--------------------------------------------------------------------------------
/resources/english.py:
--------------------------------------------------------------------------------
1 | """
2 | Provide access to English words, stored in alphabetical order, one per line.
3 |
4 | Compatible with Python 3.7
5 |
6 | If this code just doesn't work for you, then simply hard-code something like this:
7 |
8 | def english_words():
9 | word_file = open(DICTIONARY_FILE, 'r')
10 | all_words = word_file.read().splitlines()
11 | word_file.close()
12 | return all_words
13 | """
14 |
_english_words = []

def english_words():
    """
    Return list of 321,129 English words from dictionary.

    The words are loaded on first use and cached in _english_words; later
    calls return that same list. Three ways of locating the file are tried
    in turn: importlib.resources, pkg_resources, and finally a path relative
    to the current working directory. An OSError from the final attempt is
    propagated to the caller.
    """
    if _english_words:
        return _english_words

    # Try to load up using the standard library resource API. A failure to
    # import or to find the resource moves on to the next strategy.
    try:
        import importlib.resources as pkg_resources
        contents = pkg_resources.read_text('resources', 'words.english.txt')
        _english_words.extend(contents.splitlines())
        return _english_words
    except (ImportError, OSError):
        pass

    # Next try the setuptools resource API.
    try:
        import pkg_resources

        contents = pkg_resources.resource_string('resources', 'words.english.txt').decode('utf-8')
        _english_words.extend(contents.splitlines())
        return _english_words
    except (ImportError, OSError):
        pass

    # if still cannot access, then you will have to hard-code to
    # change the following path name to the location of the
    # words.english.txt file
    import os
    with open(os.path.join('resources', 'words.english.txt'), encoding='utf-8') as word_file:
        # splitlines() drops line endings without eating the final letter
        # of a last word that has no trailing newline.
        _english_words.extend(word_file.read().splitlines())
    return _english_words
48 |
--------------------------------------------------------------------------------
/ch06/recursive_lists.py:
--------------------------------------------------------------------------------
1 | """
2 | Code showing how to work with Linked Lists as recursive structures.
3 | """
4 | from algs.node import Node
5 |
def create_linked_list(alist):
    """
    Given a Python list, create linked list in same order.

    Returns the first Node, or None when alist is empty.
    """
    def build(idx):
        """Recursively create the linked list for alist[idx:]."""
        if idx == len(alist):
            return None

        first = Node(alist[idx])
        first.next = build(idx + 1)
        return first

    # Recursing on an index, rather than on alist[1:], avoids copying the
    # remainder of the list at every level of the recursion.
    return build(0)
14 |
def sum_list(node):
    """
    Given the first node of a linked list, sum its values recursively.

    An empty list (None) sums to 0.
    """
    return 0 if node is None else node.value + sum_list(node.next)
21 |
def iterate_list(node):
    """
    Python Generator for a linked list.

    The following will print all elements in a linked list:

        for v in iterate_list(alist):
            print(v)

    Nothing is generated when node is None.
    """
    # Walk the links in a loop: a generator that recursively delegates to
    # another generator per node would nest one frame for every node, which
    # fails on long lists and makes each value pass through every frame.
    while node is not None:
        yield node.value
        node = node.next
39 |
def sum_iterative(node):
    """
    Given the first node of a linked list, sum its values iteratively.

    Follows the next links with a loop, accumulating each node's value.
    """
    running = 0
    current = node
    while current:
        running += current.value
        current = current.next

    return running
48 |
def reverse(node):
    """
    Given the first node in a linked list, return (R, L) where R is the
    linked list in reverse, and L points to last node in that list.

    For an empty list (node is None), returns (None, None).
    """
    # An empty list has nothing to reverse and no last node.
    if node is None:
        return (None, None)

    if node.next is None:
        return (node, node)

    (flipped, tail) = reverse(node.next)

    # Append to tail and return; node is now the last node in the result.
    tail.next = node
    node.next = None
    return (flipped, node)
63 |
--------------------------------------------------------------------------------
/ch03/entry.py:
--------------------------------------------------------------------------------
1 | """
2 | Represents an entry in a Hashtable.
3 |
4 | There are two flavors. Entry is meant for array-based storage while
5 | LinkedEntry is meant for linked-list storage.
6 |
7 | MarkedEntry is designed to support `remove()` in an open addressing
8 | hashtable, as described in a challenge question for this chapter.
9 |
10 | """
class Entry:
    """A key paired with its value, for array-based hashtable storage."""
    def __init__(self, k, v):
        self.key, self.value = k, v

    def __str__(self):
        """Return entry as 'key -> value'."""
        return '{0.key} -> {0.value}'.format(self)
19 |
class LinkedEntry:
    """
    A key paired with its value, plus a next reference so that entries can
    be chained together into a linked list.
    """
    def __init__(self, k, v, rest=None):
        self.key, self.value = k, v
        self.next = rest

    def __str__(self):
        """Return entry as 'key -> value'; the next reference is not shown."""
        return '{0.key} -> {0.value}'.format(self)
29 |
class MarkedEntry:
    """
    A key paired with its value, together with a marked flag. An open
    addressing hashtable can support removal by marking entries, and
    throwing them away upon resize.
    """
    def __init__(self, k, v):
        self.key, self.value = k, v
        self.marked = False

    def is_marked(self):
        """Return True while the entry is marked."""
        return self.marked

    def mark(self):
        """Set the marked flag."""
        self.marked = True

    def unmark(self):
        """Clear the marked flag."""
        self.marked = False

    def __str__(self):
        """Return entry as 'key -> value', flagged when marked."""
        if self.marked:
            suffix = ' [Marked]'
        else:
            suffix = ''
        return '{0.key} -> {0.value}{1}'.format(self, suffix)
56 |
--------------------------------------------------------------------------------
/ch07/fibonacci_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Fibonacci Spreadsheet example for book.
3 |
4 | Depends on having tkinter installed.
5 | """
6 | try:
7 | import networkx as nx
8 | except ImportError:
9 | import ch07.replacement as nx
10 |
11 | from ch07.dependencies import tkinter_error
12 |
def fibonacci_example(spreadsheet):
    """
    Fill spreadsheet with the small Fibonacci example for the book.

    Every cell is assigned through spreadsheet.set(cell, contents), in the
    order listed below.
    """
    cells = {
        'A1': 'N',
        'B1': 'Fibn',
        'C1': 'Sum',
        'A2': '0',
        'A3': '=(A2+1)',
        'A4': '=(A3+1)',
        'A5': '=(A4+1)',
#        'A6': '=(A5+1)',      ## Add these lines back in for full
#        'A7': '=(A6+1)',      ## example.
#        'A8': '=(A7+1)',
        'B2': '0',
        'B3': '1',
        'B4': '=(B2+B3)',
        'B5': '=(B3+B4)',
#        'B6': '=(B4+B5)',
#        'B7': '=(B5+B6)',
#        'B8': '=(B6+B7)',
        'C2': '=B2',
        'C3': '=(B3+C2)',
        'C4': '=(B4+C3)',
        'C5': '=(B5+C4)',
#        'C6': '=(B6+C5)',
#        'C7': '=(B7+C6)',
#        'C8': '=(B8+C7)'
    }
    for cell, contents in cells.items():
        spreadsheet.set(cell, contents)
43 |
44 | #######################################################################
if __name__ == '__main__':
    # Launch the spreadsheet application with the Fibonacci example loaded,
    # provided that tkinter is available.
    if not tkinter_error:
        import tkinter
        from ch07.spreadsheet import Spreadsheet

        window = tkinter.Tk()
        sheet = Spreadsheet(window, nx.DiGraph())
        fibonacci_example(sheet)

        # Show the topological ordering of the spreadsheet's graph
        # before handing control to the GUI.
        from ch07.digraph_search import topological_sort
        ordering = list(topological_sort(sheet.digraph))
        print(ordering)
        window.mainloop()
    else:
        print('tkinter is not installed so unable to launch spreadsheet application')
58 |
--------------------------------------------------------------------------------
/algs/output.py:
--------------------------------------------------------------------------------
1 | """
2 | Provide access to where images are output.
3 | """
4 | import os
5 |
6 | IMAGE_DIR = 'images'
7 |
def visualize(tbl, description, label, xaxis='Problem instance size', yaxis='Time (in seconds)'):
    """
    Plot the table and store into file. If MatPlotLib is not installed, this
    silently ignores this request.

    The first label in tbl.labels provides the x values; every remaining
    label is plotted as its own line. The image is written to the location
    chosen by image_file(label), and that location is printed.
    """
    try:
        import numpy as np
        import matplotlib.pyplot as plt
    except ImportError:
        return

    # make sure interactive is off....
    plt.ioff()

    # Grab x values from the first label in headers
    x_arr = np.array(tbl.column(tbl.labels[0]))
    fig, axes = plt.subplots()

    try:
        # It may be that some of these columns are PARTIAL; if so, truncate xs as well
        for hdr in tbl.labels[1:]:
            yvals = np.array(tbl.column(hdr))
            xvals = x_arr[:len(yvals)]
            axes.plot(xvals, yvals, label=hdr)

        axes.set(xlabel=xaxis, ylabel=yaxis, title=description)
        axes.legend(loc='upper left')
        axes.grid()

        img_file = image_file(label)
        fig.savefig(img_file)
    finally:
        # pyplot keeps every figure alive until it is closed, so release this
        # one; otherwise figures pile up when many images are generated.
        plt.close(fig)

    print('Wrote image to', img_file)
    print()
43 |
def image_file(relative_name):
    """
    Return path for relative_name inside the image directory.

    The image directory is looked for in the current directory and then in
    the parent directory. If neither exists, the path places relative_name
    in the current directory.
    """
    parent_images = '..' + os.sep + IMAGE_DIR

    if os.path.isdir(IMAGE_DIR):
        folder = IMAGE_DIR
    elif os.path.isdir(parent_images):
        folder = parent_images
    else:
        folder = '.'

    return folder + os.sep + relative_name
57 |
--------------------------------------------------------------------------------
/book.py:
--------------------------------------------------------------------------------
1 | """Generate all Tables/Figures for entire book
2 |
3 | Learning Algorithms:
4 | A programmer's guide to writing better code
5 | (C) 2021, George T. Heineman
6 |
7 | Import all external modules that are ever used in the book, so you can
8 | see now whether there are any surprises, and not later!
9 |
10 | """
11 | import timeit
12 | import itertools
13 |
14 | try:
15 | import numpy as np
16 | except ImportError:
17 | print('numpy is not installed. Consider installing with pip install --user numpy')
18 |
try:
    from scipy.optimize import curve_fit
    # pearsonr is imported from the public scipy.stats namespace; the
    # scipy.stats.stats module is a deprecated alias, and once it is removed
    # this check would wrongly report that scipy is not installed.
    from scipy.stats import pearsonr
    from scipy.special import factorial
except ImportError:
    print('scipy is not installed. Consider installing with pip install --user scipy')
25 |
26 | try:
27 | import networkx as nx
28 | except ImportError:
29 | print('networkx is not installed. Consider installing with pip install --user networkx')
30 | print('will attempt to use stub implementation to complete tables and figures.')
31 |
32 | from ch01.book import generate_ch01
33 | from ch02.book import generate_ch02
34 | from ch03.book import generate_ch03
35 | from ch04.book import generate_ch04
36 | from ch05.book import generate_ch05
37 | from ch06.book import generate_ch06
38 | from ch07.book import generate_ch07
39 |
40 | #######################################################################
41 | from datetime import datetime
42 |
# Generate all chapters in order, printing a timestamp before each one starts
chapters = (
    ('ch01', generate_ch01),
    ('ch02', generate_ch02),
    ('ch03', generate_ch03),
    ('ch04', generate_ch04),
    ('ch05', generate_ch05),
    ('ch06', generate_ch06),
    ('ch07', generate_ch07),
)
for label, generate in chapters:
    print('{}:'.format(label), datetime.now())
    generate()
58 |
--------------------------------------------------------------------------------
/ch04/linked.py:
--------------------------------------------------------------------------------
1 | """
2 | Linked list implementation of priority queue structure.
3 |
4 | Stores all values in descending order of priority.
5 | """
6 |
7 | from ch04.linked_entry import LinkedEntry
8 |
class PQ:
    """
    Priority queue stored as a linked list of entries, kept in descending
    order of priority so the highest priority entry is always first.
    """
    def __init__(self, size):
        self.size = size
        self.first = None
        self.N = 0

    def __len__(self):
        """Return number of values in priority queue."""
        return self.N

    def is_full(self):
        """If priority queue has run out of storage, return True."""
        return self.size == self.N

    def enqueue(self, v, p):
        """
        Enqueue (v, p) entry into priority queue.

        Raises RuntimeError if the priority queue is already full.
        """
        if self.N == self.size:
            raise RuntimeError('Priority Queue is Full!')
        self.N += 1
        to_add = LinkedEntry(v, p)

        # Find first node with priority SMALLER than p, keeping track of
        # prev so the new entry can be linked in just before that node.
        prev = None
        n = self.first
        while n:
            if p > n.priority:
                break
            prev, n = n, n.next

        # n is the node to follow the new entry (None when adding at the
        # end); the same to_add entry is used wherever it lands.
        to_add.next = n
        if prev:
            prev.next = to_add
        else:
            self.first = to_add

    def dequeue(self):
        """
        Remove and return value with highest priority in priority queue.

        Raises RuntimeError if the priority queue is empty.
        """
        if self.first:
            val = self.first.value
            self.first = self.first.next
            self.N -= 1
            return val

        raise RuntimeError('PriorityQueue is empty!')
59 |
--------------------------------------------------------------------------------
/ch06/speaking.py:
--------------------------------------------------------------------------------
1 | """
2 | Provides English descriptions for operations as they happen.
3 |
4 | Used for Table 6-2 in the book.
5 | """
class BinaryNode:
    """
    A single node of a binary tree.

    Attributes
    ----------
        value  - value stored by Node
        left   - left child (or None)
        right  - right child (or None)
    """
    def __init__(self, val):
        self.value = val
        # A new node starts out with no children.
        self.left = self.right = None
20 |
class SpeakingBinaryTree:
    """
    Binary tree whose insert() returns an English description of the steps
    taken to place the new value.
    """
    def __init__(self):
        self.root = None

    def insert(self, val):
        """Insert value into Binary Tree and return the explanation."""
        opening = 'To insert `{}`, '.format(val)
        self.root, explanation = self._insert(self.root, val, opening)
        return explanation

    def _insert(self, node, val, sofar):
        """
        Insert val into the subtree rooted at node, extending the explanation
        in sofar. Returns (root of the updated subtree, full explanation).
        """
        if node is None:
            return (BinaryNode(val), '{}create a new subtree with root of `{}`.'.format(sofar, val))

        # Describe which way to go, then describe what is found there.
        if val <= node.value:
            side = 'left'
            sofar += '`{1}` is smaller than or equal to `{0}`, so insert `{1}` into the left subtree of `{0}`'.format(node.value, val)
            missing = ', but there is no left subtree, so '
        else:
            side = 'right'
            sofar += '`{1}` is larger than `{0}`, so insert `{1}` into the right subtree of `{0}`'.format(node.value, val)
            missing = ', but there is no right subtree, so '

        child = getattr(node, side)
        if child is None:
            sofar += missing
        else:
            sofar += ' rooted at `{}`. Now '.format(child.value)

        updated, expl = self._insert(child, val, sofar)
        setattr(node, side, updated)
        return (node, expl)
54 |
--------------------------------------------------------------------------------
/ch04/ordered_list.py:
--------------------------------------------------------------------------------
1 | """
2 | List of ordered values. Use BinaryArraySearch to locate where we should be
3 | and take advantage of Python's ability to extend list efficiently.
4 | """
5 |
6 | from ch04.entry import Entry
7 |
def binary_array_search(A, hi, target):
    """
    Use binary array search to search for target among the priorities of
    the entries A[0] through A[hi], which must be in ascending order.

    If target is found, a non-negative value is returned marking the
    location in A; if a negative number, x, is found then -x-1 is the
    location where target would need to be inserted.
    """
    lo = 0
    while lo <= hi:
        mid = (lo + hi) // 2

        # Compare directly instead of subtracting, so that any priorities
        # that can be ordered (not just numbers) are supported.
        priority = A[mid].priority
        if target < priority:
            hi = mid-1
        elif target > priority:
            lo = mid+1
        else:
            return mid

    return -(1+lo)
28 |
class PQ:
    """
    A Priority Queue implemented as a sorted list.

    Entries are kept in ascending order of priority, so the entry with the
    highest priority is always the last one in the list.
    """
    def __init__(self, size):
        self.size = size
        # The list holds only the entries themselves: it grows on enqueue
        # and shrinks on dequeue, so it never retains unused slots.
        self.storage = []
        self.N = 0

    def __len__(self):
        """Return number of values in priority queue."""
        return self.N

    def is_full(self):
        """Return True when enqueue() would fail because size is reached."""
        return self.size == self.N

    def enqueue(self, v, p):
        """
        Enqueue (v, p) entry into priority queue.

        Raises RuntimeError if the priority queue is already full.
        """
        if self.N == self.size:
            raise RuntimeError('Priority Queue is Full!')

        to_add = Entry(v, p)
        idx = binary_array_search(self.storage, self.N-1, p)
        if idx < 0:
            # Not found: convert to the location where p belongs.
            idx = -idx-1
        self.storage.insert(idx, to_add)

        self.N += 1

    def dequeue(self):
        """
        Remove and return value with highest priority in priority queue.

        Raises RuntimeError if the priority queue is empty.
        """
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        self.N -= 1
        return self.storage.pop().value
67 |
--------------------------------------------------------------------------------
/ch07/plot_map.py:
--------------------------------------------------------------------------------
1 | """
2 | Supporting functions for plotting the latitude/longitude data.
3 | """
4 |
5 | from ch07.dependencies import plt_error
6 | from ch07.replacement import WEIGHT
7 |
def plot_edge_path(positions, src, target, edge_to, marker='.', color='green'):
    """
    Plot the path recorded in edge_to[], walking backwards from target until
    an edge that starts at src is reached.

    Each edge is indexed as edge[0] for the node it comes from and
    edge[2][WEIGHT] for its weight. Only the intermediate nodes are plotted
    (neither src nor target is added), using positions[node][1] as x and
    positions[node][0] as y. The accumulated weight is printed. Does nothing
    when matplotlib is unavailable.
    """
    if plt_error:
        return
    import matplotlib.pyplot as plt

    xs, ys = [], []
    total_weight = 0
    edge = edge_to[target]
    while edge[0] != src:
        origin = edge[0]
        point = positions[origin]
        xs.append(point[1])
        ys.append(point[0])
        total_weight += edge[2][WEIGHT]
        edge = edge_to[origin]

    # The final edge (the one leaving src) still contributes its weight.
    total_weight += edge[2][WEIGHT]
    print('my total={}'.format(total_weight))
    plt.plot(xs, ys, color=color)
    plt.scatter(xs, ys, marker=marker, color=color)
31 |
def plot_path(positions, path, marker='.', color='red'):
    """
    Plot the nodes listed in path[] as a connected line with markers.

    Each node is placed using positions[node][1] as x and
    positions[node][0] as y. Does nothing when matplotlib is unavailable.
    """
    if plt_error:
        return
    import matplotlib.pyplot as plt

    points = [positions[v] for v in path]
    pxs = [point[1] for point in points]
    pys = [point[0] for point in points]
    plt.plot(pxs, pys, color=color)
    plt.scatter(pxs, pys, marker=marker, color=color)
49 |
def plot_node_from(positions, src, target, node_from, marker='.', color='orange'):
    """
    Plot path between src and target using node_from[] information.

    Starting at target, node_from[v] is followed until src is reached; every
    node visited, including target and src, is plotted using
    positions[node][1] as x and positions[node][0] as y. Does nothing when
    matplotlib is unavailable.
    """
    if plt_error:
        return
    import matplotlib.pyplot as plt

    # Collect the nodes from target back to src, then look up coordinates.
    trail = []
    current = target
    while current != src:
        trail.append(current)
        current = node_from[current]
    trail.append(src)

    points = [positions[v] for v in trail]
    nodex = [point[1] for point in points]
    nodey = [point[0] for point in points]
    plt.plot(nodex, nodey, color=color)
    plt.scatter(nodex, nodey, marker=marker, color=color)
69 |
--------------------------------------------------------------------------------
/ch04/ordered.py:
--------------------------------------------------------------------------------
1 | """
2 | Array of ordered values. Use BinaryArraySearch to locate where we should be
3 | """
4 |
5 | from ch04.entry import Entry
6 |
def binary_array_search(A, hi, target):
    """
    Use binary array search to search for target among the priorities of
    the entries A[0] through A[hi], which must be in ascending order.

    If target is found, a non-negative value is returned marking the
    location in A; if a negative number, x, is found then -x-1 is the
    location where target would need to be inserted.
    """
    lo = 0
    while lo <= hi:
        mid = (lo + hi) // 2

        # Compare directly instead of subtracting, so that any priorities
        # that can be ordered (not just numbers) are supported.
        priority = A[mid].priority
        if target < priority:
            hi = mid-1
        elif target > priority:
            lo = mid+1
        else:
            return mid

    return -(1+lo)
27 |
class PQ:
    """
    A Priority Queue stored in a fixed-size array, with entries kept in
    ascending order of priority so the highest priority entry is last.
    """
    def __init__(self, size):
        self.size = size
        self.storage = [None] * size
        self.N = 0

    def __len__(self):
        """Number of entries currently stored."""
        return self.N

    def is_full(self):
        """Return True once every slot of the array is in use."""
        return self.N == self.size

    def enqueue(self, v, p):
        """Insert (v, p) entry at the position that keeps entries ordered."""
        if self.size == self.N:
            raise RuntimeError('Priority Queue is Full!')

        where = binary_array_search(self.storage, self.N-1, p)
        if where < 0:     # no entry has priority p: decode insertion point
            where = -(where + 1)

        # Slide entries where..N-1 up one slot to open a gap at 'where'.
        self.storage[where+1:self.N+1] = self.storage[where:self.N]
        self.storage[where] = Entry(v, p)

        self.N += 1

    def dequeue(self):
        """Remove the last (highest priority) entry and return its value."""
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        top = self.N - 1
        entry = self.storage[top]
        self.storage[top] = None
        self.N = top
        return entry.value
68 |
--------------------------------------------------------------------------------
/ch03/base26.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to work with base26 hash scheme.
3 |
4 | :Example:
5 |
6 | >>> base26('sample')
7 | 214086110
8 |
9 | """
10 | from algs.sorting import unique
11 | from ch03.months import s_data, s_num, days_in_month, days_bas, days_mixed
12 |
def base26(w):
    """
    Interpret the lowercased string w as a base 26 number, with 'a' as the
    digit 0 and 'z' as the digit 25, and return its value.
    """
    offset = ord('a')
    total = 0
    for letter in w.lower():
        total = total * 26 + (ord(letter) - offset)
    return total
20 |
def eval_search_base26(m):
    """
    Hash every key in s_data using base26(key) modulo m.

    If all hashes are non-negative and unique, return an array containing
    the matching s_num value at each hashed index and -1 at every other
    index; otherwise return None.
    """
    hashes = [base26(key) % m for key in s_data]
    if min(hashes) < 0 or not unique(hashes):
        return None

    table = [-1] * (max(hashes) + 1)
    for position, slot in enumerate(hashes):
        table[slot] = s_num[position]
    return table
34 |
def search_for_base():
    """
    Search for lowest modulo, from 12 up to 999, for which
    eval_search_base26() produces a table for the keys stored in s_data.

    Returns (modulo, table). Raises RuntimeError if the month lookups
    disagree with one another or if no modulo in the range works.
    """
    # First confirm that the different ways of looking up a month agree.
    for month in s_data:
        agrees = days_in_month[month] == days_mixed(month) and days_bas(month) == days_mixed(month)
        if not agrees:
            raise RuntimeError('Inconsistent access for {}'.format(month))

    # search for a range of potential bases, starting from 12 which is
    # the lowest it could be
    for modulo in range(12, 1000):
        table = eval_search_base26(modulo)
        if not table:
            continue
        return (modulo, table)

    # failed...
    raise RuntimeError('search_for_base() failed')
53 |
54 | #######################################################################
if __name__ == '__main__':
    # Print sample base26 values, some of them reduced modulo 34.
    june = base26('june')
    print('base26 of june is {:,d}'.format(june))

    january = base26('january')
    print('base26 of january is {:,d}'.format(january))

    august = base26('august')
    print('base26 of august is {0:,d} with {0:,d} % 34 = {1}'.format(august, base26('august') % 34))

    abbreviated_mod = base26('abbreviated') % 34
    march_mod = base26('march') % 34
    print('base26 of abbreviated is {0:,d} and for march is {1:,d}'.format(abbreviated_mod, march_mod))
60 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/ch01/timing.py:
--------------------------------------------------------------------------------
1 | """Timing results for chapter 1.
2 |
3 | Compute whether crossover occurs between tournament_VARIATION() and sorting_two()
4 |
5 | Turns out that sorting is just too fast, and all other approaches are slowed
6 | down by extra space requirements. Note the subtle problem with tournament_object
7 | which ends up being an O(N^2) algorithm because of the costs in the del tourn[0:2]
8 | operation.
9 |
10 | """
11 |
12 | import timeit
13 | from algs.table import DataTable, SKIP
14 |
def timing_trial(output=True, decimals=3):
    """
    Time sorting_two() against each tournament variation on shuffled input.

    For every N = 2**k with k in 10..23 a single run of each function is timed
    on a freshly shuffled list(range(N)), and one row per N is added to the
    returned DataTable. tournament_two_object() is not timed once N exceeds
    1,048,576; SKIP is recorded instead.
    """
    setup_template = '''
import random
from ch01.largest_two import {1}
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''

    def time_once(func_name, n):
        """Time one invocation of ch01.largest_two.<func_name> on n shuffled values."""
        return timeit.timeit(stmt='{}(x)'.format(func_name),
                             setup=setup_template.format(n, func_name),
                             number=1)

    tbl = DataTable([8,8,8,8,8,8],
                    ['N', 'Sorting', 'Tournament', 'Tourn. Object', 'Tourn. Linked', 'Tourn. Losers'],
                    output=output, decimals=decimals)

    for n in (2 ** k for k in range(10, 24)):
        sorting = time_once('sorting_two', n)
        tournament = time_once('tournament_two', n)
        as_object = SKIP if n > 1048576 else time_once('tournament_two_object', n)
        losers = time_once('tournament_two_losers', n)
        linked = time_once('tournament_two_linked', n)

        # Column order follows the table header: Linked comes before Losers.
        tbl.row([n, sorting, tournament, as_object, linked, losers])
    return tbl
64 |
65 | #######################################################################
if __name__ == '__main__':
    # Running this module as a script prints the timing table to the console
    # (timing_trial() is called with its default output=True).

    print('Does tournament_two() beat sorting_two().')
    timing_trial()
70 |
--------------------------------------------------------------------------------
/ch05/merge.py:
--------------------------------------------------------------------------------
1 | """
2 | Merge sort uses auxiliary storage
3 | """
4 |
def merge_sort(A):
    """Sort A in place using top-down Merge Sort with one scratch list as long as A."""
    scratch = [None] * len(A)

    def merge_halves(first, middle, last):
        """Merge sorted A[first..middle] and A[middle+1..last] back into A."""
        # Snapshot both sorted halves so A can be overwritten while merging.
        scratch[first:last+1] = A[first:last+1]

        left, right = first, middle + 1
        for dest in range(first, last + 1):
            # Take from the right half when the left is exhausted, or when both
            # halves still have values and the right one is strictly smaller.
            take_right = left > middle or (right <= last and scratch[right] < scratch[left])
            if take_right:
                A[dest] = scratch[right]
                right += 1
            else:
                A[dest] = scratch[left]
                left += 1

    def sort_range(first, last):
        """Recursively sort A[first..last] (inclusive bounds)."""
        if first < last:
            middle = (first + last) // 2
            sort_range(first, middle)
            sort_range(middle + 1, last)
            merge_halves(first, middle, last)

    sort_range(0, len(A) - 1)
40 |
def merge_sort_counting(A):
    """
    Sort A in place with Merge Sort and return a (swaps, comparisons) tuple.

    Each write that changes a slot of A adds 0.5 to the swap tally, and each
    element-to-element '<' test performed while merging adds one comparison.
    """
    scratch = [None] * len(A)

    def merge_halves(first, middle, last):
        """Merge two sorted halves of A[first..last]; return (swaps, comparisons)."""
        scratch[first:last+1] = A[first:last+1]

        left, right = first, middle + 1
        comparisons = 0
        moves = 0
        for dest in range(first, last + 1):
            if left > middle:
                from_right = True          # left half used up
            elif right > last:
                from_right = False         # right half used up
            else:
                comparisons += 1
                from_right = scratch[right] < scratch[left]

            if from_right:
                incoming = scratch[right]
                right += 1
            else:
                incoming = scratch[left]
                left += 1

            # Only a write that changes the slot counts as half a swap.
            if A[dest] != incoming:
                moves += 0.5
            A[dest] = incoming
        return (moves, comparisons)

    def sort_range(first, last):
        """Sort A[first..last] and return accumulated (swaps, comparisons)."""
        if last <= first:
            return (0, 0)

        middle = (first + last) // 2
        left_moves, left_cmps = sort_range(first, middle)
        right_moves, right_cmps = sort_range(middle + 1, last)
        merge_moves, merge_cmps = merge_halves(first, middle, last)
        return (left_moves + right_moves + merge_moves,
                left_cmps + right_cmps + merge_cmps)

    return sort_range(0, len(A) - 1)
86 |
--------------------------------------------------------------------------------
/ch07/all_pairs_sp.py:
--------------------------------------------------------------------------------
1 | """
2 | All Pairs Shortest path algorithm.
3 | """
4 | from ch07.replacement import WEIGHT
5 |
def debug_state(title, G, node_from, dist_to, output=True):
    """
    Show the current state of an all-pairs shortest path computation.

    Builds one table of node_from[u][v] entries ('.' wherever the entry is
    empty) and one table of dist_to[u][v] values (0 on the diagonal), and
    returns them as a (node_from table, dist_to table) tuple.
    """
    from algs.table import DataTable

    print('debug :', title)
    labels = list(G.nodes())

    # First table: which node precedes v on the recorded path from u.
    tbl = DataTable([6] + [6]*len(labels), ['.'] + labels, output=output)
    tbl.format('.','s')
    for field in labels:
        tbl.format(field, 's')
    for u in labels:
        came_from = node_from[u]
        tbl.row([u] + [came_from[v] or '.' for v in labels])
    print()

    # Second table: recorded distances, with 0 forced on the diagonal.
    tbl_dist_to = DataTable([6] + [6]*len(labels), ['.'] + labels, output=output, decimals=1)
    tbl_dist_to.format('.','s')
    for u in labels:
        tbl_dist_to.row([u] + [0 if u == v else dist_to[u][v] for v in labels])
    print()
    return (tbl, tbl_dist_to)
39 |
def floyd_warshall(G):
    """
    Compute All Pairs Shortest Path using Floyd-Warshall.

    Returns (dist_to, node_from): dist_to[u][v] is the length of the shortest
    path found from u to v (infinity when there is none), and node_from[u][v]
    is the node visited just before v on that path (None when there is none),
    which is enough to recover the path itself.
    """
    unreachable = float('inf')
    nodes = list(G.nodes())
    dist_to = {}
    node_from = {}

    # Start from the direct edges only.
    for src in nodes:
        dist_to[src] = dict.fromkeys(nodes, unreachable)
        node_from[src] = dict.fromkeys(nodes)

        dist_to[src][src] = 0

        for _, dst, attrs in G.edges(src, data=True):
            dist_to[src][dst] = attrs[WEIGHT]
            node_from[src][dst] = src

    # Allow each node in turn to act as an intermediate stop.
    for mid in nodes:
        through_mid = dist_to[mid]
        for src in nodes:
            from_src = dist_to[src]
            for dst in nodes:
                candidate = from_src[mid] + through_mid[dst]
                if candidate < from_src[dst]:
                    from_src[dst] = candidate
                    # CRITICAL: the last hop into dst is inherited from the mid -> dst path.
                    node_from[src][dst] = node_from[mid][dst]

    return (dist_to, node_from)
69 |
def all_pairs_path_to(node_from, src, target):
    """
    Recover the path from src to target as a list of nodes, src first.

    node_from[src][v] must hold the node visited just before v on the path
    from src (None when no such path is recorded). The path from a node to
    itself is always [src], whether or not a self-loop was recorded.

    Raises ValueError when target cannot be reached from src.
    """
    came_from = node_from[src]

    # A node trivially reaches itself, even though no predecessor is recorded for it.
    if src == target:
        return [src]

    if came_from[target] is None:
        raise ValueError('{} is unreachable from {}'.format(target,src))

    # Walk predecessors backwards from target until src is reached.
    path = []
    v = target
    while v != src:
        path.append(v)
        v = came_from[v]

    # last one to push is the source, which makes it
    # the first one to be retrieved
    path.append(src)
    path.reverse()
    return path
86 |
--------------------------------------------------------------------------------
/ch04/heap.py:
--------------------------------------------------------------------------------
1 | """
2 | max binary Heap.
3 | """
4 | from ch04.entry import Entry
5 |
class PQ:
    """
    Fixed-capacity priority queue stored as a max binary heap.

    Entries live in storage[1..N]; storage[0] is unused so that the children
    of index k are found at 2k and 2k+1 and its parent at k//2.
    """
    def __init__(self, size):
        self.size = size
        self.storage = [None] * (size+1)
        self.N = 0

    def __len__(self):
        """Number of entries currently stored."""
        return self.N

    def is_empty(self):
        """True when no entries are stored."""
        return self.N == 0

    def is_full(self):
        """True when every slot of the fixed storage is in use."""
        return self.size == self.N

    def enqueue(self, v, p):
        """Add value v with priority p; raises RuntimeError when full."""
        if self.is_full():
            raise RuntimeError('Priority Queue is full!')

        self.N += 1
        last = self.N
        self.storage[last] = Entry(v, p)
        self.swim(last)

    def less(self, i, j):
        """True when storage[j] has strictly higher priority than storage[i]."""
        return self.storage[i].priority < self.storage[j].priority

    def swap(self, i, j):
        """Exchange the entries held in storage[i] and storage[j]."""
        self.storage[i],self.storage[j] = self.storage[j],self.storage[i]

    def swim(self, child):
        """Move storage[child] up until its parent is no smaller."""
        parent = child // 2
        while parent >= 1 and self.less(parent, child):
            self.swap(child, parent)
            child, parent = parent, parent // 2

    def sink(self, parent):
        """Move storage[parent] down until neither child is larger."""
        child = 2 * parent
        while child <= self.N:
            # Pick the larger of the two children when a right child exists.
            if child < self.N and self.less(child, child+1):
                child += 1
            if not self.less(parent, child):
                return
            self.swap(child, parent)

            parent, child = child, 2 * child

    def peek(self):
        """
        Return the whole top Entry without removing it, so the caller can
        read both its value and its priority. Raises RuntimeError when empty.
        """
        if self.is_empty():
            raise RuntimeError('PriorityQueue is empty!')

        return self.storage[1]

    def dequeue(self):
        """Remove the highest-priority entry and return its value."""
        if self.is_empty():
            raise RuntimeError('PriorityQueue is empty!')

        top = self.storage[1]
        last = self.N
        # Move the final entry to the root, clear its old slot, then restore order.
        self.storage[1] = self.storage[last]
        self.storage[last] = None
        self.N = last - 1
        self.sink(1)
        return top.value
86 |
--------------------------------------------------------------------------------
/ch07/solver_bfs.py:
--------------------------------------------------------------------------------
1 | """
2 | Animates the Breadth First Search solution of a maze.
3 |
4 | If you set `refresh_rate` to zero, then it will remove all delays. As it is, the
5 | small 0.01 refresh rate shows the progress of the search.
6 |
7 | if you set `stop_end` to be True, the search will terminate at the target
8 | """
9 | import time
10 | import random
11 |
12 | from ch07.maze import Maze, to_networkx
13 | from ch07.viewer import Viewer
14 | from ch07.dependencies import tkinter_error
15 |
class BreadthFirstSearchSolver():
    """
    Solves a maze using Breadth First Search, showing results graphically.

    master is the tkinter root (or any object offering after() and update()),
    maze is the maze to solve, and size is the cell size handed to the Viewer.
    refresh_rate is the pause in seconds after each dequeued cell (0 disables
    the pause); when stop_end is True the search stops once the end is reached.
    """
    def __init__(self, master, maze, size, refresh_rate=0.01, stop_end=False):
        self.master = master
        self.viewer = Viewer(maze, size)
        self.marked = {}        # cells already discovered
        self.node_from = {}     # cell -> cell from which it was discovered

        self.g = to_networkx(maze)
        self.start = maze.start()
        self.end = maze.end()
        self.stop_end = stop_end

        self.refresh_rate = refresh_rate

        # Schedule the search to begin once the event loop starts running.
        master.after(0, self.animate)
        self.canvas = self.viewer.view(master)

    def animate(self):
        """Start animation by initiating BFS, then highlight the path found."""
        self.bfs_visit(self.start)

        # draw BACK edges to solution
        pos = self.end
        while pos != self.start:
            self.viewer.color_cell(pos, 'lightgray')
            pos = self.node_from[pos]
        self.master.update()

    def bfs_visit(self, pos):
        """
        Breadth-first search outward from pos using an explicit queue.

        Returns True when stop_end is set and the end cell was reached;
        otherwise returns False once every reachable cell has been explored.
        """
        from ch04.list_queue import Queue

        queue = Queue()
        # Mark the starting cell up front; otherwise its first neighbor would
        # rediscover it and enqueue it a second time.
        self.marked[pos] = True
        self.viewer.color_cell(pos, 'blue')
        queue.enqueue(pos)

        while not queue.is_empty():
            cell = queue.dequeue()
            self.master.update()
            if self.refresh_rate:
                time.sleep(self.refresh_rate)

            for next_cell in self.g.neighbors(cell):
                if next_cell not in self.marked:
                    self.node_from[next_cell] = cell
                    self.marked[next_cell] = True
                    self.viewer.color_cell(next_cell, 'blue')
                    if self.stop_end and next_cell == self.end:
                        return True
                    queue.enqueue(next_cell)

        return False
71 |
72 | #######################################################################
if __name__ == '__main__':
    # Launch the animated solver only when tkinter could be imported.
    if tkinter_error:
        print('tkinter is not installed so unable to launch BFS solver application')
    else:
        import tkinter
        # Fixed seed set before the maze is constructed -- presumably so the
        # same maze is generated on every run.
        random.seed(15)
        m = Maze(60,60)
        root = tkinter.Tk()
        # The variable is named 'dfs' but holds the breadth-first solver.
        dfs = BreadthFirstSearchSolver(root, m, 15, refresh_rate=0, stop_end=True)
        root.mainloop()
83 |
--------------------------------------------------------------------------------
/ch07/single_source_sp.py:
--------------------------------------------------------------------------------
1 | """
2 | Single-source Shortest path algorithms, including Dijkstra and Bellman-Ford.
3 | """
4 | from ch07.indexed_pq import IndexedMinPQ
5 |
6 | # data associated with an edge can contain a weight
7 | WEIGHT = 'weight'
8 |
def bellman_ford(G, src):
    """
    Compute Single Source Shortest Path using Bellman-Ford.

    Returns (dist_to, edge_to): dist_to[v] is the length of the shortest path
    found from src to v (infinity when v is unreachable) and edge_to[v] is the
    final edge (u, v, data) on that path, which is enough to recover the
    shortest paths. Works with negative edge weights.

    Raises RuntimeError when a negative cycle is detected.
    """
    inf = float('inf')
    dist_to = {v:inf for v in G.nodes()}
    dist_to[src] = 0
    edge_to = {}

    def relax(e):
        """Shorten the known distance to e's head if going through e helps."""
        u, v, weight = e[0], e[1], e[2][WEIGHT]
        if dist_to[u] + weight < dist_to[v]:
            dist_to[v] = dist_to[u] + weight
            edge_to[v] = e
            return True
        return False

    # Do up to N total passes. Only N-1 are needed, but use the Nth final one
    # to detect if there had been a negative cycle.
    num_nodes = G.number_of_nodes()
    for i in range(num_nodes):
        changed = False
        for e in G.edges(data=True):
            if relax(e):
                if i == num_nodes-1:
                    raise RuntimeError('Negative Cycle exists in graph.')
                changed = True

        # A pass that relaxes nothing leaves the state untouched, so every
        # later pass would do the same: the distances are final.
        if not changed:
            break

    return (dist_to, edge_to)
40 |
def dijkstra_sp(G, src):
    """
    Run Dijkstra's algorithm from src.

    Returns (dist_to, edge_to): dist_to[v] is the shortest distance found from
    src to v (infinity when never reached) and edge_to[v] is the last edge
    (u, v, data) used to reach v, from which shortest paths can be recovered.
    """
    inf = float('inf')
    dist_to = dict.fromkeys(G.nodes(), inf)
    dist_to[src] = 0
    edge_to = {}

    # Every node starts in the queue: the source at distance 0, the rest at infinity.
    frontier = IndexedMinPQ(G.number_of_nodes())
    frontier.enqueue(src, dist_to[src])
    for node in G.nodes():
        if node != src:
            frontier.enqueue(node, inf)

    while not frontier.is_empty():
        closest = frontier.dequeue()
        for edge in G.edges(closest, data=True):
            tail, head, weight = edge[0], edge[1], edge[2][WEIGHT]
            # Relax the edge: keep it when it offers a shorter route to head.
            if dist_to[tail] + weight < dist_to[head]:
                dist_to[head] = dist_to[tail] + weight
                edge_to[head] = edge
                frontier.decrease_priority(head, dist_to[head])

    return (dist_to, edge_to)
72 |
def edges_path_to(edge_to, src, target):
    """
    Recover the path from src to target as a list of nodes, src first.

    edge_to[v] must hold the final edge (u, v, ...) on the path from src to v.
    The path from src to itself is always [src], even though no edge is
    recorded for the source.

    Raises ValueError when target cannot be reached from src.
    """
    # The source trivially reaches itself; no edge leads "into" it.
    if src == target:
        return [src]

    if target not in edge_to:
        raise ValueError('{} is unreachable from {}'.format(target, src))

    # Follow each recorded edge back to its tail until src is reached.
    path = []
    v = target
    while v != src:
        path.append(v)
        v = edge_to[v][0]

    # last one to push is the source, which makes it
    # the first one to be retrieved
    path.append(src)
    path.reverse()
    return path
89 |
--------------------------------------------------------------------------------
/ch06/avl.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper functions to support AVL re-balancing.
3 |
4 | These depend on having a binary tree structure with a
5 | compute_height() method to maintain height information.
6 | """
7 |
def rotate_right(node):
    """
    Perform right rotation around given node and return the new subtree root.

    node.left becomes the root, its former right subtree becomes node's left
    subtree, and node becomes the root's right child. Heights of both moved
    nodes are recomputed, so the returned root is accurate without further
    work by the caller.
    """
    new_root = node.left
    grandson = new_root.right
    node.left = grandson
    new_root.right = node

    # Recompute bottom-up: node now sits below new_root.
    node.compute_height()
    new_root.compute_height()
    return new_root
17 |
def rotate_left(node):
    """
    Perform left rotation around given node and return the new subtree root.

    node.right becomes the root, its former left subtree becomes node's right
    subtree, and node becomes the root's left child. Heights of both moved
    nodes are recomputed, so the returned root is accurate without further
    work by the caller.
    """
    new_root = node.right
    grandson = new_root.left
    node.right = grandson
    new_root.left = node

    # Recompute bottom-up: node now sits below new_root.
    node.compute_height()
    new_root.compute_height()
    return new_root
27 |
def rotate_left_right(node):
    """
    Perform left, then right rotation around given node; return the new root.

    The right child of node.left is lifted to the top: its two subtrees are
    handed to node.left and node respectively, which then become its left and
    right children. Heights of all three moved nodes are recomputed.
    """
    child = node.left
    new_root = child.right
    grand1 = new_root.left
    grand2 = new_root.right
    child.right = grand1
    node.left = grand2

    new_root.left = child
    new_root.right = node

    # Children first, then the node that now sits above both of them.
    child.compute_height()
    node.compute_height()
    new_root.compute_height()
    return new_root
43 |
def rotate_right_left(node):
    """
    Perform right, then left rotation around given node; return the new root.

    The left child of node.right is lifted to the top: its two subtrees are
    handed to node and node.right respectively, which then become its left and
    right children. Heights of all three moved nodes are recomputed.
    """
    child = node.right
    new_root = child.left
    grand1 = new_root.left
    grand2 = new_root.right
    child.left = grand2
    node.right = grand1

    new_root.left = node
    new_root.right = child

    # Children first, then the node that now sits above both of them.
    child.compute_height()
    node.compute_height()
    new_root.compute_height()
    return new_root
59 |
def resolve_left_leaning(node):
    """
    If node is left-leaning, rebalance and return new root node for subtree.

    Rebalancing happens only when node.height_difference() is exactly 2
    (presumably left height minus right height -- confirm against the node
    class); otherwise node is returned unchanged.
    """
    if node.height_difference() != 2:
        return node

    # An inner-heavy left child needs the double rotation.
    if node.left.height_difference() < 0:
        return rotate_left_right(node)
    return rotate_right(node)
68 |
def resolve_right_leaning(node):
    """
    If node is right-leaning, rebalance and return new root node for subtree.

    Rebalancing happens only when node.height_difference() is exactly -2;
    otherwise node is returned unchanged.
    """
    if node.height_difference() != -2:
        return node

    # An inner-heavy right child needs the double rotation.
    if node.right.height_difference() > 0:
        return rotate_right_left(node)
    return rotate_left(node)
77 |
def check_avl_property(n):
    """
    Debugging aid: verify the subtree rooted at 'n' and return its height.

    Every node must store a height equal to one more than its taller subtree
    (an empty subtree counts as -1), and its two subtrees may differ in height
    by at most one. Raises ValueError naming the first offending node's value.
    """
    if n is None:
        return -1

    left_height = check_avl_property(n.left)
    right_height = check_avl_property(n.right)

    expected = 1 + max(left_height, right_height)
    if n.height != expected:
        raise ValueError('AVL height incorrect at {}'.format(n.value))

    if abs(left_height - right_height) > 1:
        raise ValueError('AVL tree property invalidated at {}'.format(n.value))

    return n.height
96 |
--------------------------------------------------------------------------------
/ch04/dynamic_heap.py:
--------------------------------------------------------------------------------
1 | """
2 | max binary Heap that can grow and shrink as needed. Typically this
3 | functionality is not needed. self.size records initial size and never
4 | changes, which prevents shrinking logic from reducing storage below
5 | this initial amount.
6 | """
7 | from ch04.entry import Entry
8 |
class PQ:
    """
    Priority Queue implemented using a max binary heap whose storage grows and
    shrinks as needed.

    Entries live in storage[1..N]; storage[0] is unused. self.size records the
    initial capacity and never changes; storage is never shrunk below it.
    """
    def __init__(self, size):
        self.size = size
        self.storage = [None] * (size+1)
        self.N = 0

    def __len__(self):
        """Return number of values in priority queue."""
        return self.N

    def is_empty(self):
        """Determine whether Priority Queue is empty."""
        return self.N == 0

    def is_full(self):
        """
        Return True when the number of entries equals the initial size.

        Because storage grows on demand, enqueue() still succeeds when this
        returns True.
        """
        return self.size == self.N

    def enqueue(self, v, p):
        """Enqueue (v, p) entry into priority queue, doubling storage when it is full."""
        if self.N == len(self.storage) - 1:
            # max(1, ...) keeps a queue created with size 0 able to grow.
            self.resize(max(1, self.N*2))
        self.N += 1

        self.storage[self.N] = Entry(v, p)
        self.swim(self.N)

    def less(self, i, j):
        """
        Helper function to determine if storage[j] has higher
        priority than storage[i].
        """
        return self.storage[i].priority < self.storage[j].priority

    def swap(self, i, j):
        """Switch the values in storage[i] and storage[j]."""
        self.storage[i],self.storage[j] = self.storage[j],self.storage[i]

    def swim(self, child):
        """Reestablish heap-order property from storage[child] up."""
        while child > 1 and self.less(child//2, child):
            self.swap(child, child//2)
            child = child//2

    def sink(self, parent):
        """Reestablish heap-order property from storage[parent] down."""
        while 2*parent <= self.N:
            child = 2*parent
            # Compare against the larger child when a right child exists.
            if child < self.N and self.less(child, child+1):
                child += 1
            if not self.less(parent, child):
                break
            self.swap(child, parent)

            parent = child

    def dequeue(self):
        """
        Remove and return value with highest priority in priority queue.

        Storage is halved once fewer than a quarter of its slots are in use,
        but never below the initial size. Raises RuntimeError when empty.
        """
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        max_entry = self.storage[1]
        self.swap(1, self.N)
        self.storage[self.N] = None
        self.N -= 1
        self.sink(1)

        # Capacity excludes the unused slot 0. Halving at one-quarter occupancy
        # leaves the shrunken storage at most half full, so the next enqueue
        # does not immediately trigger another resize.
        capacity = len(self.storage) - 1
        if capacity > self.size and self.N < capacity // 4:
            self.resize(max(self.size, capacity // 2))
        return max_entry.value

    def resize(self, new_size):
        """Replace storage with one holding new_size entries, keeping the current ones."""
        replace = [None] * (new_size+1)
        replace[0:self.N+1] = self.storage[0:self.N+1]
        self.storage = replace
86 |
--------------------------------------------------------------------------------
/algs/test.py:
--------------------------------------------------------------------------------
1 | """Test cases for book package."""
2 | import unittest
3 |
4 | from algs.counting import RecordedItem
5 | from algs.node import Node
6 | from algs.table import DataTable
7 | from algs.modeling import Model, numpy_error
8 |
class TestAlgs(unittest.TestCase):
    """Test cases for book package."""

    def test_counting(self):
        """Test basic mechanics of RecordedItem."""
        ri1 = RecordedItem(1)
        ri2 = RecordedItem(2)

        # Each section resets the counters, performs exactly one comparison and
        # checks which counter moved. The report() positions presumably follow
        # header() order (eq, lt, gt), as the expected counts below suggest.
        RecordedItem.clear()
        self.assertTrue(ri1 < ri2)
        self.assertEqual(0, RecordedItem.report()[0])
        self.assertEqual(1, RecordedItem.report()[1])
        self.assertEqual(0, RecordedItem.report()[2])

        RecordedItem.clear()
        self.assertFalse(ri1 > ri2)
        self.assertEqual(0, RecordedItem.report()[0])
        self.assertEqual(0, RecordedItem.report()[1])
        self.assertEqual(1, RecordedItem.report()[2])

        RecordedItem.clear()
        self.assertFalse(ri1 == ri2)
        self.assertEqual(1, RecordedItem.report()[0])
        self.assertEqual(0, RecordedItem.report()[1])
        self.assertEqual(0, RecordedItem.report()[2])

    def test_recorded_item(self):
        # header() names the three counters in reporting order.
        self.assertEqual(('eq', 'lt', 'gt'), RecordedItem.header())

    def test_helper(self):
        # RecordedItem.range(2) is expected to equal [RecordedItem(0), RecordedItem(1)].
        self.assertEqual([RecordedItem(0), RecordedItem(1)], RecordedItem.range(2))

    def test_node(self):
        # A node's string form is its value wrapped in square brackets.
        n = Node('sample')
        self.assertEqual('[sample]', str(n))

    def test_is_sorted(self):
        from algs.sorting import is_sorted

        # is_sorted() signals failure by raising ValueError, so the first call
        # passes simply by not raising.
        is_sorted([2, 9, 55])
        with self.assertRaises(ValueError):
            is_sorted([55, 9, 2])

    def test_node_2(self):
        # Node('other', node1) links to node1; iterating yields values in link order.
        node1 = Node('sample')
        node2 = Node('other', node1)
        self.assertEqual('other', node2.value)
        self.assertEqual('sample', node2.next.value)

        self.assertEqual(['other', 'sample'], list(node2))

    def test_table(self):
        # First table is built silently (output=False) and queried by row key and column.
        tbl = DataTable([8, 8, 8], ['N', 'Another', 'SquareRoot'], output=False, decimals=4)
        tbl.format('Another', 'd')
        for n in range(2,10):
            tbl.row([n, n, n ** 0.5])
        self.assertEqual(tbl.entry(3, 'Another'), 3)

        # Second table uses the default output setting, so rows go to the console.
        print('Testing that Table is print to console')
        tbl = DataTable([8, 8, 8], ['N', 'Another', 'SquareRoot'], decimals=4)
        tbl.format('Another', 'd')
        for n in range(2,10):
            tbl.row([n, n, n ** 0.5])

        self.assertEqual(list(range(2,10)), tbl.column('Another'))

        # Model fitting is only checked when numpy is available; the 'Another'
        # column equals N, so a linear model with coefficient 1.0 is expected.
        model = tbl.best_model('Another')[0]
        if numpy_error:
            pass
        else:
            self.assertEqual(model[0], Model.LINEAR)
            self.assertAlmostEqual(model[3], 1.0000, places=5)
81 |
82 | #######################################################################
if __name__ == '__main__':
    # Run the test cases in this module when it is executed as a script.
    unittest.main()
85 |
--------------------------------------------------------------------------------
/ch07/viewer.py:
--------------------------------------------------------------------------------
1 | """View the contents of Maze, a rectangular maze object, scaled to cells of given size."""
2 | import random
3 |
4 | from ch07.maze import Maze
5 | from ch07.dependencies import tkinter_error
6 |
class Viewer:
    """
    Tkinter application to view a maze generated from Maze().

    Each maze cell is drawn as a square of `size` pixels; cells coloured via
    color_cell() are remembered so they can later be recoloured.
    """
    # Ensure drawing is not flush against the left-edge or top-edge of window
    OFFSET = 8

    # inset the circles for the path so they are visible.
    INSET = 2

    def __init__(self, maze, size):
        self.maze = maze
        self.size = size
        self.built = {}
        self.canvas = None

    def view(self, master):
        """
        Draw the maze on a new canvas packed into master and return that canvas.

        Returns None without drawing anything when tkinter is unavailable.
        """
        if tkinter_error:
            return None
        import tkinter

        size = self.size
        offset = self.OFFSET
        w = self.maze.num_cols * size
        h = self.maze.num_rows * size
        canvas = tkinter.Canvas(master, width=w+10, height=h+10)
        canvas.pack()

        # Left border first, then the top border in two pieces so that a gap
        # one cell wide shows the entrance. All other walls are drawn
        # cell by cell below.
        cols = w/size
        half = cols//2
        canvas.create_line(offset, offset, offset, offset+h, width=3)
        canvas.create_line(offset, offset, offset + half*size, offset, width=3)
        canvas.create_line(offset + size*(1+half), offset, offset + cols*size, offset, width=3)

        # Each cell contributes its south (bottom) and east (right) walls, when present.
        for r in range(self.maze.num_rows):
            top = offset + r*size
            bottom = offset + (r+1)*size
            for c in range(self.maze.num_cols):
                left = offset + c*size
                right = offset + (c+1)*size
                if self.maze.south_wall[r,c]:
                    canvas.create_line(left, bottom, right, bottom, width=3)
                if self.maze.east_wall[r,c]:
                    canvas.create_line(right, top, right, bottom, width=3)

        self.canvas = canvas
        return canvas

    def color_cell(self, cell, color):
        """
        Fill the marker for cell (row, column) with color, creating the marker
        on first use and recolouring the stored one on later calls.
        """
        if cell in self.built:
            self.canvas.itemconfig(self.built[cell], fill=color)
            return

        inset = self.INSET
        size = self.size
        # The extra +1 nudges the circle off the neighbouring wall.
        cx = self.OFFSET + cell[1]*size + 1
        ry = self.OFFSET + cell[0]*size + 1
        marker = self.canvas.create_oval(cx + inset, ry + inset,
                                         cx + size - 2*inset, ry + size - 2*inset,
                                         fill=color)
        self.built[cell] = marker
68 |
69 | #######################################################################
if __name__ == '__main__':
    # Show a sample maze in a window, provided tkinter could be imported.
    if tkinter_error:
        print('Unable to visualize maze without tkinter')
    else:
        import tkinter
        # Fixed seed set before the maze is constructed -- presumably so the
        # same maze is generated on every run.
        random.seed(15)
        m = Maze(50,50,salt=0.05)
        root = tkinter.Tk()
        Viewer(m, 15).view(root)
        root.mainloop()
80 |
--------------------------------------------------------------------------------
/ch05/heapsort.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains implementation of Heap Sort as a stand-alone class.
3 |
4 | HeapSortCounting also provides ability to record number of swaps
5 | and number of times less was invoked.
6 | """
7 |
def heap_sort(A):
    """Sort A in place by building a HeapSort over it and running its sort()."""
    HeapSort(A).sort()
12 |
class HeapSort:
    """
    Heap Sort over a caller-supplied list, which is rearranged in place.

    Heap positions are 1-based (children of k are 2k and 2k+1) and map onto
    the 0-based list by subtracting one. Constructing the object arranges A
    into a max heap; sort() then finishes the job.
    """
    def __init__(self, A):
        self.A = A
        self.N = len(A)

        # Heapify bottom-up, starting from the last position that has a child.
        for k in reversed(range(1, self.N//2 + 1)):
            self.sink(k)

    def sort(self):
        """Repeatedly move the largest remaining value to the end of the heap region."""
        while self.N > 1:
            last = self.N
            self.swap(1, last)
            self.N = last - 1
            self.sink(1)

    def less(self, i, j):
        """True when the value at heap position i is smaller than the one at position j."""
        return self.A[i-1] < self.A[j-1]

    def swap(self, i, j):
        """Exchange the values at heap positions i and j."""
        self.A[i-1],self.A[j-1] = self.A[j-1],self.A[i-1]

    def sink(self, parent):
        """Push the value at heap position parent down until no child is larger."""
        child = 2 * parent
        while child <= self.N:
            # Prefer the right child when it exists and is the larger one.
            if child < self.N and self.less(child, child+1):
                child += 1
            if not self.less(parent, child):
                return
            self.swap(child, parent)

            parent, child = child, 2 * child
50 |
class HeapSortCounting:
    """
    Heap Sort over a caller-supplied list that also keeps statistics.

    num_comparisons counts calls to less() and num_swaps counts calls to
    swap(). With output=True the list is printed before each sink() of the
    heap-building phase and once more when that phase is done.
    """
    def __init__(self, A, output=False):
        self.A = A
        self.N = len(A)
        self.num_swaps = 0
        self.num_comparisons = 0

        def snapshot():
            """Current contents of A, one right-aligned cell per value."""
            return '|'.join(' {:>2} '.format(v) for v in A)

        def tally():
            """Suffix reporting the comparisons made so far."""
            return '\t{} comparisons'.format(self.num_comparisons)

        for k in reversed(range(1, self.N//2 + 1)):
            if output:
                # Nothing to report until at least one comparison has happened.
                if self.num_comparisons == 0:
                    print(snapshot())
                else:
                    print(snapshot() + tally())
            self.sink(k)
        if output:
            print(snapshot() + tally())

    def sort(self):
        """Repeatedly move the largest remaining value to the end of the heap region."""
        while self.N > 1:
            last = self.N
            self.swap(1, last)
            self.N = last - 1
            self.sink(1)

    def less(self, i, j):
        """Compare heap positions i and j (1-based), recording one comparison."""
        self.num_comparisons += 1
        return self.A[i-1] < self.A[j-1]

    def swap(self, i, j):
        """Exchange heap positions i and j (1-based), recording one swap."""
        self.num_swaps += 1
        self.A[i-1],self.A[j-1] = self.A[j-1],self.A[i-1]

    def sink(self, parent):
        """Push the value at heap position parent down until no child is larger."""
        child = 2 * parent
        while child <= self.N:
            if child < self.N and self.less(child, child+1):
                child += 1
            if not self.less(parent, child):
                return
            self.swap(child, parent)

            parent, child = child, 2 * child
101 |
--------------------------------------------------------------------------------
/ch05/sorting.py:
--------------------------------------------------------------------------------
1 | """
2 | SelectionSort and InsertionSort
3 | """
4 |
def selection_sort(A):
    """
    Sort A in place using Selection Sort.

    For each slot from left to right, locate the smallest remaining value
    and swap it into the slot.
    """
    last = len(A) - 1
    for slot in range(last):
        smallest = slot
        for probe in range(slot + 1, last + 1):
            if A[probe] < A[smallest]:
                smallest = probe

        A[slot], A[smallest] = A[smallest], A[slot]
15 |
def python_selection_sort(A):
    """
    Selection Sort that lets the built-in min() locate each smallest value.

    Sorts A in place; min() scans the remaining index range using the list's
    own item lookup as its key.
    """
    size = len(A)
    value_at = A.__getitem__
    for slot in range(size - 1):
        smallest = min(range(slot, size), key=value_at)
        A[slot], A[smallest] = A[smallest], A[slot]
22 |
def insertion_sort(A):
    """
    Sort A in place using Insertion Sort.

    Each value is swapped leftwards until its left neighbour is <= it;
    stopping on equality keeps equal values in their original order.
    """
    for start in range(1, len(A)):
        pos = start
        while pos > 0 and not A[pos-1] <= A[pos]:
            A[pos], A[pos-1] = A[pos-1], A[pos]
            pos -= 1
32 |
def insertion_sort_cmp(A, less=lambda one,two: one <= two):
    """
    Sort A in place using Insertion Sort with a caller-supplied ordering test.

    less(left, right) is called on adjacent values; a true result means the
    pair is already in order and the current value stops moving left.
    """
    for start in range(1, len(A)):
        pos = start
        while pos > 0:
            if less(A[pos-1], A[pos]):
                break
            A[pos], A[pos-1] = A[pos-1], A[pos]
            pos -= 1
42 |
def quick_sort(A):
    """
    Sort A in place with Quicksort, choosing each pivot index at random.

    partition() is imported from ch01.challenge; the value it returns is
    treated as the pivot's resting index, and the ranges on either side of
    it are sorted recursively.
    """
    from ch01.challenge import partition
    from random import randint

    def qsort(lo, hi):
        """Sort the inclusive range A[lo..hi]."""
        if lo < hi:
            chosen = randint(lo, hi)
            resting = partition(A, lo, hi, chosen)

            qsort(lo, resting-1)
            qsort(resting+1, hi)

    qsort(0, len(A)-1)
59 |
def insertion_sort_bas(A):
    """
    Sort A in place using Insertion Sort, locating each insertion point with
    Binary Array Search over the already-sorted prefix A[0..i-1].

    The value is then removed from position i and re-inserted at the located
    position, relying on Python lists being able to shift their contents.
    When an equal value is found the new value is placed at that value's
    position, so the resulting sort is not guaranteed to be stable.

    Only the `<` operator is applied to the values, so any mutually
    orderable values (numbers, strings, tuples, ...) can be sorted.
    """
    N = len(A)
    for i in range(1, N):
        val = A[i]
        lo, hi = 0, i-1
        while lo <= hi:
            mid = (lo+hi)//2
            if val < A[mid]:
                hi = mid-1
            elif A[mid] < val:
                lo = mid+1
            else:
                lo = mid           # equal value found: insert right here
                break

        # lo == i means val already sits after everything smaller: nothing to do
        if lo < i:
            del A[i]               # delete from end first
            A.insert(lo, val)      # insert into proper spot
87 |
def selection_sort_counting(A):
    """
    Selection Sort that also reports how much work it performed.

    Sorts A in place and returns the tuple (number of swaps, number of
    comparisons). One swap is counted per slot, even when the smallest value
    is already in place.
    """
    swaps = 0
    compares = 0
    size = len(A)
    for slot in range(size - 1):
        smallest = slot
        for probe in range(slot + 1, size):
            compares += 1
            if A[probe] < A[smallest]:
                smallest = probe

        swaps += 1
        A[slot], A[smallest] = A[smallest], A[slot]
    return (swaps, compares)
102 |
def insertion_sort_counting(A):
    """
    Insertion Sort that also reports how much work it performed.

    Sorts A in place and returns the tuple (number of swaps, number of
    comparisons).
    """
    swaps = 0
    compares = 0
    for start in range(len(A)):
        pos = start
        while pos > 0:
            compares += 1
            if A[pos-1] <= A[pos]:
                break
            swaps += 1
            A[pos], A[pos-1] = A[pos-1], A[pos]
            pos -= 1
    return (swaps, compares)
115 |
--------------------------------------------------------------------------------
/ch07/solver_guided.py:
--------------------------------------------------------------------------------
1 | """
2 | Animates the Guided Search solution of a maze.
3 |
4 | If you set `refresh_rate` to zero, then it will remove all delays. As it is, the
5 | small 0.01 refresh rate shows the progress of the search.
6 |
7 | If you set `stop_end` to True, the search terminates as soon as the target is reached.
8 | """
9 | import time
10 | import random
11 |
12 | from ch07.maze import Maze, to_networkx
13 | from ch07.viewer import Viewer
14 | from ch07.dependencies import tkinter_error
15 |
class GuidedSearchSolver():
    """
    Solves a maze with a search guided by the Manhattan distance from each
    cell to the maze's end, showing results graphically.
    """
    def __init__(self, master, maze, size, refresh_rate=0.01, stop_end=False):
        """
        Build the viewer and graph for maze and schedule the animation.

        master is the tkinter widget to draw into, size is passed to the
        Viewer, refresh_rate is the pause in seconds after each dequeued cell
        (a false value disables the pause), and stop_end makes the search
        return as soon as the end cell is dequeued.
        """
        self.master = master
        self.viewer = Viewer(maze, size)
        self.marked = {}        # cells already placed on the priority queue
        self.node_from = {}     # cell -> the cell it was discovered from
        self.size = maze.num_rows * maze.num_cols   # capacity for the priority queue

        self.g = to_networkx(maze)
        self.start = maze.start()
        self.end = maze.end()
        self.stop_end = stop_end

        self.refresh_rate = refresh_rate

        master.after(0, self.animate)
        self.canvas = self.viewer.view(master)

    def animate(self):
        """Run the guided search from the start, then shade the path found back from the end."""
        self.guided_search(self.start)

        # draw BACK edges to solution
        pos = self.end
        while pos != self.start:
            self.viewer.color_cell(pos, 'lightgray')
            if pos not in self.node_from:
                # Turns out there was no solution...
                break
            pos = self.node_from[pos]
        self.master.update()

    def distance_to(self, to_cell):
        """Return Manhattan distance from to_cell to the maze end."""
        return abs(self.end[0] - to_cell[0]) + abs(self.end[1] - to_cell[1])

    def guided_search(self, pos):
        """
        Search outward from pos, always expanding the queued cell that is
        closest (by Manhattan distance) to the maze end.

        Returns True if stop_end is set and the end cell was dequeued,
        otherwise False once the queue is exhausted.
        """
        from ch04.heap import PQ
        pq = PQ(self.size)
        self.viewer.color_cell(pos, 'blue')

        # Using a MAX PRIORITY QUEUE means we rely on negative distance to
        # choose the one that is closest...
        self.marked[pos] = True
        pq.enqueue(pos, -self.distance_to(pos))

        while not pq.is_empty():
            cell = pq.dequeue()
            self.master.update()
            if self.refresh_rate:
                time.sleep(self.refresh_rate)

            if self.stop_end and cell == self.end:
                self.marked[cell] = True
                self.viewer.color_cell(cell, 'blue')
                return True

            for next_cell in self.g.neighbors(cell):
                if next_cell not in self.marked:
                    self.node_from[next_cell] = cell
                    pq.enqueue(next_cell, -self.distance_to(next_cell))
                    self.marked[next_cell] = True
                    self.viewer.color_cell(next_cell, 'blue')

        return False
90 |
91 | #######################################################################
if __name__ == '__main__':
    # Launch the animated guided search on a fixed 60x60 maze when tkinter is available.
    if not tkinter_error:
        import tkinter
        random.seed(15)         # fixed seed so the same maze is generated every run
        maze = Maze(60,60)
        window = tkinter.Tk()
        solver = GuidedSearchSolver(window, maze, 15, refresh_rate=0.001, stop_end=True)
        window.mainloop()
    else:
        print('tkinter is not installed so unable to launch Guided solver application')
102 |
--------------------------------------------------------------------------------
/ch05/timing.py:
--------------------------------------------------------------------------------
1 | """
2 | Timing Results for chapter 5.
3 |
4 | Compare Merge Sort against built in Python sort. This takes unusually long.
5 | N MergeSort Built-In Sort
6 | 256 0.371 0.002
7 | 512 0.825 0.003
8 | 1,024 1.839 0.007
9 | 2,048 3.958 0.015
10 | 4,096 8.455 0.032
11 | 8,192 17.843 0.070
12 | 16,384 37.647 0.153
13 |
14 | Compare Selection Sort against two flavors of Insertion Sort. This takes unusually long.
15 | N Select Insert InsertBAS
16 | 256 1.28 0.20 0.23
17 | 512 5.86 0.77 0.56
18 | 1,024 23.66 3.10 1.33
19 | 2,048 94.52 12.33 3.08
20 |
21 | """
22 | import timeit
23 | from algs.table import DataTable
24 |
def table_trials(max_k=15, output=True, decimals=3):
    """
    Compare Merge Sort against the built-in list sort on shuffled lists of
    size N = 2**k for k in 8 .. max_k-1.

    Each cell is the best of 20 repetitions of 15 runs, reported as
    milliseconds per run. Returns the populated DataTable.
    """
    tbl = DataTable([8,10,10], ['N', 'MergeSort', 'Built-In Sort'],
                    output=output, decimals=decimals)

    def best_ms(stmt, setup):
        """Best average time of one execution of stmt, in milliseconds."""
        return 1000*min(timeit.repeat(stmt=stmt, setup=setup, repeat=20, number=15))/15

    for k in range(8, max_k):
        n = 2**k

        msort = best_ms('merge_sort(x)', '''
import random
from ch05.merge import merge_sort
x=list(range({}))
random.shuffle(x)'''.format(n))

        builtin = best_ms('x.sort()', '''
import random
x=list(range({}))
random.shuffle(x)'''.format(n))

        tbl.row([n, msort, builtin])
    return tbl
44 |
def quadratic_sort_trials(max_k=12, output=True, decimals=2):
    """
    Compare two flavors of Selection Sort against two flavors of Insertion
    Sort on shuffled lists of size N = 2**k for k in 8 .. max_k-1.

    Each cell is the best of 20 repetitions of 15 runs, reported as
    milliseconds per run. Both Selection Sorts are skipped (reported as -1)
    once N exceeds 2048, and plain Insertion Sort once N exceeds 8192.
    Returns the populated DataTable.
    """
    tbl = DataTable([8,8,8,8,8], ['N', 'Select', 'PythonSelect', 'Insert', 'InsertBAS'],
                    output=output, decimals=decimals)

    def best_ms(stmt, setup):
        """Best average time of one execution of stmt, in milliseconds."""
        return 1000*min(timeit.repeat(stmt=stmt, setup=setup, repeat=20, number=15))/15

    for k in range(8, max_k):
        n = 2**k

        m_select = -1
        if n <= 2048:
            m_select = best_ms('selection_sort(x)', '''
import random
from ch05.sorting import selection_sort
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n))

        m_pselect = -1
        if n <= 2048:
            m_pselect = best_ms('python_selection_sort(x)', '''
import random
from ch05.sorting import python_selection_sort
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n))

        m_insert = -1
        if n <= 8192:
            m_insert = best_ms('insertion_sort(x)', '''
import random
from ch05.sorting import insertion_sort
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n))

        m_insert_bas = best_ms('insertion_sort_bas(x)', '''
import random
from ch05.sorting import insertion_sort_bas
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n))

        tbl.row([n, m_select, m_pselect, m_insert, m_insert_bas])
    return tbl
93 |
94 | #######################################################################
if __name__ == '__main__':
    # Announce and run each timing table in turn, then end with a blank line.
    for banner, trial in (
            ('Compare Selection Sort against two flavors of Insertion Sort; takes unusually long.',
             quadratic_sort_trials),
            ('Compare Merge Sort against built-in Python sort. This takes unusually long.',
             table_trials)):
        print(banner)
        trial()
    print()
102 |
--------------------------------------------------------------------------------
/ch07/solver_dfs.py:
--------------------------------------------------------------------------------
1 | """
2 | Animates the Depth First Search solution of a maze.
3 |
4 | The Depth First Search implementation is non-recursive, since mazes can grow quite
5 | large. I have provided the recursive variation as well, which you can switch within
6 | the `animate()` method below.
7 |
8 | If you set `refresh_rate` to zero, then it will remove all delays. As it is, the
9 | small 0.01 refresh rate shows the progress of the search.
10 |
11 | If you set `stop_end` to True, the search terminates as soon as the target is reached.
12 | """
13 | import time
14 | import random
15 |
16 | from ch07.maze import Maze, to_networkx
17 | from ch07.viewer import Viewer
18 | from ch07.dependencies import tkinter_error
19 |
class DepthFirstSearchSolver():
    """
    Solves a maze using Depth First Search, showing results graphically.
    """
    def __init__(self, master, maze, size, refresh_rate=0.01, stop_end=False):
        """
        Build the viewer and graph for maze and schedule the animation.

        master is the tkinter widget to draw into, size is passed to the
        Viewer, refresh_rate is the pause in seconds after each visited cell
        (a false value disables the pause), and stop_end makes the search
        return as soon as the end cell is reached.
        """
        self.master = master
        self.viewer = Viewer(maze, size)
        self.marked = {}        # cells already discovered
        self.node_from = {}     # cell -> the cell it was discovered from

        self.g = to_networkx(maze)
        self.start = maze.start()
        self.end = maze.end()
        self.stop_end = stop_end

        self.refresh_rate = refresh_rate

        master.after(0, self.animate)
        self.canvas = self.viewer.view(master)

    def animate(self):
        """Run the non-recursive DFS from the start, then shade the path found back from the end."""
        self.dfs_visit_nr(self.start)

        # draw BACK edges to solution
        pos = self.end
        while pos != self.start:
            self.viewer.color_cell(pos, 'lightgray')
            if pos not in self.node_from:
                # the end was never reached, so there is no path to follow back
                break
            pos = self.node_from[pos]
        self.master.update()

    def dfs_visit_nr(self, pos):
        """
        Non-recursive depth-first search investigating given position.

        Returns True if stop_end is set and the end cell was popped,
        otherwise False once the stack is exhausted.
        """
        from ch07.list_stack import Stack
        stack = Stack()
        # mark the starting cell so a neighbor cannot push it a second time
        self.marked[pos] = True
        self.viewer.color_cell(pos, 'blue')
        stack.push(pos)

        while not stack.is_empty():
            cell = stack.pop()
            self.master.update()
            if self.refresh_rate:
                time.sleep(self.refresh_rate)

            if self.stop_end and cell == self.end:
                self.marked[cell] = True
                self.viewer.color_cell(cell, 'blue')
                return True

            for next_cell in self.g.neighbors(cell):
                if next_cell not in self.marked:
                    self.node_from[next_cell] = cell
                    stack.push(next_cell)
                    self.marked[next_cell] = True
                    self.viewer.color_cell(next_cell, 'blue')

        return False

    def dfs_visit(self, pos):
        """
        Recursive depth-first search investigating given position.

        Returns True if stop_end is set and the end cell was reached from
        pos, otherwise False.
        """
        self.marked[pos] = True
        self.viewer.color_cell(pos, 'blue')
        self.master.update()
        if self.refresh_rate:
            time.sleep(self.refresh_rate)

        # immediately force all processing to unwind...
        if self.stop_end and pos == self.end:
            return True

        for cell in self.g.neighbors(pos):
            if cell not in self.marked:
                self.node_from[cell] = pos
                if self.dfs_visit(cell):
                    return True

        self.marked[pos] = True
        self.viewer.color_cell(pos, 'blue')
        return False
99 |
100 | #######################################################################
if __name__ == '__main__':
    # Launch the animated DFS on a fixed 60x60 maze when tkinter is available.
    if not tkinter_error:
        import tkinter
        random.seed(15)         # fixed seed so the same maze is generated every run
        maze = Maze(60,60)
        window = tkinter.Tk()
        solver = DepthFirstSearchSolver(window, maze, 15, refresh_rate=0.001, stop_end=True)
        window.mainloop()
    else:
        print('tkinter is not installed so unable to launch DFS solver application')
111 |
--------------------------------------------------------------------------------
/algs/timing.py:
--------------------------------------------------------------------------------
1 | """
2 | Demonstrates timing capabilities of timeit in Python.
3 |
4 | :Example:
5 |
6 | >>> timing_trials()
7 | 5 repetitions of sleeping for 100 milliseconds ten times.
8 | 5 10 1.091816
9 | Should be about 1.0:
10 |
11 | 50 repetitions of sleeping for 100 milliseconds ten times.
12 | 50 10 1.081817
13 | Should be closer to 1.0 (but might not):
14 |
15 | 500 repetitions of sleeping for 100 milliseconds ten times.
16 | 500 10 1.078466
17 | Should be even closer to 1.0:
18 |
19 | 5 repetitions of sleeping for 100 milliseconds one hundred times.
20 | 5 100 10.884912
21 | Should be about 10.0:
22 |
23 | A number of R repetitions of a trial are run, creating a list of R timing
24 | values. We then retrieve the minimum value from this list, since that is
25 | (at least) a definitive measurement of time. To compute the average, now
26 | divide by X. This is why the timing statements all look like this:
27 |
28 | m = min(timeit.repeat(stmt=... setup=..., repeat = R, number=X))/X
29 |
30 | R is the number of independent trials. X becomes higher when the statement
31 | to be executed is too fast, and the only way to measure it using the
32 | granularity of the OS timer is to have it execute the statement X times
33 | in sequence.
34 |
35 | If the stmt to be executed depends on some parameter N, then you have to
36 | additionally divide the final result by N to normalize and retrieve
37 | average operation cost.
38 |
39 | m = min(timeit.repeat(stmt='''f({N})''' setup=..., repeat = R, number=X))/(X*N)
40 |
41 | """
42 |
43 | import timeit
44 |
def run_trial(rep, num):
    """
    Time `num` consecutive 100-millisecond sleeps, `rep` separate times.

    Returns the smallest of the `rep` total times, in seconds.
    """
    timings = timeit.repeat(stmt='sleep(0.1)', setup='from time import sleep',
                            repeat=rep, number=num)
    return min(timings)
49 |
def ten_million_addition_trial():
    """
    Time a loop that adds up the first ten million non-negative integers.

    The loop is run once per repetition for ten repetitions; the smallest
    time, in seconds, is returned.
    """
    loop = '''
x = 0
for i in range(10000000):
    x += i
'''
    timings = timeit.repeat(stmt=loop, repeat=10, number=1)
    return min(timings)
58 |
def stages_of_timing():
    """
    Show when the stages of a timeit run are invoked.

    The setup code runs once before each repetition, and the statement is
    then executed 'number' times. State is shared between those executions
    (val keeps counting up) but not across repetitions (val restarts at 1).
    Returns the smaller of the two repetition times.

    Expected output:

        in setup
        real statement 1
        real statement 2
        real statement 3
        in setup
        real statement 1
        real statement 2
        real statement 3

    """
    prepare = '''
print('in setup')
val = 1'''
    statement = '''
print('real statement', val)
val += 1'''
    return min(timeit.repeat(stmt=statement, setup=prepare, repeat=2, number=3))
86 |
def timing_trials():
    """
    Complete sequence of timing trials, showing how to use timeit.

    For each (repetitions, number) pair, print a heading, the measured best
    time from run_trial(), and the value that time is expected to approach.
    """
    row = '{}\t{}\t{:.6f}'
    schedule = [
        (5, 10, '5 repetitions of sleeping for 100 milliseconds ten times.',
         '\t\tShould be about 1.0:'),
        (50, 10, '50 repetitions of sleeping for 100 milliseconds ten times.',
         '\t\tShould be closer to 1.0 (but might not):'),
        (500, 10, '500 repetitions of sleeping for 100 milliseconds ten times.',
         '\t\tShould be even closer to 1.0:'),
        (5, 100, '5 repetitions of sleeping for 100 milliseconds one hundred times.',
         '\t\tShould be about 10.0:'),
    ]

    final = len(schedule) - 1
    for position, (reps, num, heading, expectation) in enumerate(schedule):
        print(heading)
        print(row.format(reps, num, run_trial(reps, num)))
        print(expectation)
        if position != final:
            print()             # blank line between trials, none after the last
113 |
114 | #######################################################################
if __name__ == '__main__':
    # Walk through each demonstration in turn, separated by blank lines.
    stages_of_timing()
    print()

    print('Time to add first ten million numbers (seconds)')
    elapsed = ten_million_addition_trial()
    print(format(elapsed, '.6f'))
    print()

    timing_trials()
    print()
125 |
--------------------------------------------------------------------------------
/ch07/indexed_pq.py:
--------------------------------------------------------------------------------
1 | """
2 | Indexed minimum priority queue
3 | """
class IndexedMinPQ:
    """
    Binary-heap storage for an indexed min priority queue.

    Attributes
    ----------
        size       - capacity of the queue (index 0 of the arrays is unused)
        N          - number of (value, priority) pairs currently stored
        values     - values[i] is the value held at heap position i
        priorities - priorities[i] is the priority of values[i]
        location   - maps each stored value to its heap position
    """
    def __init__(self, size):
        slots = size + 1                    # 1-based indexing wastes slot 0
        self.size = size
        self.N = 0
        self.values = [None] * slots
        self.priorities = [None] * slots
        self.location = {}

    def __len__(self):
        """Number of values currently in the priority queue."""
        return self.N

    def __contains__(self, v):
        """Whether value v is currently in the priority queue."""
        return v in self.location

    def is_empty(self):
        """Return True when the priority queue holds no values."""
        return self.N == 0

    def is_full(self):
        """Return True when the priority queue has no storage left."""
        return self.N == self.size

    def enqueue(self, v, p):
        """Add value v with priority p; raises RuntimeError when full."""
        if self.size == self.N:
            raise RuntimeError('Priority Queue is full!')

        self.N += 1
        slot = self.N
        self.values[slot] = v
        self.priorities[slot] = p
        self.location[v] = slot             # remember where v lives
        self.swim(slot)

    def decrease_priority(self, v, lower_priority):
        """
        Lower the priority of v, moving it closer to the head of the queue.

        Raises ValueError if v is not stored, and RuntimeError if
        lower_priority is not strictly smaller than v's current priority.
        """
        if v not in self.location:
            raise ValueError('{} not in the indexed min priority queue.'.format(v))

        idx = self.location[v]
        current = self.priorities[idx]
        if lower_priority >= current:
            raise RuntimeError('Value {} has existing priority of {} which is already lower than {}'.format(v, current, lower_priority))

        self.priorities[idx] = lower_priority
        self.swim(idx)

    def less(self, i, j):
        """
        Return True when position j should sit above position i. For a min
        priority queue that means priorities[i] is the greater of the two.
        """
        return self.priorities[i] > self.priorities[j]

    def swap(self, i, j):
        """Exchange heap positions i and j, keeping location in step."""
        vals, prios = self.values, self.priorities
        vals[i], vals[j] = vals[j], vals[i]
        prios[i], prios[j] = prios[j], prios[i]

        self.location[vals[i]] = i
        self.location[vals[j]] = j

    def swim(self, child):
        """Move the entry at position child up until heap order is restored."""
        while child > 1:
            parent = child//2
            if not self.less(parent, child):
                break
            self.swap(child, parent)
            child = parent

    def sink(self, parent):
        """Move the entry at position parent down until heap order is restored."""
        while True:
            child = 2*parent
            if child > self.N:
                return
            if child < self.N and self.less(child, child+1):
                child += 1
            if not self.less(parent, child):
                return
            self.swap(child, parent)
            parent = child

    def peek(self):
        """Return the value at the head of the queue without removing it."""
        if self.N == 0:
            raise RuntimeError('IndexMinPriorityQueue is empty!')

        return self.values[1]

    def dequeue(self):
        """Remove and return the value at the head of the queue (smallest priority)."""
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        last = self.N
        min_value = self.values[1]

        # move the final entry to the top, then clear its old slot
        self.values[1] = self.values[last]
        self.priorities[1] = self.priorities[last]
        self.location[self.values[1]] = 1

        self.values[last] = self.priorities[last] = None
        self.location.pop(min_value)        # remove from dictionary

        self.N -= 1
        self.sink(1)
        return min_value
116 |
--------------------------------------------------------------------------------
/ch06/tree.py:
--------------------------------------------------------------------------------
1 | """
2 | Data Structure for non-balancing Binary Search Tree.
3 |
4 | The tree can contain duplicate values.
5 | """
6 |
class BinaryNode:
    """
    One node of a binary tree.

    Attributes
    ----------
        value - value stored by the node
        left  - left child (or None)
        right - right child (or None)
    """
    def __init__(self, val):
        self.value = val
        self.left = self.right = None

    def size(self):
        """Count the nodes in the subtree rooted at this node, itself included."""
        children = (self.left, self.right)
        return 1 + sum(child.size() for child in children if child)
28 |
class BinaryTree:
    """
    A binary search tree: holds the root node and the methods that
    manipulate the tree. Values equal to a node's value are stored in that
    node's left subtree.
    """
    def __init__(self):
        self.root = None

    def is_empty(self):
        """Return True when the tree has no nodes."""
        return self.root is None

    def insert(self, val):
        """Add val to the tree."""
        self.root = self._insert(self.root, val)

    def _insert(self, node, val):
        """Insert val below node and return the root of the resulting subtree."""
        if node is None:
            return BinaryNode(val)

        if val <= node.value:
            node.left = self._insert(node.left, val)
            return node

        node.right = self._insert(node.right, val)
        return node

    def min(self):
        """Return the smallest value in the tree (None when empty); the tree is unchanged."""
        current = self.root
        if current is None:
            return None
        while current.left:
            current = current.left
        return current.value

    def _remove_min(self, node):
        """Detach the leftmost node of the subtree rooted at node; return the new subtree root."""
        if node.left is None:
            return node.right

        node.left = self._remove_min(node.left)
        return node

    def remove(self, val):
        """Remove one occurrence of val from the tree, if present."""
        self.root = self._remove(self.root, val)

    def _remove(self, node, val):
        """Remove val from the subtree rooted at node and return the resulting subtree."""
        if node is None:
            return None

        if val < node.value:
            node.left = self._remove(node.left, val)
            return node
        if val > node.value:
            node.right = self._remove(node.right, val)
            return node

        # node holds val: with at most one child, that child takes its place
        if node.left is None:
            return node.right
        if node.right is None:
            return node.left

        # two children: the SMALLEST node of the right subtree takes its place
        successor = node.right
        while successor.left:
            successor = successor.left

        successor.right = self._remove_min(node.right)
        successor.left = node.left
        return successor

    def __contains__(self, target):
        """Check whether the tree holds target."""
        current = self.root
        while current:
            if target == current.value:
                return True
            current = current.left if target < current.value else current.right

        return False

    def __iter__(self):
        """Yield the values of the tree in ascending (in-order) sequence."""
        yield from self._inorder(self.root)

    def _inorder(self, node):
        """In-order traversal of the subtree rooted at node."""
        if node is None:
            return

        yield from self._inorder(node.left)
        yield node.value
        yield from self._inorder(node.right)

    def copy(self):
        """Return a new tree with the same shape and values, built by preorder traversal."""
        duplicate = BinaryTree()
        duplicate.root = self._copy(self.root)
        return duplicate

    def _copy(self, node):
        """Preorder copy of the subtree rooted at node."""
        if node is None:
            return None

        twin = BinaryNode(node.value)
        twin.left = self._copy(node.left)
        twin.right = self._copy(node.right)
        return twin
151 |
--------------------------------------------------------------------------------
/ch02/test.py:
--------------------------------------------------------------------------------
1 | """Test cases for Chapter 02."""
2 |
3 | import random
4 | import unittest
5 |
6 | from algs.sorting import check_sorted
7 |
class TestChapter2(unittest.TestCase):
    """Test cases for the Chapter 02 code."""

    def test_mult(self):
        """Digit-pair construction and multiplication helpers."""
        from ch02.mult import create_pair, mult_pair, create_random_pair
        (up, down) = create_pair(5)
        self.assertEqual(12345,up)
        self.assertEqual(98765,down)

        (up, down) = create_pair(12)
        self.assertEqual(123456789123,up)
        self.assertEqual(987654321987,down)

        self.assertEqual(77, mult_pair([7, 11]))

        (up, down) = create_random_pair(12)
        self.assertTrue(100000000000 < up < 999999999999)
        self.assertTrue(100000000000 < down < 999999999999)

    def test_valid(self):
        """Binary Array Search reports a found index or a negative insertion hint."""
        from ch02.bas import binary_array_search
        A = []
        self.assertEqual(-1, binary_array_search(A, 6))    # placed into empty array

        A = [6]
        self.assertEqual(0, binary_array_search(A, 6))
        self.assertEqual(-1, binary_array_search(A, 2))    # placed BEFORE 6
        self.assertEqual(-2, binary_array_search(A, 11))   # placed AFTER 6

        A = [2,4,6]
        self.assertEqual(-1, binary_array_search(A, 1))    # placed BEFORE 2
        self.assertEqual(-2, binary_array_search(A, 3))    # placed BEFORE 4
        self.assertEqual(-3, binary_array_search(A, 5))    # placed BEFORE 6
        self.assertEqual(-4, binary_array_search(A, 7))    # placed AFTER 6

    def test_max_sort(self):
        """max_sort returns a sorted result for a shuffled list."""
        from ch02.challenge import max_sort
        A = list(range(20))
        random.shuffle(A)
        self.assertTrue(check_sorted(max_sort(A)))

    def test_random(self):
        """random_sort and permutation_sort both sort a tiny list in place."""
        from ch02.random_sort import permutation_sort, random_sort

        # CAUTION! Only do this with small sizes.
        A = [8, 7, 6, 5, 4]
        random_sort(A)
        self.assertTrue(check_sorted(A))

        # CAUTION! Only do this with small sizes.
        A = [8, 7, 6, 5, 4]
        permutation_sort(A)
        self.assertTrue(check_sorted(A))

    def test_challenge(self):
        """The challenge timing tables can be generated and contain entries."""
        from ch02.challenge import run_max_sort_worst_case, run_permutation_sort

        tbl = run_permutation_sort(max_n=8, output=False)
        self.assertTrue(tbl.entry(2, 'PermutationSort') >= 0)

        tbl = run_max_sort_worst_case(max_k=10, output=False)
        self.assertTrue(tbl.entry(128, 'MaxSort') >= 0)

    def test_performance_bas(self):
        """The reported T(N) for N=32 is below the one for N=512."""
        from ch02.challenge import performance_bas

        tbl = performance_bas(max_k=10, output=False)
        self.assertTrue(tbl.entry(32,'T(N)') < tbl.entry(512,'T(N)'))

    def test_range(self):
        """worst_range and best_range agree on the index range of a target."""
        A = [1, 2, 2, 2, 3, 4]
        from ch02.challenge import worst_range, best_range
        self.assertIsNone(worst_range([], 2))
        self.assertIsNone(best_range([], 2))

        self.assertEqual((1,3), worst_range(A, 2))
        self.assertEqual((0,0), worst_range(A, 1))
        self.assertEqual((4,4), worst_range(A, 3))
        self.assertEqual((5,5), worst_range(A, 4))

        self.assertIsNone(worst_range(A,0))
        self.assertIsNone(worst_range(A,7))

        self.assertEqual((1,3), best_range(A, 2))
        self.assertEqual((0,0), best_range(A, 1))
        self.assertEqual((4,4), best_range(A, 3))
        self.assertEqual((5,5), best_range(A, 4))

        self.assertIsNone(best_range(A,0))
        self.assertIsNone(best_range(A,7))

        A = [3] * 10000
        self.assertEqual((0,9999), worst_range(A, 3))
        self.assertEqual((0,9999), best_range(A, 3))

        # a bit of a stress test: 999 copies of the target among 5000 random values
        tgt = random.random()
        alist = [tgt] * 999
        for _ in range(5000):
            alist.append(random.random())
        alist = sorted(alist)
        # search the stress list itself (not the earlier A) so the data is exercised
        self.assertEqual(worst_range(alist, tgt), best_range(alist, tgt))

        # All single ones....
        nums = list(range(100))
        for i in range(100):
            self.assertEqual((i,i), best_range(nums, i))
            self.assertEqual((i,i), worst_range(nums, i))

        nums = list(range(0,100,2))
        for i in range(1,100,2):
            self.assertIsNone(best_range(nums, i))
            self.assertIsNone(worst_range(nums, i))
120 |
121 | #######################################################################
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
124 |
--------------------------------------------------------------------------------
/ch04/factorial_heap.py:
--------------------------------------------------------------------------------
1 | """
2 | max Heap where each successive level has one more child, which leads to levels
3 | containing k! elements, where k is the level. This code provides the implementation
4 | to a challenge exercise.
5 | """
6 | from ch04.entry import Entry
7 |
8 | # SUMS of factorials. 1st id on a new level is 1 + this
9 | _factorials = [0, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800 ]
10 | _sums = [0, 1, 3, 9, 33, 153, 873, 5913, 46233, 409113, 4037913, 43954713 ]
11 |
12 | _firsts = [0, 1, 2, 4, 10, 34, 154, 874, 5914, 46234, 409114, 4037914, 40325914 ]
13 | _constants = [0, 2, 6, 16, 50, 204, 1078, 6992, 53226, 462340, 4500254, 48454968, 524236882]
14 |
15 | def fh_parent(k,lev):
16 | """Return index of parent for index k on level lev."""
17 | if lev <= 0:
18 | return 1 # Covers base case inelegantly
19 | return (k + _constants[lev-1]) // (lev+1)
20 |
21 | def fh_child(k,lev):
22 | """Return index of first child of index k on level lev."""
23 | return k*(lev+2) - _constants[lev]
24 |
def validate_level(pq, lev, k):
    """
    Recursively validate the heap-ordered property of the subtree rooted at k.

    pq  -- queue exposing N (number of stored entries) and less(i, j)
    lev -- level on which index k resides (the root is on level 0)
    k   -- storage index of the node whose children are checked

    Returns False as soon as some child outranks its parent, True otherwise.
    """
    first = fh_child(k, lev)

    # A node on level lev owns lev+2 consecutive children starting at `first`
    # (fh_child multiplies k by lev+2). The previous bound of `count <= lev`
    # stopped one child short, so the last child of every node was skipped.
    # Children beyond pq.N do not exist and are not visited.
    last = min(first + lev + 1, pq.N)
    for child in range(first, last + 1):
        if pq.less(k, child):
            return False
        if not validate_level(pq, lev + 1, child):
            return False

    return True      # checks out!
41 |
def validate(pq):
    """
    Check that pq satisfies the heap-ordered property (heap-shape is assumed).

    Meant as a development aid: call it right after enqueue and dequeue to
    catch ordering violations early. Returns True when the ordering holds.
    """
    root_level, root_index = 0, 1
    return validate_level(pq, root_level, root_index)
49 |
class PQ:
    """
    Factorial Heap storage for a (max) priority queue.

    Entries occupy storage[1..N]; storage[0] is never used. `level` records the
    level on which index N resides so that swim() knows where to start.
    """
    def __init__(self, size):
        """Create empty queue able to hold up to `size` entries."""
        self.size = size
        self.storage = [None] * (size+1)
        self.N = 0
        self.level = 0

    def __len__(self):
        """Return number of values in priority queue."""
        return self.N

    def is_full(self):
        """If priority queue has run out of storage, return True."""
        return self.size == self.N

    def enqueue(self, v, p):
        """
        Enqueue (v, p) entry into priority queue.

        Raises RuntimeError when the queue is already full.
        """
        if self.is_full():
            raise RuntimeError('Priority Queue is Full!')
        self.N += 1
        if self.N > _sums[self.level+1]:     # index N spills onto the next level
            self.level += 1

        self.storage[self.N] = Entry(v, p)
        self.swim(self.N)

    def less(self, i, j):
        """
        Helper function to determine if storage[i] has lower
        priority than storage[j].
        """
        return self.storage[i].priority < self.storage[j].priority

    def swap(self, i, j):
        """Switch the values in storage[i] and storage[j]."""
        self.storage[i],self.storage[j] = self.storage[j],self.storage[i]

    def swim(self, k):
        """
        Reestablish heap-order property from storage[k] up.

        The walk starts from self.level, so k is expected to be on that level.
        """
        lev = self.level
        parent = fh_parent(k,lev)
        while k > 1 and self.less(parent, k):
            self.swap(parent, k)
            k = parent
            lev -= 1
            parent = fh_parent(k, lev)

    def sink(self, k):
        """
        Reestablish heap-order property from storage[k] down.

        The walk starts at level 0, so k is expected to be the root.
        """
        lev = 0

        # If no child possible leave
        fc = fh_child(k,lev)
        while fc <= self.N:
            # Find largest of children. After the increment, lev is the level of
            # the children and each parent has lev+1 of them: fc .. fc+lev.
            largest = fc
            offset = 1
            lev += 1
            # `<=` so that the final entry, storage[N], is also a candidate; with
            # `<` it was ignored whenever it was not a first child, which could
            # leave a child larger than its parent.
            while fc+offset <= self.N and offset <= lev:
                if self.less(largest, fc+offset):
                    largest = fc+offset
                offset += 1

            if not self.less(k, largest):
                break

            self.swap(k, largest)

            k = largest
            fc = fh_child(k,lev)

    def dequeue(self):
        """
        Remove and return value with highest priority in priority queue.

        Raises RuntimeError when the queue is empty.
        """
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        max_entry = self.storage[1]
        self.swap(1, self.N)
        self.N -= 1
        # Retreat to the previous level once the current one has emptied. When the
        # queue empties completely this leaves level at -1; the next enqueue
        # restores it to 0 because 1 > _sums[0].
        if self.N == _sums[self.level]:
            self.level -= 1

        self.storage[self.N+1] = None    # avoid lingering
        self.sink(1)

        return max_entry.value
139 |
--------------------------------------------------------------------------------
/ch05/timsort.py:
--------------------------------------------------------------------------------
1 | """
2 | Simplistic non-optimized, native Python implementation showing the mechanics
3 | of TimSort.
4 |
5 | This code is designed to show how TimSort uses Insertion Sort and Merge Sort
6 | as its constituent building blocks. It is not the actual sorting algorithm,
7 | because of extra complexities that optimize this base algorithm even further.
8 |
9 | Full details on the sorting algorithm are in the actual CPython code base,
10 | but Tim Peters has provided documentation explaining reasons behind many
11 | of the choices in Tim Sort.
12 |
13 | https://hg.python.org/cpython/file/tip/Objects/listsort.txt
14 | """
15 | import timeit
16 | from algs.table import DataTable
17 |
def merge(A, lo, mid, hi, aux):
    """
    Merge the adjacent sorted runs A[lo..mid] and A[mid+1..hi] in place.

    aux is scratch storage at least as long as A; its slice [lo..hi] is
    overwritten. Ties are resolved in favor of the left run.
    """
    aux[lo:hi+1] = A[lo:hi+1]

    i, j = lo, mid + 1
    for dest in range(lo, hi + 1):
        # Draw from the right run when the left is exhausted, or when both still
        # have values and the right one is strictly smaller.
        take_right = i > mid or (j <= hi and aux[j] < aux[i])
        if take_right:
            A[dest] = aux[j]
            j += 1
        else:
            A[dest] = aux[i]
            i += 1
37 |
38 | # https://hg.python.org/cpython/file/tip/Objects/listsort.txt
39 | # Instead we pick a minrun in range(32, 65) such that N/minrun is exactly a
40 | # power of 2, or if that isn't possible, is close to, but strictly less than,
41 | # a power of 2. This is easier to do than it may sound: take the first 6
42 | # bits of N, and add 1 if any of the remaining bits are set. In fact, that
43 | # rule covers every case in this section, including small N and exact powers
44 | # of 2; merge_compute_minrun() is a deceptively simple function.
45 |
def compute_min_run(n):
    """
    Determine the min_run size for sorting n values.

    n is halved until it drops below 64; one is added to the result if any
    bit discarded along the way was set.
    """
    spill = 0
    while n >= 64:
        n, low_bit = divmod(n, 2)
        spill = spill | low_bit      # remember whether any dropped bit was 1
    return n + spill
54 |
def insertion_sort(A, lo, hi):
    """
    Insertion Sort restricted to A[lo .. hi], performed in place.

    A value only moves left past a neighbor that is not <= to it, so equal
    values keep their relative order (stable).
    """
    for start in range(lo + 1, hi + 1):
        pos = start
        while pos > lo and not A[pos - 1] <= A[pos]:
            A[pos - 1], A[pos] = A[pos], A[pos - 1]
            pos -= 1
62 |
def tim_sort(A):
    """
    Sort A in place with a simplified Tim Sort.

    Fewer than 64 values are handled by a single Insertion Sort. Otherwise A is
    cut into strips of compute_min_run(len(A)) values, each strip is insertion
    sorted, and neighboring strips are merged while the strip width doubles.
    """
    total = len(A)
    last = total - 1
    if total < 64:
        insertion_sort(A, 0, last)
        return

    # Sort each strip independently
    run = compute_min_run(total)
    for start in range(0, total, run):
        insertion_sort(A, start, min(start + run - 1, last))

    scratch = [None] * total
    while run < total:
        span = 2 * run
        # Merge pairs of runs; the final pair may be short, so clamp its bounds
        for start in range(0, total, span):
            middle = min(start + run - 1, last)
            end = min(start + span - 1, last)
            merge(A, start, middle, end, scratch)

        run = span
85 |
def timing_nlogn_sorting_real_world(max_k=18, output=True):
    """
    Time Merge Sort, Quicksort, this module's simplified Tim Sort and Python's
    built-in sort for n = 2**k, with k from 8 up to (but not including) max_k.

    Represents real-world case where Tim Sort shines, namely, where you are
    adding random data to an already sorted set: the first 80% of each input is
    ascending and the remaining values are shuffled before being appended.

    Each reported time is the fastest of 10 single executions. Returns the
    DataTable holding one row per n.
    """
    # Build model
    tbl = DataTable([12,10,10,10,10],['N','MergeSort', 'Quicksort', 'TimSort', 'PythonSort'],
                    output=output)

    # Statements constructing the input list A. Every contender uses the same
    # construction so all are timed on identically shaped data.
    build_input = '''
A=list(range(int({0}*.8)))
B=list(range({0}-len(A)))
random.shuffle(B)
A.extend(B)'''

    def best_time(stmt, import_line, n):
        """Return fastest of 10 runs of stmt against a freshly built input of size n."""
        setup = '\nimport random' + import_line + build_input.format(n)
        return min(timeit.repeat(stmt=stmt, setup=setup, repeat=10, number=1))

    for n in [2**k for k in range(8, max_k)]:
        t_ms = best_time('merge_sort(A)', '\nfrom ch05.merge import merge_sort', n)
        t_qs = best_time('quick_sort(A)', '\nfrom ch05.sorting import quick_sort', n)
        t_ps = best_time('A.sort()', '', n)
        t_ts = best_time('tim_sort(A)', '\nfrom ch05.timsort import tim_sort', n)

        # Column order follows the table header: TimSort precedes PythonSort
        tbl.row([n, t_ms, t_qs, t_ts, t_ps])
    return tbl
132 |
--------------------------------------------------------------------------------
/ch07/timing.py:
--------------------------------------------------------------------------------
1 | """
2 | Timing Results for chapter 7.
3 |
4 | Executing Floyd-Warshall on Massachusetts Highway is costly since it is an O(N^3)
5 | algorithm and the graph has 2305 vertices. An alternative is to use a technique
6 | called "Chained Dijkstra's", where multiple invocations of Dijkstra's single-source
7 | shortest path are used, one for each vertex, v, to find the best computed shortest path
8 | from that vertex.
9 |
10 | Perform Floyd-Warshall on MA highway data.
11 | This might take awhile
12 | start (42.045357, -70.214707) to end (42.539347, -73.341637) in longest shortest distance 251.43114701935508 in time 1334.837 seconds
13 | start (42.045357, -70.214707) to end (42.539347, -73.341637) in longest shortest distance 251.4311470193551 in time 7.694 seconds
14 |
15 | now compute same results with chained Dijkstra
16 | start (42.045357, -70.214707) to end (42.539347, -73.341637) in longest shortest distance 251.4311470193551 in time 73.632 seconds
17 |
18 | The first result above takes about twenty minutes. Using the chained approach, the
19 | second result (implemented by networkx as all_pairs_dijkstra_path_length) is
20 | significantly faster. This is because Floyd-Warshall is O(V^3) while Chained
21 | Dijkstra's is O(V * (V+E) * log V) and since the graph is sparse, E is on the
22 | order of V, leading to an overall classification of O(V^2 * log V) which handily
23 | beats O(V^3).
24 |
25 | The third approach is native Python code implementing Chained Dijkstra's, and this
26 | native code outperforms Floyd-Warshall handily, though the networkx implementation
27 | is still better, which is the reason for using Networkx in the first place.
28 |
29 | """
30 | import time
31 |
def floyd_warshall_highway():
    """
    Find the farthest-apart pair of vertices in the MA highway data twice:
    first with networkx's Floyd-Warshall, then with networkx's all-pairs
    Dijkstra. Each search is timed and its outcome printed.

    Nothing happens when plt_error is set.
    """
    from ch07.tmg_load import tmg_load, highway_map
    from ch07.dependencies import plt_error

    if plt_error:
        return

    (G, positions, _) = tmg_load(highway_map())
    report = 'start {} to end {} in longest shortest distance {} in time {:.3f} seconds'

    def farthest_pair(dist_to):
        """Scan all pairs i < j; return (i, j, distance) of the largest entry found."""
        best, src, dst = 0, -1, -1
        for i in range(G.number_of_nodes()):
            for j in range(i+1, G.number_of_nodes()):
                if dist_to[i][j] > best:
                    best, src, dst = dist_to[i][j], i, j
        return (src, dst, best)

    from networkx.algorithms.shortest_paths.dense import floyd_warshall
    print('This might take awhile')
    fw_began = time.time()
    (src, dst, best) = farthest_pair(floyd_warshall(G, weight='weight'))
    fw_elapsed = time.time() - fw_began
    print(report.format(positions[src], positions[dst], best, fw_elapsed))

    # so much faster since graph is sparse
    from networkx.algorithms.shortest_paths.weighted import all_pairs_dijkstra_path_length
    began = time.time()
    (src, dst, best) = farthest_pair(dict(all_pairs_dijkstra_path_length(G)))
    elapsed = time.time() - began
    print(report.format(positions[src], positions[dst], best, elapsed))
73 |
def chained_dijkstra():
    """
    Find the farthest-apart pair of vertices in the MA highway data by running
    dijkstra_sp once from every vertex, then print the pair, its distance and
    the elapsed time.

    Nothing happens when plt_error is set.
    """
    from ch07.tmg_load import tmg_load, highway_map
    from ch07.dependencies import plt_error
    from ch07.single_source_sp import dijkstra_sp

    if plt_error:
        return

    (G, positions, _) = tmg_load(highway_map())

    began = time.time()
    farthest = 0
    src = dst = -1
    for i in range(G.number_of_nodes()):
        (dist_to, _) = dijkstra_sp(G, i)
        # Only pairs with j > i are inspected, so each pair is seen once
        for j in range(i+1, G.number_of_nodes()):
            reach = dist_to[j]
            if reach > farthest:
                farthest, src, dst = reach, i, j

    elapsed = time.time() - began
    print('start {} to end {} in longest shortest distance {} in time {:.3f} seconds'
          .format(positions[src], positions[dst], farthest, elapsed))
98 |
99 | #######################################################################
if __name__ == '__main__':
    # Script entry point: run both all-pairs experiments back to back so their
    # printed timings can be compared.
    print('Perform Floyd-Warshall on MA highway data.')
    floyd_warshall_highway()
    print()

    print('now compute same results with chained Dijkstra')
    chained_dijkstra()
    print()
108 |
--------------------------------------------------------------------------------
/ch07/xlsx_loader.py:
--------------------------------------------------------------------------------
1 | """
2 | Load up rudimentary XLSX file.
3 |
4 | worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
5 |
6 | Note that Excel files have a space-saving device to reuse formula that are identical from
7 | one cell to another within a region. I saw this in the Fibonacci Example.
8 |
9 | <c r="A3">
10 | <f>(A2+1)</f>
11 | <v>1</v>
12 | </c>
13 |
14 | <c r="A4">
15 | <f t="shared" ref="A4:A8" si="0">(A3+1)</f>
16 | <v>2</v>
17 | </c>
18 |
19 | This cell shares the formula (with index si="0") in the region A4:A8, as you can see from
20 | the definition of 'A5' which is 'shared' as marked by the 't' tag, and it is recast to
21 | become (A4+1) in relation to the other one above.
22 |
23 | <c r="A5">
24 | <f t="shared" si="0"/>
25 | <v>3</v>
26 | </c>
27 |
28 | """
29 | from xml.dom import minidom
30 |
class Cell:
    """Record for one cell of the spreadsheet XML: its label, value and formula."""
    def __init__(self, label, value, formula):
        self.label, self.value, self.formula = label, value, formula
37 |
def load_xlsx(file):
    """
    Read worksheet 'xl/worksheets/sheet1.xml' of an XLSX archive into a dictionary.

    Each key is a cell label taken from the 'r' attribute; each entry is a string
    holding either the cell's value or, for formula cells, the formula prefixed
    by '='. A cell with neither value nor formula is recorded as '0'.
    """
    from zipfile import ZipFile

    entries = {}
    shared_formula = {}     # shared index -> (ref attribute, label of the defining cell)

    def diff(cell, base):
        # quick and dirty. Only works for single letters
        col_shift = ord(cell[0]) - ord(base[0])
        row_shift = int(cell[1:]) - int(base[1:])
        return (col_shift, row_shift)

    def adjust_formula(cell, si):
        """
        Recast the shared formula with index si for `cell`, shifting every reference
        by the offset between `cell` and the cell that defined the formula. The
        recorded 'ref' range is not consulted.
        """
        from ch06.expression import build_expression, Reference, Value
        (_, base) = shared_formula[si]
        (delta_c, delta_r) = diff(cell, base)

        # entries[base] starts with '=', which build_expression is not given
        expr = build_expression(entries[base][1:])

        def modify_in_place(node):
            """Hack/quick-and-dirty way to rewrite references inside the expression."""
            if isinstance(node, Value):
                return node

            if isinstance(node, Reference):
                old = str(node)
                column = chr(ord(old[0]) + delta_c)
                row = int(old[1:]) + delta_r
                return Reference(column + str(row))

            node.left = modify_in_place(node.left)
            node.right = modify_in_place(node.right)
            return node

        return '=' + str(modify_in_place(expr))

    def get_all_text(node):
        """Concatenate the text found in node and all of its descendants."""
        if node.nodeType == node.TEXT_NODE:
            return node.data
        return ''.join(get_all_text(child) for child in node.childNodes)

    with ZipFile(file, 'r') as zip_file:
        data = zip_file.read('xl/worksheets/sheet1.xml').decode('utf-8')

    for acc in minidom.parseString(data).getElementsByTagName('c'):     # TAG for cell
        cell = acc.getAttribute('r')
        value = 0
        formula = ref = si = None
        for v in acc.getElementsByTagName('v'):     # TAG for value (may accompany a formula)
            value = get_all_text(v)
        for f in acc.getElementsByTagName('f'):     # TAG for formula
            formula = get_all_text(f)
            ref = f.getAttribute('ref')             # ATTRIB region where sharing is scoped
            si = f.getAttribute('si')               # ATTRIB for shared index

        if not formula and not si:
            # plain value
            entries[cell] = str(value)
            continue

        if formula:
            # Formula text is present; signal it with a leading '='
            entries[cell] = '=' + formula
            if si:
                # ...and remember it, because other cells will share it
                shared_formula[si] = (ref, cell)
        else:
            # Only a shared index: derive the formula from the remembered one
            entries[cell] = adjust_formula(cell, si)
    return entries
129 |
--------------------------------------------------------------------------------
/ch07/digraph_search.py:
--------------------------------------------------------------------------------
1 | """
2 | Code to blindly search through a Graph in Depth First and Breadth First strategies. Also
3 | contains a rudimentary Guided Search for undirected graphs when there is a metric showing
4 | how far a node is from the target.
5 | """
6 |
def recover_cycle(DG):
    """
    Use recursive Depth First Search to detect a cycle and return it.

    DG must support DG.nodes() and DG[v] (iterating the nodes adjacent to v).

    Returns a list [v, w, ..., v] tracing one directed cycle, whose first and
    last entries are the same node, or an empty list when no cycle is found.
    """
    marked = {}
    in_stack = {}
    node_from = {}
    cycle = []

    def _recover_cycle(w, v):
        """Walk node_from links from v back to w, then close the loop with v."""
        n = v
        while n != w:
            yield n
            n = node_from[n]
        yield w
        yield v

    def dfs(v):
        in_stack[v] = True
        marked[v] = True

        for w in DG[v]:
            if w not in marked:
                node_from[w] = v
                dfs(w)
            elif in_stack.get(w):
                # w is still on the active path -- CYCLE!
                cycle.extend(reversed(list(_recover_cycle(w, v))))

            # Unwind as soon as a cycle is known. Scanning further edges could
            # append a second cycle to the result, or follow in_stack flags of
            # abandoned nodes and fail inside _recover_cycle.
            if cycle:
                return

        in_stack[v] = False

    for v in DG.nodes():
        if cycle:
            break
        if v not in marked:
            dfs(v)
    return cycle
43 |
def has_cycle(DG):
    """
    Report whether the directed graph DG contains a cycle, using a recursive
    Depth First Search that tracks which nodes are on the active path.
    """
    visited = set()
    active = set()        # nodes on the current recursion path

    def explore(v):
        """Return True if a cycle is reachable from v."""
        visited.add(v)
        active.add(v)

        for w in DG[v]:
            if w in visited:
                # An edge back into the active path closes a cycle
                if w in active:
                    return True
            elif explore(w):
                return True

        active.discard(v)
        return False

    return any(v not in visited and explore(v) for v in DG.nodes())
70 |
def topological_sort(DG):
    """
    Produce a topological ordering of DG's nodes with a recursive Depth First
    Search. The result is a reverse iterator over the order in which nodes
    finished. Only call when no cycle exists!
    """
    seen = set()
    finished = []

    def visit(v):
        seen.add(v)
        for w in DG[v]:
            if w not in seen:
                visit(w)
        # v is recorded only after everything reachable from it
        finished.append(v)

    for v in DG.nodes():
        if v not in seen:
            visit(v)

    return reversed(finished)
93 |
def has_cycle_nr(DG):
    """
    Detect a cycle in directed graph DG without recursion, driving the Depth
    First Search with an explicit Stack. Returns True when a cycle exists.
    """
    from ch07.list_stack import Stack
    marked = {}
    in_stack = {}
    stack = Stack()

    for s in DG.nodes():
        if s in marked:
            continue
        stack.push(s)

        while not stack.is_empty():
            v = stack.pop()
            if v in marked:
                # Popping an already marked node: it is no longer on the active path
                in_stack[v] = False
                continue

            # First visit: leave v on the stack beneath its neighbors
            marked[v] = True
            stack.push(v)
            in_stack[v] = True

            for w in DG[v]:
                if w not in marked:
                    stack.push(w)
                elif in_stack.get(w):
                    # w is still on the active path -- CYCLE!
                    return True
    return False
124 |
def recover_cycle_nr(DG):
    """
    Detect a cycle in directed graph DG without recursion and return it as a
    list [v, w, ..., v]; an empty list is returned when no cycle is found.
    """
    from ch07.list_stack import Stack
    marked = {}
    in_stack = {}
    node_from = {}
    stack = Stack()

    def trace(w, v):
        """Follow node_from from v back to w, close the loop with v, and reverse."""
        path = []
        n = v
        while n != w:
            path.append(n)
            n = node_from[n]
        path.append(w)
        path.append(v)
        path.reverse()
        return path

    for s in DG.nodes():
        if s in marked:
            continue
        stack.push(s)

        while not stack.is_empty():
            v = stack.pop()
            if v in marked:
                # Popping an already marked node: it is no longer on the active path
                in_stack[v] = False
                continue

            # First visit: leave v on the stack beneath its neighbors
            marked[v] = True
            stack.push(v)
            in_stack[v] = True

            for w in DG[v]:
                if w not in marked:
                    stack.push(w)
                    node_from[w] = v
                elif in_stack.get(w):
                    # w is still on the active path -- CYCLE!
                    return trace(w, v)

    return []
165 |
--------------------------------------------------------------------------------
/ch04/timing.py:
--------------------------------------------------------------------------------
1 | """
2 | Timing Results for chapter 4.
3 |
4 | All timing costs are scaled by 1000 to convert from seconds into milliseconds.
5 | Results from runTrials divided by T*N because the number of statements executed
6 | is directly proportional to that, and we are trying to find the average
7 | operational cost (of both enqueue and dequeue).
8 |
9 | Compare performance of resizable Heaps.
10 | N Heap DHeap
11 | 256 2.87 3.00
12 | 512 3.23 3.34
13 | 1,024 3.46 3.64
14 | 2,048 3.87 4.02
15 | 4,096 4.33 4.52
16 | 8,192 4.69 4.86
17 | 16,384 5.10 5.44
18 | 32,768 6.55 6.88
19 |
20 | """
21 | import timeit
22 | from algs.table import DataTable
23 |
def build_up(pq, N):
    """
    Enqueue N integers into pq, each serving as both value and priority.

    Keys start at 0 and advance by a fixed stride modulo N.
    """
    stride = 993557       # described by the author as a large prime

    current = 0
    for _ in range(N):
        pq.enqueue(current, current)
        current += stride
        current %= N
32 |
def drain(pq, n=0):
    """
    Call pq.dequeue() n times, stopping early once pq is empty.
    Pass in 0 (the default) to drain everything.
    """
    remaining = n
    while pq:
        pq.dequeue()
        remaining -= 1
        if not remaining:
            break
40 |
def run_trials(clazz, N, factor):
    """
    Time one_run() on a PQ of capacity N imported from module `clazz`.

    Returns the fastest of 5 repetitions, each being the average time of 10
    executions.
    """
    template = '''
from {0} import PQ
one_run(PQ({1}), {1}, {2})'''
    timings = timeit.repeat(stmt=template.format(clazz,N,factor),
                            setup='from ch04.timing import one_run', repeat=5, number=10)
    return min(timings)/10
48 |
def run_dynamic_trials(clazz, N, factor):
    """
    Time one_run() with problem size N on a PQ imported from module `clazz`
    that is always created with an initial size of 256.

    Returns the fastest of 5 repetitions, each being the average time of 10
    executions.
    """
    template = '''
from {} import PQ
one_run(PQ(256), {}, {})'''
    timings = timeit.repeat(stmt=template.format(clazz,N,factor),
                            setup='from ch04.timing import one_run', repeat=5, number=10)
    return min(timings)/10
56 |
def one_run(pq, N, factor):
    """
    Exercise the priority queue with waves of enqueues and dequeues while
    never holding more than 3/4 of N entries.
    Assume N divisible by 4 and factor > 2. Total of factor*N operations.
    """
    half, quarter = N//2, N//4

    build_up(pq, half)        # Fill halfway
    drain(pq, quarter)        # Now go back to 1/4 full
    for _ in range(factor-2):
        build_up(pq, half)    # bring up to 3/4 full
        drain(pq, half)       # now back to 1/4 full

    build_up(pq, half)        # back to 3/4 full
    drain(pq, 0)              # empty out...
70 |
def trial_factorial_heap(max_n=32768, output=True, decimals=2):
    """
    Tabulate timings of the regular heap against the factorial heap for N
    doubling from 256 up to but not including max_n. Returns the DataTable.
    """
    factor = 3
    tbl = DataTable([10,8,8], ['N', 'Heap', 'FactHeap'], output=output, decimals=decimals)

    def per_operation(module, N):
        """Scale run_trials result by 1000000 and divide by the factor*N operations."""
        return 1000000*run_trials(module, N, factor)/(factor*N)

    N = 256
    while N < max_n:
        heap = per_operation('ch04.heap', N)
        fheap = per_operation('ch04.factorial_heap', N)
        tbl.row([N, heap, fheap])
        N *= 2
    return tbl
88 |
def factorial_heap_timing(max_n=5920):
    """
    Provide empirical evidence on runtime behavior of max factorial heap.

    For every N from 154 up to (but not including) max_n, time a single enqueue
    into a factorial heap already holding N entries, and write one timing per
    line to 'factorial_timing_results.csv' in the current directory. Each timing
    is the fastest of 10 repetitions, averaged over 7 executions.
    """
    # 10, 34, 154, 874, 5914, 46234 are the boundaries to evaluate
    name = 'factorial_timing_results.csv'
    num_to_insert = 1

    # The timed statement does not depend on N, so build it once
    stmt = '''
sz = len(pq)
for i in range({}):
    pq.enqueue(99,sz+i)
'''.format(num_to_insert)

    # Context manager guarantees the file is closed even if a trial raises
    with open(name, 'w') as file:
        for N in range(154, max_n):
            if N % 1000 == 0:
                print(N)          # progress indicator
            timing = min(timeit.repeat(stmt=stmt, setup='''
from ch04.factorial_heap import PQ
pq = PQ({0}+700)
for i in range({0},0,-1):
    pq.enqueue(i,i)'''.format(N), repeat=10, number=7))/7
            file.write(str(timing))
            file.write('\n')
    print('created file:', name)
112 |
def dynamic_comparison(max_n=32768, output=True, decimals=2):
    """
    Tabulate timings of 'ch04.heap' (created with capacity N) against
    'ch04.dynamic_heap' (always created with size 256) for N doubling from 256
    up to and including max_n. Returns the DataTable.
    """
    T = 3
    tbl = DataTable([8,8,8],['N','Heap', 'DHeap'], output=output, decimals=decimals)

    N = 256
    while N <= max_n:
        fixed = 1000000*run_trials('ch04.heap', N, T)/(T*N)
        resizable = 1000000*run_dynamic_trials('ch04.dynamic_heap', N, T)/(T*N)
        tbl.row([N, fixed, resizable])
        N *= 2
    return tbl
130 |
131 | #######################################################################
if __name__ == '__main__':
    # Script entry point: run the three chapter 4 experiments in sequence,
    # announcing each one before it starts.
    print('Head-to-head comparison of binary heaps and factorial heaps.')
    trial_factorial_heap()

    print('Evaluate performance of factorial heaps on enqueue.')
    factorial_heap_timing()

    print('Compare performance of resizable Heaps.')
    dynamic_comparison()
141 |
--------------------------------------------------------------------------------
/ch07/tmg_load.py:
--------------------------------------------------------------------------------
1 | """
2 | Python script to load up TMG file as a Graph.
3 | """
4 |
5 | from math import cos, asin, sqrt, pi
6 |
7 | from resources.highway import highway_map
8 | from ch07.single_source_sp import dijkstra_sp
9 | from ch07.replacement import WEIGHT
10 | from ch07.dependencies import plt_error
11 |
12 | try:
13 | import networkx as nx
14 | except ImportError:
15 | import ch07.replacement as nx
16 |
def compute_distance(positions, node_from, src, target):
    """
    Compute total distance from src to target, using positions[] information
    as waypoints for distance.

    positions -- maps each node to the position handed to distance()
    node_from -- maps each node on the path to its predecessor toward src
    src       -- node where the path starts
    target    -- node where the path ends

    The path is walked backwards from target until src is reached, summing the
    distance of every hop. Returns 0 when src and target are the same node
    (previously that case failed because no waypoint had been recorded yet).
    """
    total = 0
    v = target
    last_pos = positions[v]
    while v != src:
        v = node_from[v]
        pos = positions[v]
        total += distance(pos, last_pos)
        last_pos = pos
    return total
33 |
def plot_gps(positions, s=8, marker='.', color='blue'):
    """
    Scatter-plot every node position, using element [1] of each position as
    the x coordinate and element [0] as the y coordinate.
    Does nothing when plt_error is set.
    """
    if plt_error:
        return
    import matplotlib.pyplot as plt

    coords = [positions[i] for i in positions]
    x = [pos[1] for pos in coords]
    y = [pos[0] for pos in coords]
    plt.scatter(x, y, marker=marker, s=s, color=color)
47 |
def bounding_ids(positions):
    """
    Identify the nodes at the extremes of positions, returned as the tuple
    (north_id, east_id, south_id, west_id).

    Element [0] of a position decides north/south and element [1] decides
    east/west. The first node to reach an extreme keeps it on ties; an id of
    -1 means no node improved on the initial bound.
    """
    # Initial bounds: +/-360 are replaced by the first coordinate that beats them
    top = right = -360
    bottom = left = 360
    north_id = east_id = south_id = west_id = -1

    for node in positions:
        gps = positions[node]
        vertical, horizontal = gps[0], gps[1]

        if vertical > top:
            top, north_id = vertical, node
        if vertical < bottom:
            bottom, south_id = vertical, node
        if horizontal > right:
            right, east_id = horizontal, node
        if horizontal < left:
            left, west_id = horizontal, node

    return (north_id, east_id, south_id, west_id)
74 |
def plot_highways(positions, edges, color='gray'):
    """
    Draw one line segment per edge, joining the positions of its two
    endpoints. Does nothing when plt_error is set.
    """
    if plt_error:
        return
    import matplotlib.pyplot as plt

    for e in edges:
        head, tail = positions[e[0]], positions[e[1]]
        xs = [head[1], tail[1]]
        ys = [head[0], tail[0]]
        plt.plot(xs, ys, linewidth=1, color=color)
85 |
def distance(gps1, gps2):
    """
    Return a reasonable distance in miles between two (latitude, longitude)
    pairs given in degrees. Based on helpful method found here:

    https://stackoverflow.com/questions/27928/calculate-distance-between-two-latitude-longitude-points-haversine-formula
    """
    lat1, long1 = gps1
    lat2, long2 = gps2

    to_radians = pi/180
    lat_term = cos((lat2-lat1)*to_radians)/2
    long_term = cos(lat1*to_radians) * cos(lat2*to_radians) * (1-cos((long2-long1)*to_radians))/2
    a = 0.5 - lat_term + long_term

    # 7917.509282 is the 12742 KM earth diameter expressed in miles
    return 7917.509282 * asin(sqrt(a))
98 |
def tmg_load(raw_data):
    """
    Load the lines of a TMG 1.0 simple file into a weighted graph created with
    nx.Graph(), weighting each edge by the distance() between the positions of
    its endpoints.

        TMG 1.0 simple
        #N #E
        {NODE: LABEL LAT LONG}
        {EDGE: id1 id2 LABEL}

    Nodes are numbered 0..N-1 in file order. Returns (G, positions, labels),
    where positions maps node -> (lat, long) and labels maps node -> label.
    Raises ValueError when the first line does not contain 'TMG'.
    """
    G = nx.Graph()

    header = raw_data[0]
    if 'TMG' not in header:
        raise ValueError('Contents is not a valid TMG file ({}).'.format(header))

    (snum_nodes, snum_edges) = raw_data[1].split()
    num_nodes = int(snum_nodes)
    num_edges = int(snum_edges)

    positions = {}
    labels = {}

    # Node records directly follow the two header lines
    first_node_line = 2
    for i in range(num_nodes):
        (label, slat1, slong1) = raw_data[first_node_line + i].split()
        positions[i] = (float(slat1), float(slong1))
        labels[i] = label
        G.add_node(i)

    # Edge records follow the node records; the edge label is ignored
    first_edge_line = first_node_line + num_nodes
    for i in range(num_edges):
        (su, sv, _) = raw_data[first_edge_line + i].split()
        u = int(su)
        v = int(sv)
        G.add_edge(u, v, weight=distance(positions[u], positions[v]))

    return (G, positions, labels)
143 |
144 | #######################################################################
145 | if __name__ == '__main__':
146 | if not plt_error:
147 | import matplotlib.pyplot as plt
148 |
149 | (G,positions, _) = tmg_load(highway_map())
150 | print(G.number_of_nodes(), G.number_of_edges())
151 | print(bounding_ids(positions))
152 |
153 | src = 389
154 | target = 2256
155 |
156 | paths = nx.single_source_shortest_path(G, src)
157 | path = paths[target]
158 |
159 | total = 0
160 | for i in range(len(path)-1):
161 | total += G[path[i]][path[i+1]][WEIGHT]
162 | print(total)
163 | print(G.edges(src, data=True))
164 |
165 | (dist_to, edge_to) = dijkstra_sp(G, src)
166 | print(dist_to[target])
167 |
168 | plot_gps(positions)
169 | plot_highways(positions, G.edges())
170 | plt.show()
171 |
--------------------------------------------------------------------------------
/ch06/pq.py:
--------------------------------------------------------------------------------
1 | """
2 | Priority Queue implementation using Symbol Tree Binary Tree implementation.
3 |
4 | Cannot use symbol table implementation "as is" because there may be multiple
5 | values with the same priority. It is for this reason that the remove() is more
6 | complicated since you have to be careful not to lose values when there happen
7 | to be multiple values with the same priority.
8 |
9 | """
10 | from ch06.avl import resolve_left_leaning, resolve_right_leaning
11 |
class BinaryNode:
    """
    Node of a binary tree that backs a priority queue.

    Attributes
    ----------
        left     - left child (or None)
        right    - right child (or None)
        height   - height of the node
        value    - value for (value, priority) pair
        priority - key for (value, priority) pair
    """
    def __init__(self, v, p):
        self.value, self.priority = v, p
        self.left = self.right = None
        self.height = 0

    @staticmethod
    def _height_of(child):
        """Height recorded in child, with -1 standing in for a missing child."""
        return child.height if child else -1

    def height_difference(self):
        """
        Return height of left child minus height of right child; the result
        may be negative, zero or positive.
        """
        return self._height_of(self.left) - self._height_of(self.right)

    def compute_height(self):
        """Recompute this node's height from the heights stored in its children."""
        tallest = max(self._height_of(self.left), self._height_of(self.right))
        self.height = 1 + tallest
45 |
class BinaryTree:
    """
    Binary tree of (value, priority) nodes: holds the root and the methods
    that grow and traverse the tree.
    """
    def __init__(self):
        self.root = None

    def is_empty(self):
        """Return True when the tree holds no nodes."""
        return self.root is None

    def insert(self, v, p):
        """Add a (value, priority) entry to the tree."""
        self.root = self._insert(self.root, v, p)

    def _insert(self, node, v, p):
        """
        Insert (v, p) beneath node and return the root of the resulting subtree.
        Priorities less than or equal to node's go left; larger ones go right.
        """
        if node is None:
            return BinaryNode(v, p)

        goes_left = p <= node.priority
        if goes_left:
            node.left = self._insert(node.left, v, p)
            node = resolve_left_leaning(node)
        else:
            node.right = self._insert(node.right, v, p)
            node = resolve_right_leaning(node)

        node.compute_height()
        return node

    def __iter__(self):
        """Yield (value, priority) pairs following an in order traversal."""
        yield from self._inorder(self.root)

    def _inorder(self, node):
        """Yield pairs of the subtree rooted at node: left, node itself, right."""
        if node is not None:
            yield from self._inorder(node.left)
            yield (node.value, node.priority)
            yield from self._inorder(node.right)
93 |
class PQ:
    """
    Max priority queue that stores its (value, priority) entries in a
    BinaryTree ordered by priority.

    The number of entries is tracked separately in N so that len() and
    is_empty() do not need to inspect the tree.
    """
    def __init__(self):
        self.tree = BinaryTree()
        self.N = 0

    def __len__(self):
        """Return number of values in priority queue."""
        return self.N

    def is_empty(self):
        """Returns whether priority queue is empty."""
        return self.N == 0

    def is_full(self):
        """Priority queue using a Binary Tree is never full."""
        return False

    def enqueue(self, v, p):
        """
        Enqueue (v, p) entry into priority queue.

        Raises ValueError if priority p is None.
        """
        if p is None:
            raise ValueError('priority for priority queue cannot be None.')

        self.tree.insert(v, p)
        self.N += 1

    def peek(self):
        """
        Return value associated with node with maximum priority in queue,
        without removing it.

        Raises RuntimeError if the queue is empty.
        """
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        # Larger priorities are stored to the right, so the right-most node
        # holds the maximum.
        node = self.tree.root
        while node.right:
            node = node.right

        return node.value

    def _remove_max(self, node):
        """
        Remove right-most node from subtree rooted at node, rebalancing on the
        way back up. Return pair (value removed, new root of subtree).
        """
        if node.right is None:
            # node itself is the maximum; its left subtree takes its place
            return (node.value, node.left)

        (value, node.right) = self._remove_max(node.right)

        # Removing from the right may have left this node left-leaning
        node = resolve_left_leaning(node)
        node.compute_height()
        return (value, node)

    def dequeue(self):
        """
        Remove and return value with highest priority in priority queue.

        Raises RuntimeError if the queue is empty.
        """
        if self.N == 0:
            raise RuntimeError('PriorityQueue is empty!')

        (value, self.tree.root) = self._remove_max(self.tree.root)
        self.N -= 1
        return value

    def __iter__(self):
        """
        In order traversal of the (value, priority) pairs in the PQ, as
        produced by iterating over the underlying tree.
        """
        yield from self.tree
160 |
--------------------------------------------------------------------------------
/ch06/balanced.py:
--------------------------------------------------------------------------------
1 | """
2 | Data Structure for self-balancing AVL Binary Search Tree.
3 |
4 | The tree can contain duplicate values.
5 | """
6 | from ch06.avl import resolve_left_leaning, resolve_right_leaning
7 |
class BinaryNode:
    """
    Node of a binary tree that stores a single value.

    Attributes
    ----------
        value  - value stored by Node
        left   - left child (or None)
        right  - right child (or None)
        height - computed height of node in AVL tree (0 when created)
    """
    def __init__(self, val):
        self.left = None
        self.right = None
        self.height = 0
        self.value = val

    def height_difference(self):
        """
        Return height of left child minus height of right child, where a
        missing child counts as height -1. Result may be negative or positive.
        """
        left_height = -1
        if self.left:
            left_height = self.left.height

        right_height = -1
        if self.right:
            right_height = self.right.height

        return left_height - right_height

    def compute_height(self):
        """Recompute height from the heights currently stored in the children."""
        tallest = -1                      # height used for a missing child
        for child in (self.left, self.right):
            if child:
                tallest = max(tallest, child.height)
        self.height = 1 + tallest

    def size(self):
        """Return number of nodes in subtree rooted at node, counting node itself."""
        return 1 + sum(child.size() for child in (self.left, self.right) if child)
46 |
class BinaryTree:
    """
    Binary search tree of values, rebalanced after each structural change;
    holds the root node and the methods that manipulate the tree.
    """
    def __init__(self):
        self.root = None

    def is_empty(self):
        """Return True when the tree has no nodes."""
        return self.root is None

    def insert(self, val):
        """Add val to the tree."""
        self.root = self._insert(self.root, val)

    def _insert(self, node, val):
        """
        Insert val into subtree rooted at node and return root of resulting
        subtree. A value equal to an existing one goes into the left subtree.
        """
        if node is None:
            return BinaryNode(val)

        if val <= node.value:
            node.left = self._insert(node.left, val)
            rebalance = resolve_left_leaning      # left side may now be too tall
        else:
            node.right = self._insert(node.right, val)
            rebalance = resolve_right_leaning     # right side may now be too tall

        node = rebalance(node)
        node.compute_height()
        return node

    def min(self):
        """Return smallest value in tree (None if empty); tree is unchanged."""
        current = self.root
        if current is None:
            return None

        while current.left:
            current = current.left
        return current.value

    def _remove_min(self, node):
        """
        Remove left-most node from subtree rooted at node and return root of
        resulting subtree, recomputing heights of every ancestor visited.
        """
        if node.left is None:
            return node.right

        node.left = self._remove_min(node.left)

        # Left side shrank, so node might now lean right
        node = resolve_right_leaning(node)
        node.compute_height()
        return node

    def remove(self, val):
        """Remove val from the tree, if present."""
        self.root = self._remove(self.root, val)

    def _remove(self, node, val):
        """Remove val from subtree rooted at node and return resulting subtree."""
        if node is None:
            return None

        if val < node.value:
            node.left = self._remove(node.left, val)
            node = resolve_right_leaning(node)
        elif val > node.value:
            node.right = self._remove(node.right, val)
            node = resolve_left_leaning(node)
        else:
            # With at most one child, that child (or None) replaces node
            if node.left is None:
                return node.right
            if node.right is None:
                return node.left

            # Two children: the node holding the smallest value of the right
            # subtree takes the place of the node being removed.
            removed = node
            successor = removed.right
            while successor.left:
                successor = successor.left

            successor.right = self._remove_min(removed.right)
            successor.left = removed.left

            # Right side shrank, so successor might now lean left
            node = resolve_left_leaning(successor)

        node.compute_height()
        return node

    def __contains__(self, target):
        """Return True if some node in the tree stores target."""
        current = self.root
        while current:
            if target == current.value:
                return True
            current = current.left if target < current.value else current.right

        return False

    def __iter__(self):
        """Yield the values using an in order traversal."""
        yield from self._inorder(self.root)

    def _inorder(self, node):
        """Yield values of subtree rooted at node, in order."""
        if node is not None:
            yield from self._inorder(node.left)
            yield node.value
            yield from self._inorder(node.right)
168 |
--------------------------------------------------------------------------------
/ch06/symbol.py:
--------------------------------------------------------------------------------
1 | """
2 | Data Structure for self-balancing AVL Binary Search Tree that implements a symbol table.
3 |
4 | The tree can contain duplicate values.
5 | """
6 | from ch06.avl import resolve_left_leaning, resolve_right_leaning
7 |
class BinaryNode:
    """
    Node of a binary tree that implements a symbol table.

    Attributes
    ----------
        key    - key for (key, value) pair
        value  - value for (key, value) pair
        left   - left child (or None)
        right  - right child (or None)
        height - height of the node in AVL tree (0 when created)
    """
    def __init__(self, k, v):
        self.left = None
        self.right = None
        self.height = 0
        self.key = k
        self.value = v

    def __str__(self):
        template = '{} -> {} [{}]'
        return template.format(self.key, self.value, self.height)

    def height_difference(self):
        """
        Return height of left child minus height of right child, where a
        missing child counts as height -1. Result may be negative or positive.
        """
        left_height = -1
        if self.left:
            left_height = self.left.height

        right_height = -1
        if self.right:
            right_height = self.right.height

        return left_height - right_height

    def compute_height(self):
        """Recompute height from the heights currently stored in the children."""
        tallest = -1                      # height used for a missing child
        for child in (self.left, self.right):
            if child:
                tallest = max(tallest, child.height)
        self.height = 1 + tallest

    def size(self):
        """Return number of nodes in subtree rooted at node, counting node itself."""
        return 1 + sum(child.size() for child in (self.left, self.right) if child)
51 |
class BinaryTree:
    """
    Binary search tree of (key, value) pairs implementing a symbol table. The
    tree is rebalanced after each structural change; holds the root node and
    the methods that manipulate the tree.
    """
    def __init__(self):
        self.root = None

    def is_empty(self):
        """Returns whether tree is empty."""
        return self.root is None

    def put(self, k, v):
        """
        Adds a new BinaryNode to the tree containing this value or update
        association of (k, v).

        Raises ValueError if key k is None.
        """
        if k is None:
            raise ValueError('key for symbol table cannot be None.')
        self.root = self._put(self.root, k, v)

    def _put(self, node, k, v):
        """
        Adds a new BinaryNode to the subtree rooted at node or update
        association of (k, v). Returns root of resulting subtree.
        """
        if node is None:
            return BinaryNode(k,v)

        if k == node.key:
            # Existing key: only the value changes, so no rebalancing is needed
            node.value = v
            return node

        if k < node.key:
            node.left = self._put(node.left, k, v)
            node = resolve_left_leaning(node)
        else:
            node.right = self._put(node.right, k, v)
            node = resolve_right_leaning(node)

        node.compute_height()
        return node

    def remove(self, key):
        """Remove (key, value) entry for key from the tree, if present."""
        self.root = self._remove(self.root, key)

    def _remove_min(self, node):
        """
        Delete minimum key from subtree rooted at node and return root of
        resulting subtree, recomputing heights of every ancestor visited.
        """
        if node.left is None:
            return node.right

        node.left = self._remove_min(node.left)

        # Left side shrank, so node might now lean right
        node = resolve_right_leaning(node)
        node.compute_height()
        return node

    def _remove(self, node, key):
        """Remove (key,value) from subtree rooted at node and return resulting subtree."""
        if node is None:
            return None

        if key < node.key:
            node.left = self._remove(node.left, key)
            node = resolve_right_leaning(node)
        elif key > node.key:
            node.right = self._remove(node.right, key)
            node = resolve_left_leaning(node)
        else:
            # With at most one child, that child (or None) replaces node
            if node.left is None:
                return node.right
            if node.right is None:
                return node.left

            # Two children: replace node with the node holding the smallest
            # key in its right subtree.
            original = node

            # find SMALLEST child in right subtree and remove it
            node = node.right
            while node.left:
                node = node.left

            node.right = self._remove_min(original.right)
            node.left = original.left

            # Right side shrank, so node might now lean left
            node = resolve_left_leaning(node)

        node.compute_height()
        return node

    def __contains__(self, key):
        """
        Check whether BST contains key. Searches for the key itself rather
        than testing the result of get(), so a key whose associated value is
        None is still reported as present.
        """
        node = self.root
        while node:
            if key == node.key:
                return True
            node = node.left if key < node.key else node.right

        return False

    def get(self, key):
        """
        Symbol table API to retrieve value associated with key. Returns None
        when key is not in the tree.
        """
        node = self.root
        while node:
            if key == node.key:
                return node.value
            if key < node.key:
                node = node.left
            else:
                node = node.right

        return None

    def __iter__(self):
        """In order traversal of (key, value) pairs in the tree."""
        yield from self._inorder(self.root)

    def _inorder(self, node):
        """Yield (key, value) pairs of subtree rooted at node, in order."""
        if node is None:
            return

        yield from self._inorder(node.left)
        yield (node.key, node.value)
        yield from self._inorder(node.right)
178 |
--------------------------------------------------------------------------------
/ch06/expression.py:
--------------------------------------------------------------------------------
1 | """
2 | Use Binary Tree structure to represent binary expressions.
3 | """
4 | import re
5 |
class Value:
    """
    Leaf of an Expression tree that holds a constant value.

    Provides eval() and __str__(), and can emit itself in postfix form.
    """
    def __init__(self, e):
        self.value = e

    def __str__(self):
        text = str(self.value)
        return text

    def eval(self):
        """A value evaluates to itself."""
        return self.value

    def postfix(self):
        """Postfix form of a value is just the value."""
        yield self.value

    def references(self):
        """A Value refers to nothing, which is signalled by yielding a single None."""
        yield None
29 |
class Reference:
    """
    Leaf of an Expression tree that names a value to be looked up in an
    environment when evaluated.

    Provides eval() and __str__(), and can emit itself in postfix form.
    """
    def __init__(self, e, environment=None):
        if environment is None:
            environment = {}          # each Reference gets its own empty environment
        self.reference = e
        self.environment = environment

    def __str__(self):
        text = str(self.reference)
        return text

    def eval(self):
        """Look up the reference in the environment; a missing entry evaluates to 0."""
        try:
            found = self.environment[self.reference]
        except KeyError:
            return 0
        return found

    def postfix(self):
        """Postfix form of a reference is just the reference."""
        yield self.reference

    def references(self):
        """Yield the single reference held by this node."""
        yield self.reference
57 |
class Expression:
    """
    Node structure to use in a binary expression tree.

    Attributes
    ----------
        left  - left operand (any node providing eval(), references(), postfix())
        right - right operand (same requirements as left)
        func  - A function to perform a binary operation; its docstring is
                used as the operator symbol by __str__() and postfix()
    """
    def __init__(self, func, left, right):
        self.func = func
        self.left = left
        self.right = right

    def __str__(self):
        return '({} {} {})'.format(
            self.left,
            self.func.__doc__,
            self.right)

    def eval(self):
        """Evaluate expression by applying func to the evaluated operands."""
        return self.func(self.left.eval(), self.right.eval())

    def references(self):
        """
        Return generator for all references, if any exist, from the left
        operand first and then the right.

        An operand reports "no reference" by yielding None; only None is
        filtered out, so any other reference (even a falsy one) is kept.
        """
        for operand in (self.left, self.right):
            for v in operand.references():
                if v is not None:
                    yield v

    def postfix(self):
        """Return generator containing postfix representation of expression."""
        yield from self.left.postfix()
        yield from self.right.postfix()

        # The operator symbol comes after both operands
        yield self.func.__doc__
102 |
# Pre-loaded operations
#
# NOTE: the docstring of each operation is its operator symbol. Expression
# emits func.__doc__ from __str__() and postfix(), so these docstrings are
# runtime data and must not be reworded.
def add(left, right):
    """+"""
    return left + right

def mult(left, right):
    """*"""
    return left * right

def divide(left, right):
    """/"""
    # True division; a zero right operand raises ZeroDivisionError for numbers
    return left / right

def sub(left, right):
    """-"""
    return left - right

# Built in operators here: maps each operator symbol to the function applied
# to its (left, right) operands.
_operators = { '+' : add, '-' : sub, '*' : mult, '/' : divide }
122 |
def build_expression(s, new_operators=None, environment=None):
    """
    Given a string consisting of numeric values, parentheses and
    mathematical operators, return Expression tree using a stack-based
    algorithm developed by Dijkstra. To parse new operations, simply
    pass them in as a dict where key is the symbol for the new operator and
    its value is a function that takes in two arguments (left, right) for
    the operands to the binary function.

    Tokens that cannot be converted to float become Reference nodes that are
    resolved against environment when evaluated.

    Raises ValueError if new_operators tries to redefine a built-in operator.
    """
    known_operators = dict(_operators)

    if new_operators:
        for op in new_operators:
            if op in _operators:
                raise ValueError('Attempting to overwrite existing operator: {}'.format(op))

            known_operators[op] = new_operators[op]

    # Match open- and close- parens, any run of letters/digits/'.'/'_', and
    # the known operators. Every operator is escaped with re.escape() so that
    # regex metacharacters such as '^' or '*' are matched literally, and the
    # pattern is a raw string so no invalid escape sequences are involved.
    # Longer operators are listed first so they win over operators that are
    # a prefix of them.
    by_length = sorted(known_operators, key=len, reverse=True)
    operator_pattern = '|'.join(re.escape(op) for op in by_length)
    pattern = re.compile(r'(\(|\)|[a-zA-Z.0-9_]+|{})'.format(operator_pattern))

    # A bit out of place, but the stack was introduced in Chapter 07
    from ch07.list_stack import Stack
    ops = Stack()
    expressions = Stack()

    for token in pattern.findall(s):
        if token in known_operators:
            ops.push(known_operators[token])      # Push each operator found for later
        elif token == '(':
            pass                                  # Nothing to do until the matching ')'
        elif token == ')':
            op = ops.pop()                        # Close out most recent expression
            right = expressions.pop()             # Order matters: right was pushed last
            left = expressions.pop()
            expressions.push(Expression(op, left, right))
        else:
            try:
                # A numeric value is pushed for later
                expressions.push(Value(float(token)))
            except ValueError:
                # Not a number: keep it as a reference to be resolved on eval()
                expressions.push(Reference(token, environment))

    return expressions.pop()                      # If parens balance, then left with expression
174 |
--------------------------------------------------------------------------------
/ch07/search.py:
--------------------------------------------------------------------------------
1 | """
2 | Code to blindly search through a Graph in Depth First and Breadth First strategies. Also
3 | contains a rudimentary Guided Search for undirected graphs when there is a metric showing
4 | how far a node is from the target.
5 | """
6 | import random
7 | try:
8 | import networkx as nx
9 | except ImportError:
10 | import ch07.replacement as nx
11 |
12 | from ch07.dependencies import plt_error
13 | from ch07.maze import Maze, to_networkx, solution_graph, node_from_field
14 | from ch04.list_queue import Queue
15 |
def path_to(node_from, src, target):
    """
    Given a dictionary that results from a search, reproduce path from original src
    to target. The node_from links are followed in reverse, from target back to
    src, so the nodes are collected backwards and the list is reversed at the end.

    The path from src to itself is [src], even though src never appears as a
    key in node_from.

    Raises ValueError if target differs from src and is not in node_from.

    Performance is O(N) since a path could involve all nodes, in the worst case.
    """
    if target != src and target not in node_from:
        raise ValueError('{} is unreachable from {}'.format(target,src))

    path = []
    v = target
    while v != src:
        path.append(v)
        v = node_from[v]

    # src is added last, so after reversing it is the first node in the path
    path.append(src)
    path.reverse()
    return path
38 |
def dfs_search_recursive(G, src):
    """
    Recursive Depth First Search of a graph starting at src. Returns the
    node_from dictionary recording, for each node discovered, the node it
    was reached from.

    Performance is O(N+E) since every edge is visited once for a directed
    graph and twice for an undirected graph.

    Warning: This code is likely to cause a RecursionError when applied
    to a graph with thousands of nodes, because Python sets the recursion
    limit to about 1000.
    """
    node_from = {}
    visited = set()

    def explore(current):
        visited.add(current)

        for neighbor in G[current]:
            if neighbor in visited:
                continue
            node_from[neighbor] = current
            explore(neighbor)

    explore(src)
    return node_from
64 |
def dfs_search(G, src):
    """
    Non-recursive Depth First Search of a graph starting at src, driven by an
    explicit stack. Returns the node_from dictionary recording, for each node
    discovered, the node it was reached from.

    Performance is O(N+E) since every edge is visited once for a directed
    graph and twice for an undirected graph.
    """
    from ch07.list_stack import Stack
    node_from = {}

    pending = Stack()
    visited = {src}
    pending.push(src)

    while not pending.is_empty():
        current = pending.pop()
        for neighbor in G[current]:
            if neighbor in visited:
                continue

            # A node is marked as soon as it is pushed, so it is pushed only once
            node_from[neighbor] = current
            visited.add(neighbor)
            pending.push(neighbor)

    return node_from
90 |
def bfs_search(G, src):
    """
    Breadth First Search of a graph starting at src, driven by a queue.
    Returns the node_from dictionary recording, for each node discovered,
    the node it was reached from.

    Performance is O(N+E) since every edge is visited once for a directed
    graph and twice for an undirected graph.
    """
    node_from = {}

    waiting = Queue()
    visited = {src}
    waiting.enqueue(src)

    while not waiting.is_empty():
        current = waiting.dequeue()
        for neighbor in G[current]:
            if neighbor in visited:
                continue

            # A node is marked as soon as it is enqueued, so it is enqueued only once
            node_from[neighbor] = current
            visited.add(neighbor)
            waiting.enqueue(neighbor)

    return node_from
115 |
def guided_search(G, src, target, distance):
    """
    Non-recursive search from src that always continues from the discovered
    node reported as closest to target. Needs a distance (node1, node2)
    function to determine distance between two nodes. Returns the node_from
    dictionary recording, for each node discovered, the node it was reached from.

    Performance is O(N log N + E) since every edge is visited once for a directed
    graph and twice for an undirected graph. Each of the N nodes is processed by
    the priority queue, where dequeue() and enqueue() operations are each O(log N).
    While it is unlikely that the priority queue will ever contain N nodes, the
    worst case possibility always exists.
    """
    from ch04.heap import PQ
    node_from = {}

    frontier = PQ(G.number_of_nodes())
    visited = {src}

    # The priority queue returns the MAXIMUM priority first, so distances are
    # negated to make the closest node come out first.
    frontier.enqueue(src, -distance(src, target))

    while not frontier.is_empty():
        current = frontier.dequeue()

        for neighbor in G.neighbors(current):
            if neighbor in visited:
                continue

            node_from[neighbor] = current
            visited.add(neighbor)
            frontier.enqueue(neighbor, -distance(neighbor, target))

    return node_from
148 |
def draw_solution(G, field, src, target, figsize=(12,6)):
    """
    Use matplotlib to draw two graphs side by side: on the left the original
    graph containing the solution path from src to the designated target
    vertex, and on the right a visualization of the node_from dictionary
    passed in as field.

    Does nothing when plt_error is set.
    """
    if plt_error:
        return
    import matplotlib.pyplot as plt

    graphs = (solution_graph(G, path_to(field, src, target)),
              node_from_field(G, field))

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=figsize)

    for graph, axis in zip(graphs, axes.flatten()):
        # nodes are placed using the 'pos' attribute stored in each graph
        positions = nx.get_node_attributes(graph, 'pos')
        nx.draw(graph, positions, with_labels = True, node_color='w', font_size=8, ax=axis)
170 |
171 | #######################################################################
if __name__ == '__main__':
    random.seed(15)
    m = Maze(3,5)     # Anything bigger and these are too small to read
    graph = to_networkx(m)

    # Choose whether to use dfs_search, bfs_search, or guided_search
    draw_solution(graph, bfs_search(graph, m.start()), m.start(), m.end())

    # draw_solution() draws nothing when plt_error is set, so only import
    # matplotlib and show the figure when plotting is available.
    if not plt_error:
        import matplotlib.pyplot as plt
        plt.show()
182 |
--------------------------------------------------------------------------------
/ch02/timing.py:
--------------------------------------------------------------------------------
1 | """Timing results for chapter 2.
2 |
3 | Generate timing results for inefficient sorting algorithms.
4 |
5 | :Sample Output:
6 |
7 | Permutation Sort Trials (up to N=12): These can take Unusually Long.
8 | Factorial = 3.7490738642164824e-07*N!
9 | N TimeToSort Model
10 | 1 0.0000 0.0000
11 | 2 0.0000 0.0000
12 | 3 0.0000 0.0000
13 | 4 0.0000 0.0000
14 | 5 0.0000 0.0000
15 | 6 0.0003 0.0003
16 | 7 0.0018 0.0019
17 | 8 0.0149 0.0151
18 | 9 0.1350 0.1360
19 | 10 1.3777 1.3605
20 | 11 15.7066 14.9651
21 | 12 194.1625 179.5812
22 |
23 | Random Sort Trials (up to N=11): These can take Unusually Long.
24 | Factorial = 5.975750448109412e-07*N!
25 | N TimeToSort Model
26 | 1 0.0000 0.0000
27 | 2 0.0000 0.0000
28 | 3 0.0000 0.0000
29 | 4 0.0000 0.0000
30 | 5 0.0000 0.0001
31 | 6 0.0012 0.0004
32 | 7 0.0011 0.0030
33 | 8 0.1574 0.0241
34 | 9 0.1935 0.2168
35 | 10 21.2817 2.1685
36 | 11 137.0447 23.8533
37 | """
38 |
39 | import timeit
40 | from algs.table import DataTable
41 | from algs.modeling import numpy_error, factorial_model
42 |
def run_permutation_sort_worst_case(top=11, output=True, decimals=4):
    """
    Generate table for permutation sort from 1 up to and including top, using
    a list in reverse order as input.

    A factorial model is first fitted to timings for sizes 1 through 9; when
    numpy_error is set no fit is attempted and the model coefficient is 0.
    Each row of the table holds N, the time to sort, and the model's value.
    Returns the table.
    """
    def worst_case_time(n):
        """Time a single permutation_sort() of the list n, n-1, ..., 1."""
        return timeit.timeit(stmt='permutation_sort(x)', setup='''
from ch02.random_sort import permutation_sort
x=list(range({},0,-1))'''.format(n), number=1)

    # Build model for runs of size 1 through 9.
    x = list(range(1,10))
    y = [worst_case_time(n) for n in x]

    # Coefficients are returned as first argument
    if numpy_error:
        factorial_coeffs = [0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [factorial_coeffs, _] = curve_fit(factorial_model, np.array(x), np.array(y))
        if output:
            print('Factorial = {}*N!'.format(factorial_coeffs[0]))

    tbl = DataTable([8,8,8], ['N', 'TimeToSort', 'Model'], output=output, decimals=decimals)

    for n in range(1,top+1):
        tbl.row([n, worst_case_time(n), factorial_model(n, factorial_coeffs[0])])
    return tbl
73 |
def run_random_sort(top=12, output=True):
    """
    Generate table for random sort from 1 up to and including top, using a
    shuffled list as input.

    A factorial model is first fitted to timings for sizes 1 through 9; when
    numpy_error is set no fit is attempted and the model coefficient is 0.
    Each row of the table holds N, the time to sort, and the model's value.
    The output flag controls both the printed model and the table itself.
    Returns the table.
    """
    def shuffled_time(n):
        """Time a single random_sort() of a shuffled list of n values."""
        return timeit.timeit(stmt='random_sort(x)', setup='''
import random
from ch02.random_sort import random_sort
x=list(range({}))
random.shuffle(x)'''.format(n), number=1)

    # Build model for runs of size 1 through 9.
    x = list(range(1,10))
    y = [shuffled_time(n) for n in x]

    # Coefficients are returned as first argument
    if numpy_error:
        factorial_coeffs = [0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        [factorial_coeffs, _] = curve_fit(factorial_model, np.array(x), np.array(y))
        if output:
            print('Factorial = {}*N!'.format(factorial_coeffs[0]))

    # output is forwarded so that output=False also silences the table
    tbl = DataTable([8,8,8], ['N', 'TimeToSort', 'Model'], output=output, decimals=4)

    for n in range(1,top+1):
        tbl.row([n, shuffled_time(n), factorial_model(n, factorial_coeffs[0])])
    return tbl
109 |
def incremental_multiplication(output=True):
    """
    Time the multiplication of pairs of large numbers and return the table
    of results, one row per size with the fastest and slowest measurement.
    Checking every size takes several hours, so only sizes that are powers
    of 2 (from 2**3 through 2**11) are measured.
    """
    num = 1000
    tbl = DataTable([8,8,8], ['N', 'Min Mult', 'Max Mult'], decimals=5, output=output)

    for exponent in range(3, 12):
        n = 2 ** exponent

        # Each of the 20 repetitions multiplies num pre-generated pairs in turn
        all_times = timeit.repeat(stmt='idx += 1\nmult_pair(pairs[idx])', setup='''
from ch02.mult import create_random_pair, mult_pair
idx = -1
pairs = [create_random_pair({}) for _ in range({})]'''.format(n,num), repeat=20, number=num)

        fastest = min(all_times)
        slowest = max(all_times)
        tbl.row([n, fastest, slowest])

    return tbl
124 |
def run_range_analysis(output=True):
    """
    Confirm O(log N) algorithm to find range of duplicates, by timing
    worst_range() and best_range() on sorted lists for N from 2**10 through
    2**19. Each row of the table holds N, the worst_range time and the
    best_range time, each being the best of 40 repetitions averaged over
    50 calls. Returns the table.
    """
    tbl = DataTable([8,8,8], ['N', 'O(N)', 'O(log N)'], decimals=7, output=output)

    # Setup code shared by both timings: {0} is N and {1} is N//16.
    # NOTE(review): as written the list holds N copies of tgt plus N - N//16
    # random values, so it is longer than N -- confirm this is intended.
    commands = '''
from random import random
tgt = random()
alist = [tgt] * {0}
for _ in range({0}-{1}):
    alist.append(random())
alist = sorted(alist)
'''
    for n in [2**k for k in range(10, 20)]:
        custom = commands.format(n, n//16)
        best_times = min(timeit.repeat(stmt='best_range(alist, tgt)', setup='''
from ch02.challenge import best_range
{}'''.format(custom), repeat=40, number=50))/50
        worst_times = min(timeit.repeat(stmt='worst_range(alist, tgt)', setup='''
from ch02.challenge import worst_range
{}'''.format(custom), repeat=40, number=50))/50

        tbl.row([n, worst_times, best_times])
    return tbl
147 |
148 | #######################################################################
if __name__ == '__main__':

    # Each experiment is announced and then run, in the order listed.
    experiments = (
        ('Timing of Multiplication of n-digit numbers.',
         incremental_multiplication),
        ('Permutation Sort Trials (up to N=12): These can take Unusually Long.',
         lambda: run_permutation_sort_worst_case(12)),
        ('Random Sort Trials (up to N=11): These can take Unusually Long.',
         lambda: run_random_sort(11)),
        ('Timing of finding range among duplicates.',
         run_range_analysis),
    )

    for banner, experiment in experiments:
        print(banner)
        experiment()
162 |
--------------------------------------------------------------------------------
/ch03/months.py:
--------------------------------------------------------------------------------
1 | """Opening example for Chapter 03.
2 |
3 | Provides different alternatives to recording the number of days in each
4 | calendar month
5 |
6 | * days_in_month is a standard Python dict. Access as days_in_month[m]
7 | * days_in_month_mixed uses a list with months in even index locations
8 | and days in odd index locations. Access as days_mixed(m)
9 | * s_data and s_num are parallel arrays, sorted alphabetically to
10 | allow binary array search to be used over s_data. Access
11 | as days_bas(m)
12 |
13 | """
14 | import calendar
15 | from datetime import date
16 | from algs.sorting import unique
17 |
# https://www.oreilly.com/library/view/high-performance-python/9781449361747/ch04.html#:~:text=By%20default%2C%20the%20smallest%20size,will%20still%20allocate%20eight%20elements).
# Month name -> number of days, as a standard dict. February is recorded with
# 28 days.
days_in_month = {
    'January'   : 31,  'February' : 28,  'March'     : 31,
    'April'     : 30,  'May'      : 31,  'June'      : 30,
    'July'      : 31,  'August'   : 31,  'September' : 30,
    'October'   : 31,  'November' : 30,  'December'  : 31
}

# mixed type arrays can also be used: month names are at even index positions
# and each month's number of days is at the odd index position that follows.
days_in_month_mixed = [ 'January', 31, 'February', 28, 'March', 31, 'April', 30,
    'May', 31, 'June', 30, 'July', 31, 'August', 31, 'September', 30,
    'October', 31, 'November', 30, 'December', 31]

# parallel arrays in alphabetic order suitable for Binary Array Search:
# s_num[i] is the number of days in month s_data[i].
s_data = [ 'April', 'August', 'December', 'February', 'January', 'July', 'June',
           'March', 'May', 'November', 'October', 'September']
s_num  = [ 30, 31, 31, 28, 31, 31, 30, 31, 31, 30, 31, 30]

# canonical ordering of months, with lengths in parallel array:
# month_length[i] is the number of days in month key_array[i].
key_array = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July',
              'August', 'September', 'October', 'November', 'December' ]
month_length = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
40 |
def print_month(month, year):
    """
    Print brief monthly calendar for month, like ('December', 2020).

    The grid has seven three-character columns, Sunday first. February is
    given a 29th day when year is a leap year. Raises ValueError (from
    list.index) if month is not one of the names in key_array.
    """
    idx = key_array.index(month)

    # date.weekday() reports Monday as 0; rotate so Sunday becomes column 0.
    wd = (date(year, idx + 1, 1).weekday() + 1) % 7

    end = month_length[idx]
    if idx == 1 and calendar.isleap(year):   # February LeapYear has one extra day
        end += 1

    print('{} {}'.format(month, year).center(20))
    print('Su Mo Tu We Th Fr Sa')

    # Each weekday column is three characters wide ('{:2d} '), so every
    # skipped column before the 1st must also be padded by three characters.
    print('   ' * wd, end='')
    for day in range(1, end + 1):
        print('{:2d} '.format(day), end='')
        wd = (wd + 1) % 7
        if wd == 0:                          # just printed Saturday; start new week
            print()
    print()
61 |
def day_of_week(y, m, d):
    """
    Compute the day of the week arithmetically, returning 0 for Sunday
    through 6 for Saturday.

    Uses the formula first posted to the comp.lang.c discussion boards in
    1992 by Tomohiko Sakamoto. Further details at
    https://cs.uwaterloo.ca/~alopez-o/math-faq/node73.html. Works for dates
    after 1752 when Gregorian calendar formally adopted, so requires
    1 <= m <= 12 and y > 1752.
    """
    # January and February are counted as belonging to the previous year.
    year = y - 1 if m < 3 else y

    # Character m of this string encodes (via its code point, modulo 7) the
    # offset for month m; position 0 is an unused placeholder.
    month_offset = ord('-bed=pen+mad.'[m])

    return (year + year//4 - year//100 + year//400 + month_offset + d) % 7
71 |
def day_of_week_one_line(y, m, d):
    """Same weekday computation as a single expression, just for fun (0 is Sunday)."""
    return (lambda a: (a + a//4 - a//100 + a//400 + ord('-bed=pen+mad.'[m]) + d) % 7)(y - (m < 3))
75 |
def days_mixed(month):
    """
    Demonstrate using mixed-type array to compute month length.

    Scans the even index locations of days_in_month_mixed for month and
    returns the value stored immediately after it, or 0 if month is absent.
    """
    # Step by two over the whole list rather than assuming exactly 24 slots.
    for i in range(0, len(days_in_month_mixed), 2):
        if days_in_month_mixed[i] == month:
            return days_in_month_mixed[i + 1]
    return 0
82 |
def days_bas(month):
    """
    Use Binary Array Search over the alphabetized s_data to find the number
    of days in the given month. Returns 0 when the search reports a
    negative index.
    """
    from ch02.bas import binary_array_search
    pos = binary_array_search(s_data, month)
    return 0 if pos < 0 else s_num[pos]
90 |
def sample_search(p1, p2):
    """
    Check if all hashes are unique for p1 and p2.

    When every month in s_data receives a distinct, non-negative index from
    month_index(), return a list (filled with -1 in unused slots) holding
    each month's length at its computed index. Otherwise return None.
    """
    codes = [month_index(name, p1, p2) for name in s_data]
    if min(codes) < 0 or not unique(codes):
        return None

    slots = [-1] * (max(codes) + 1)
    for code, days in zip(codes, s_num):
        slots[code] = days
    return slots
100 |
def month_index(m, p1, p2):
    """
    Computed Function to return unique key for month names.

    Folds the characters of m left to right: the running total is multiplied
    by p1, the next character's code point is added, and the result is
    reduced modulo p2.
    """
    total = 0
    for code_point in map(ord, m):
        total = (total * p1 + code_point) % p2
    return total
107 |
def search_for_data():
    """
    Search prime numbers for suitable constants to use.

    Tries every (p1, p2) pair drawn from a fixed list of candidates and keeps
    the first pair whose sample_search() table is shortest. Returns
    ((p1, p2), table), or (None, None) when no pair works.
    """
    candidates = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 61, 67, 71]
    smallest = None
    smallest_pair = None
    for p1 in candidates:
        for p2 in candidates:
            table = sample_search(p1, p2)
            if not table:
                continue
            # Strictly-shorter test keeps the earliest pair among ties.
            if smallest is None or len(table) < len(smallest):
                smallest, smallest_pair = table, (p1, p2)

    return (smallest_pair, smallest)
125 |
def search_for_hashes():
    """
    What is smallest array that stores unique values for months using default hash.

    Starting with one slot per month, grows the table size N until
    hash(key) % N is distinct for every month in key_array, then returns a
    list of length N with each month's length stored in its slot and None
    in unused slots. Python randomizes str hashes per process (unless
    PYTHONHASHSEED is fixed), so the result can differ from run to run.
    """
    N = len(key_array)
    while True:
        # Hash each key once and reuse the slots when filling the table.
        slots = [hash(k) % N for k in key_array]
        if len(set(slots)) == len(slots):
            tbl = [None] * N
            for slot, days in zip(slots, month_length):
                tbl[slot] = days
            return tbl
        N += 1
139 |
def craft_table():
    """
    Create a Hashtable from months. Changes each time you run because of
    salted hash strings.

    Tries table sizes 12 through 999 and returns the first Hashtable that
    accepts every month without put() raising RuntimeError; returns None if
    no size in that range succeeds.
    """
    from ch03.hashtable import Hashtable
    upper = 1000
    for M in range(12, upper):
        candidate = Hashtable(M)
        try:
            for key, days in zip(key_array, month_length):
                candidate.put(key, days)
        except RuntimeError:
            continue          # this size failed; try the next one
        return candidate

    return None
157 |
158 | #######################################################################
if __name__ == '__main__':
    # Validate leap years and non-leap years
    print_month('February', 2021)
    print()
    print_month('February', 2024)
    print()

    # search_for_data() yields (None, None) when no candidate pair works.
    (best_tuple, _) = search_for_data()
    if best_tuple is None:
        print('No suitable constants found for monthIndex.')
    else:
        print('Two constants in monthIndex should be p1 =', best_tuple[0], 'and p2 =', best_tuple[1])

    hash_result = search_for_hashes()
    print('Need hashtable of size', len(hash_result), 'to store months uniquely.')
    print(hash_result)

    # craft_table() returns None when every attempted size fails.
    ht = craft_table()
    if ht is None:
        print('unable to create hashtable for months')
    else:
        print('created hashtable of size', ht.M)
175 |
--------------------------------------------------------------------------------
/ch03/hashtable_linked.py:
--------------------------------------------------------------------------------
1 | """
2 | Hashtable to store (key, value) pairs in a fixed hashtable of linked
3 | lists, using hash() % M as hash code. This table can replace values
4 | associated with a given key. When two keys attempt to use
5 | the same location, a linked list is constructed.
6 |
7 | Hashtable will never "run out" of storage, though performance suffers
8 | as more (key, value) pairs are added.
9 | """
10 |
11 | from ch03.entry import LinkedEntry
12 |
class Hashtable:
    """
    Hashtable using array of M linked lists.

    Attributes
    ----------
        table - list of M buckets, each None or the head of a linked list
        M     - number of buckets (fixed at construction)
        N     - number of (key, value) pairs currently stored
    """
    def __init__(self, M=10):
        if M < 1:
            raise ValueError('Hashtable storage must be at least 1.')
        self.M = M
        self.N = 0
        self.table = [None] * M

    def get(self, k):
        """Return the value stored for key k, or None if k is absent."""
        node = self.table[hash(k) % self.M]      # head of the only chain k can be in
        while node:
            if node.key == k:
                return node.value
            node = node.next
        return None

    def put(self, k, v):
        """Store v under key k, replacing any value already stored for k."""
        bucket = hash(k) % self.M
        node = self.table[bucket]
        while node:
            if node.key == k:                    # existing key: overwrite in place
                node.value = v
                return
            node = node.next

        # New key: prepend to the chain for this bucket.
        self.table[bucket] = LinkedEntry(k, v, self.table[bucket])
        self.N += 1

    def remove(self, k):
        """Unlink the entry for key k and return its value; None if k is absent."""
        bucket = hash(k) % self.M
        before = None
        node = self.table[bucket]
        while node:
            if node.key != k:
                before, node = node, node.next
                continue

            if before:
                before.next = node.next          # splice out of the middle/end
            else:
                self.table[bucket] = node.next   # removed entry was the head
            self.N -= 1
            return node.value

        return None

    def __iter__(self):
        """Yield a (key, value) tuple for every entry, bucket by bucket."""
        for head in self.table:
            node = head
            while node:
                yield (node.key, node.value)
                node = node.next
69 |
class DynamicHashtable:
    """
    Hashtable using array of M linked lists that can resize over time.

    Attributes
    ----------
        table       - list of M buckets, each None or the head of a linked list
        M           - current number of buckets
        N           - number of (key, value) pairs currently stored
        load_factor - fraction of M used to derive the resize threshold
        threshold   - once N reaches this value after an insert, table grows
    """
    def __init__(self, M=10):
        self.table = [None] * M
        if M < 1:
            raise ValueError('Hashtable storage must be at least 1.')
        self.M = M
        self.N = 0
        self.load_factor = 0.75

        # Ensure for M <= 3 that threshold is no greater than M-1
        self.threshold = min(M * self.load_factor, M-1)

    def get(self, k):
        """Return the value stored for key k, or None if k is absent."""
        node = self.table[hash(k) % self.M]      # head of the only chain k can be in
        while node:
            if node.key == k:
                return node.value
            node = node.next
        return None

    def put(self, k, v):
        """Store v under key k, replacing any existing value; may grow the table."""
        bucket = hash(k) % self.M
        node = self.table[bucket]
        while node:
            if node.key == k:                    # existing key: overwrite in place
                node.value = v
                return
            node = node.next

        # New key: prepend to its chain, then grow if the threshold was hit.
        self.table[bucket] = LinkedEntry(k, v, self.table[bucket])
        self.N += 1

        if self.N >= self.threshold:
            self.resize(2*self.M + 1)

    def resize(self, new_size):
        """Rebuild storage with new_size buckets, rehashing every existing entry."""
        rebuilt = DynamicHashtable(new_size)
        for head in self.table:
            node = head
            while node:
                rebuilt.put(node.key, node.value)
                node = node.next

        # Adopt the rebuilt storage and drop the temporary's reference to it.
        self.table, rebuilt.table = rebuilt.table, None
        self.M = rebuilt.M
        self.threshold = self.load_factor * self.M

    def remove(self, k):
        """Unlink the entry for key k and return its value; None if k is absent."""
        bucket = hash(k) % self.M
        before = None
        node = self.table[bucket]
        while node:
            if node.key != k:
                before, node = node, node.next
                continue

            if before:
                before.next = node.next          # splice out of the middle/end
            else:
                self.table[bucket] = node.next   # removed entry was the head
            self.N -= 1
            return node.value

        return None

    def __iter__(self):
        """Yield a (key, value) tuple for every entry, bucket by bucket."""
        for head in self.table:
            node = head
            while node:
                yield (node.key, node.value)
                node = node.next
147 |
def stats_linked_lists(ht, output=False):
    """
    Produce statistics on the linked-list implemented table. Returns
    (average chain length for non-empty buckets, max chain length), or
    (0, 0) when every bucket is empty.

    ht must expose `table` (a list whose items are None or the head of a
    chain linked through `.next`) and `N` (total number of entries). When
    output is True, also prints how many chains exist of each length,
    including empty buckets as length 0.
    """
    size = len(ht.table)
    sizes = {}                      # chain length -> number of buckets with that length
    max_length = 0
    total_non_empty = 0
    for entry in ht.table:
        num = 0
        while entry:                # count how many are in this chain
            num += 1
            entry = entry.next

        if num:
            total_non_empty += 1
        sizes[num] = sizes.get(num, 0) + 1
        max_length = max(max_length, num)

    if output:
        print('Linked List ({} total entries in base size of {})'.format(ht.N, size))
        # Report every observed length; a chain can be longer than the number
        # of buckets, so the lengths are not bounded by size.
        for length in sorted(sizes):
            print('{} linked lists have size of {}'.format(sizes[length], length))

    if total_non_empty == 0:
        return (0, 0)
    return (ht.N/total_non_empty, max_length)
184 |
--------------------------------------------------------------------------------
/ch07/spreadsheet.py:
--------------------------------------------------------------------------------
1 | """
2 | Spreadsheet application using tkinter to visualize a working spreadsheet.
3 | """
4 | from ch06.expression import build_expression
5 | from ch07.digraph_search import has_cycle
6 |
7 | try:
8 | import networkx as nx
9 | except ImportError:
10 | import ch07.replacement as nx
11 |
12 | from ch07.dependencies import tkinter_error
13 |
def is_formula(s):
    """Return True when s is non-empty and begins with '=', otherwise False."""
    return len(s) > 0 and s[0] == '='
17 |
class Spreadsheet:
    """
    Represents a spreadsheet.

    Attributes
    ----------
        digraph - A directed Graph to maintain all cell-based dependencies
           to detect cycles. When cell 'A2' is set to '=(B3+1)' then
           an edge B3 -> A2 is added to the graph, so whenever B3 changes,
           A2 knows it also has to change.
        values  - Current value of each cell that has been set: a float for
           numeric contents, the raw string for non-numeric contents, or the
           result of evaluating the cell's formula
        expressions - When a cell contains a formula, this maintains the expression tree
        expressions_raw - When a cell contains a formula, this is its initial string contents
        entries - For the tkinter GUI, this is text widget for given cell
        string_vars - For the tkinter GUI, this is StringVar containing the value backing
           an entry.
    """
    undefined = 'Undef'

    def __init__(self, master, new_digraph, num_rows=10, num_cols=5):
        self.master = master
        self.num_rows = num_rows
        self.num_cols = num_cols
        self.digraph = new_digraph
        self.values = {}
        self.expressions = {}
        self.expressions_raw = {}
        self.entries = {}
        self.string_vars = {}
        self.make_gui()

    def make_gui(self):
        """Construct the necessary widgets for spreadsheet GUI and set up the event handlers."""
        if tkinter_error:
            return
        import tkinter
        self.canvas = tkinter.Canvas(self.master)

        # Row numbers down the left edge, column letters across the top.
        for r in range(1, self.num_rows):
            tkinter.Label(self.master, text=str(r)).grid(row=r, column=0)
        for c in range(self.num_cols):
            tkinter.Label(self.master, text=chr(ord('A')+c)).grid(row=0, column=c+1)

        for r in range(1, self.num_rows):
            for c in range(1, self.num_cols+1):
                label = chr(ord('A')+c-1) + str(r)
                sv = tkinter.StringVar(self.master, name=label)
                widget = tkinter.Entry(self.master, textvariable=sv)

                # tkinter passes the event as the only positional argument;
                # lab is bound as a default so each cell keeps its own label.
                widget.bind('<Return>', lambda event=None, lab=label: self.entry_update(lab, event))
                widget.bind('<FocusIn>', lambda event=None, lab=label: self.show_formula(lab, event))
                widget.bind('<FocusOut>', lambda event=None, lab=label: self.show_value(lab, event))
                widget.grid(row=r, column=c)
                self.entries[label] = widget
                self.string_vars[label] = sv

    def entry_update(self, label, event):
        """Updates the contents of a spreadsheet cell in response to user input."""
        try:
            self.set(label, self.string_vars[label].get())
        except RuntimeError:
            # set() raises RuntimeError when the new contents would create a cycle.
            self.string_vars[label].set('#Cycle')

    def show_formula(self, label, event):
        """Changes a label's view to a formula, if present, when cell gains focus."""
        if label in self.expressions_raw:
            self.string_vars[label].set(self.expressions_raw[label])

    def show_value(self, label, event):
        """Resumes showing the value for a label, when cell loses focus."""
        if label in self.expressions_raw:
            self.string_vars[label].set(self.values[label])

    def get(self, cell):
        """Return the value of a cell, or 0 if not yet available."""
        if cell in self.values:
            return self.values[cell]
        return 0

    def set(self, cell, sval):
        """
        Update the value of a cell. Raises Runtime Error if cycle would be induced, otherwise
        make change and recompute other cells.
        """
        if cell in self.expressions:            # Clear old dependencies IN CASE changed...
            for v in set(self.expressions[cell].references()):  # convert to set to eliminate duplicates
                self.digraph.remove_edge(v, cell)

        self.string_vars[cell].set(sval)        # Set contents (non-numeric is set to zero)
        self.digraph.add_node(cell)             # Make sure node is in DiGraph

        if is_formula(sval):
            self.values[cell] = self.undefined
            self.expressions_raw[cell] = sval
            # Strip the leading '=' before parsing the formula.
            self.expressions[cell] = build_expression(self.expressions_raw[cell][1:], environment=self.values)
            refs = set(self.expressions[cell].references())    # set eliminates duplicates
            for v in refs:
                self.digraph.add_edge(v, cell)

            if has_cycle(self.digraph):
                # Undo the edges just added and forget the formula before reporting.
                for v in refs:
                    self.digraph.remove_edge(v, cell)
                self.expressions.pop(cell, None)
                self.expressions_raw.pop(cell, None)
                raise RuntimeError('Changing {} to {} creates cycle.'.format(cell, sval))
        else:
            # Plain contents: discard any formula previously held by this cell.
            self.expressions.pop(cell, None)
            self.expressions_raw.pop(cell, None)

        self._recompute(cell)                   # now recompute dependencies

    def _recompute(self, cell):
        """Internal API to recursively ripple changes through spreadsheet."""
        if cell in self.expressions:
            try:
                self.values[cell] = self.expressions[cell].eval()
            except TypeError:
                self.values[cell] = 0           # bad formula
            self.string_vars[cell].set(str(self.values[cell]))
        else:
            # Not a formula: store a float when the text parses, else the raw text.
            try:
                self.values[cell] = float(self.string_vars[cell].get())
            except ValueError:
                self.values[cell] = self.string_vars[cell].get()

        # Ripple the change to every cell that depends on this one.
        if cell in self.digraph:
            for w in self.digraph[cell]:
                self._recompute(w)
144 |
145 | #######################################################################
if __name__ == '__main__':
    # Launch the GUI only when tkinter could be imported.
    if not tkinter_error:
        import tkinter
        root = tkinter.Tk()
        root.title('You must press ENTER to change the contents of a cell.')
        ss = Spreadsheet(root, nx.DiGraph())
        root.mainloop()
    else:
        print('Unable to launch spreadsheet application without access to tkinter')
155 |
--------------------------------------------------------------------------------