├── challenge1 ├── .gitignore ├── tests │ ├── mon0.out │ ├── mon1a.out │ ├── mon1b.out │ ├── mon1c.out │ ├── mon1d.out │ ├── mon2a.out │ ├── mon2b.out │ ├── mon2c.out │ ├── mon3c.out │ ├── mon3d.out │ ├── mon5c.out │ ├── mon8b.out │ ├── mon10a.out │ ├── mon10b.out │ ├── mon3a.out │ ├── mon3b.out │ ├── mon4a.out │ ├── mon4b.out │ ├── mon5a.out │ ├── mon5b.out │ ├── mon6a.out │ ├── mon6b.out │ ├── mon7a.out │ ├── mon7b.out │ ├── mon8a.out │ ├── mon9a.out │ ├── mon9b.out │ ├── mon10c.out │ ├── mon0.in │ ├── mon1b.in │ ├── mon1c.in │ ├── mon1a.in │ ├── mon1d.in │ ├── mon2c.in │ ├── mon2a.in │ ├── mon2b.in │ ├── mon3c.in │ ├── mon3a.in │ ├── mon3b.in │ ├── mon3d.in │ └── mon4b.in ├── url.txt ├── README.md ├── run_tests.sh ├── coins.py └── coins.cpp ├── hello.txt ├── .gitignore ├── challenge2 ├── pilots_nlog2n.py ├── .gitignore ├── url.txt ├── README.md ├── download_test.sh ├── run_tests.sh ├── pilots_n3.py ├── pilots_n2.py ├── pilots.cpp ├── pilots_nlogn.py └── pilots_n.py ├── Lecture 17.ipynb ├── lecture4 └── Heaps.ipynb ├── lecture12 └── Google BFS.ipynb ├── lecture16 └── Bignums.ipynb ├── lecture9 └── Hashing.ipynb ├── lecture1 ├── Stock Exchange.ipynb └── Fast exponentiation and fibonacci sequence.ipynb ├── lecture7 ├── Radix Sort Performance.ipynb └── Understanding Radix Sort.ipynb └── lecture2 └── binary_search.ipynb /challenge1/.gitignore: -------------------------------------------------------------------------------- 1 | coins 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon0.out: -------------------------------------------------------------------------------- 1 | 8 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon1a.out: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon1b.out: 
-------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon1c.out: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon1d.out: -------------------------------------------------------------------------------- 1 | 3 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon2a.out: -------------------------------------------------------------------------------- 1 | 24 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon2b.out: -------------------------------------------------------------------------------- 1 | 30 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon2c.out: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon3c.out: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon3d.out: -------------------------------------------------------------------------------- 1 | 5 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon5c.out: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon8b.out: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /hello.txt: 
-------------------------------------------------------------------------------- 1 | hi 2 | how are you? 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon10a.out: -------------------------------------------------------------------------------- 1 | 405963 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon10b.out: -------------------------------------------------------------------------------- 1 | 37260 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon3a.out: -------------------------------------------------------------------------------- 1 | 960 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon3b.out: -------------------------------------------------------------------------------- 1 | 1243 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon4a.out: -------------------------------------------------------------------------------- 1 | 10032 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon4b.out: -------------------------------------------------------------------------------- 1 | 9944 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon5a.out: -------------------------------------------------------------------------------- 1 | 9120 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon5b.out: -------------------------------------------------------------------------------- 1 | 47898 2 | 
-------------------------------------------------------------------------------- /challenge1/tests/mon6a.out: -------------------------------------------------------------------------------- 1 | 120258 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon6b.out: -------------------------------------------------------------------------------- 1 | 4806 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon7a.out: -------------------------------------------------------------------------------- 1 | 14240 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon7b.out: -------------------------------------------------------------------------------- 1 | 332673 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon8a.out: -------------------------------------------------------------------------------- 1 | 141284 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon9a.out: -------------------------------------------------------------------------------- 1 | 11859 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon9b.out: -------------------------------------------------------------------------------- 1 | 82660 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon10c.out: -------------------------------------------------------------------------------- 1 | 1000000 2 | -------------------------------------------------------------------------------- /challenge2/pilots_nlog2n.py: -------------------------------------------------------------------------------- 1 | # LAZYYYYYY 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon0.in: 
-------------------------------------------------------------------------------- 1 | 15 3 2 | RORROOROOROOORO 3 | -------------------------------------------------------------------------------- /challenge1/tests/mon1b.in: -------------------------------------------------------------------------------- 1 | 14 2 2 | ORROOOOOOORROO 3 | -------------------------------------------------------------------------------- /challenge1/tests/mon1c.in: -------------------------------------------------------------------------------- 1 | 12 6 2 | OORROROOROOR 3 | -------------------------------------------------------------------------------- /challenge1/tests/mon1a.in: -------------------------------------------------------------------------------- 1 | 17 3 2 | OROOOOOROOOOORRRR 3 | -------------------------------------------------------------------------------- /challenge2/.gitignore: -------------------------------------------------------------------------------- 1 | pilots 2 | tests.zip 3 | tests/ 4 | -------------------------------------------------------------------------------- /challenge1/tests/mon1d.in: -------------------------------------------------------------------------------- 1 | 20 2 2 | RRORRROROORRROORRRRO 3 | -------------------------------------------------------------------------------- /challenge2/url.txt: -------------------------------------------------------------------------------- 1 | http://main.edu.pl/en/archive/oi/17/pil 2 | -------------------------------------------------------------------------------- /challenge1/tests/mon2c.in: -------------------------------------------------------------------------------- 1 | 29 3 2 | RRRRRRRRRRRRRRRRORRRRRRRRRRRR 3 | -------------------------------------------------------------------------------- /challenge1/url.txt: -------------------------------------------------------------------------------- 1 | http://main.edu.pl/en/archive/pa/2010/mon 2 | 
-------------------------------------------------------------------------------- /challenge1/tests/mon2a.in: -------------------------------------------------------------------------------- 1 | 30 3 2 | OORROOOOORRROROOOOOROOOOOOOROR 3 | -------------------------------------------------------------------------------- /challenge1/tests/mon2b.in: -------------------------------------------------------------------------------- 1 | 33 4 2 | ORROOOOOOOROORROROOOROROOOOOOOOOO 3 | -------------------------------------------------------------------------------- /challenge1/README.md: -------------------------------------------------------------------------------- 1 | # The coins challenge 2 | 3 | To run the C++ solution: 4 | ```bash 5 | g++ coins.cpp -O2 -o coins 6 | ./run_tests.sh ./coins 7 | ``` 8 | 9 | To run the Python solution: 10 | 11 | ```bash 12 | ./run_tests.sh python2 coins.py 13 | ``` 14 | -------------------------------------------------------------------------------- /challenge2/README.md: -------------------------------------------------------------------------------- 1 | # The pilots challenge 2 | 3 | To run the C++ solution: 4 | ```bash 5 | g++ pilots.cpp -O2 -o pilots 6 | ./run_tests.sh ./pilots 7 | ``` 8 | 9 | To run a Python solution (for example the linear-time one): 10 | 11 | ```bash 12 | ./run_tests.sh python2 pilots_n.py 13 | ``` 14 | -------------------------------------------------------------------------------- /challenge2/download_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # stop script on error and print it 4 | set -e 5 | # inform me of undefined variables 6 | set -u 7 | # handle cascading failures well 8 | set -o pipefail 9 | 10 | curl -o tests.zip "http://main.edu.pl/en/user.phtml?op=tests&c=1700&task=633" 11 | unzip tests.zip -d tests 12 | -------------------------------------------------------------------------------- /challenge1/run_tests.sh: 
#!/bin/bash
#
# Runs the solution given on the command line against every tests/*.in file
# next to this script and diffs its stdout with the matching .out file.
#
# Usage: ./run_tests.sh <command> [args...]
#   e.g. ./run_tests.sh ./coins
#        ./run_tests.sh python2 coins.py

# stop script on error and print it
set -e
# inform me of undefined variables
set -u
# handle cascading failures well
set -o pipefail

if [ "$#" -eq 0 ]; then
    echo "usage: $0 <command> [args...]" >&2
    exit 1
fi

SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

# Quote every expansion so paths and arguments containing spaces survive.
for filename in "$SCRIPT_DIR"/tests/*.in; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$filename" ] || continue
    # Strip the ".in" suffix; unlike ${filename::-3} this also works on bash < 4.2.
    output_filename="${filename%.in}.out"
    time "$@" < "$filename" | diff -bs - "$output_filename"
done
# n^2 solution
#
# For every start index, extend the window one element at a time while
# tracking the running minimum and maximum, and stop as soon as the
# spread (maximum - minimum) exceeds k.

# read in the first line of input
# (split() without an argument tolerates repeated or trailing whitespace,
# where split(' ') would yield empty strings and make int() fail)
k, n = [int(x) for x in raw_input().split()]
# read in the second line of input
A = [int(x) for x in raw_input().split()]
assert len(A) == n

# check the subsequences starting at each position and keep the best length
best_sequence_length = 0

for start in range(0, n):
    minimum_so_far = A[start]
    maximum_so_far = A[start]
    for end in range(start + 1, n + 1):
        # extend the window A[start:end] by one element; this is an O(1)
        # update instead of rescanning the whole window
        minimum_so_far = min(minimum_so_far, A[end - 1])
        maximum_so_far = max(maximum_so_far, A[end - 1])
        difference = maximum_so_far - minimum_so_far
        if difference <= k:
            best_sequence_length = max(best_sequence_length,
                                       end - start)
        else:
            # smartness: extending the window can only widen the spread
            # (difference will never decrease!), so stop for this start
            break

# single-argument print(...) behaves the same on Python 2 and Python 3
print(best_sequence_length)
MX.front()<=(i-x)) MX.pop_front(); 26 | if(i>=x-1 && T[MX.front()]-T[MN.front()]<=t) return 1; 27 | //if(i>=x-1) printf(" %d: %d, %d\n", i, T[MN.front()], T[MX.front()]); 28 | } 29 | return 0; 30 | } 31 | 32 | 33 | 34 | 35 | 36 | int main() { 37 | scanf("%d%d", &t,&n); 38 | for(int i=0; i 0 and \ 33 | A[min_extravaganza[-1]] >= A[end]: 34 | min_extravaganza.pop() 35 | min_extravaganza.append(end) 36 | 37 | while len(max_extravaganza) > 0 and \ 38 | A[max_extravaganza[-1]] <= A[end]: 39 | max_extravaganza.pop() 40 | max_extravaganza.append(end) 41 | 42 | end += 1 43 | 44 | if ok(): 45 | best_sequence_length = max(best_sequence_length, 46 | end - start) 47 | 48 | if min_extravaganza[0] == start: 49 | min_extravaganza.popleft() 50 | if max_extravaganza[0] == start: 51 | max_extravaganza.popleft() 52 | start += 1 53 | 54 | print best_sequence_length 55 | -------------------------------------------------------------------------------- /challenge1/tests/mon3d.in: -------------------------------------------------------------------------------- 1 | 1835 4 2 | 
ROOORRRORRRRRORRRRORRROOORORRRRRRRRRRRRRRRRRRRRRRRRORORRRRRRRRRRRORORRORROORRROORORRRRRRRRORROORRRRRRRRRORRRRRRORRRRRRRRORRORRRORRRRRRRRRRRRORRRRRROORORRRRORRROORORRROORRORRRRRRRRRRORRRRRRRRRRRRRRRORRRRORRRRRRRRRRRRRRORRRRRRROORRORRROOORRRRORRRRRRRRRORRRRORRORRORRRORRRORROORRRORORORRRRRRRORRRRRORRRRRORRRRORRRRRRRORRRORRORRRRRRORRORRRRRRRRRRRRRRRRRORRRROORRRORRRRRRRRRRRRRRRRRRRRRORRRRRRRRORRRRRORRRRORRRRRRORRORORRORRROORRRRRRRRORRORRRRRORRRRORRRRRRRRRRRRRRRRRRROORORRRRRRRRRORRRRRRORRORRRRORORROROROROROOORRRRRRRRRRRRRORRORRRRRROORRRRRRRRRRRORROORRRRORRRRRRRRROORROORRRRRRRROORORORRORRORRORRRRRRROORRRRRRRORRRORRRRRRORRROROORRRRRRORROORRRRROOROORRRRRORRRRRRRRRRORRRRRRRRRRORRRORRRRORRRRRRRRRRRRORRRRRRROOORRRRRRRRORRRORRRRORRRORRRRRRORRORROOROOORRRRRRRRRRRORORRRRRRRRRRORRRORRRRORRRRRROROORORRRRRRROROORRRRRRRRRROORORRRRRRRRRRRORRRRRRRRROORRRRORORRRRRRRRRRRORRRRRRRRRORRRORRRROORRRRRRRORRRRRRRORORRRORRRRRORRRRRRRRRRRRRRRRRRRRORRRORORRRRRRRRORRRRRORRRRRRRRRORRRRORRRORRRRRRORRRRRRRRRRRRRRORORRRRRRORORROROORRRORRORRORRRRRORRRROOORRRRRRORORORRRRRROORORRRRRRRRRRRRRRRRRORRRRRRRRRRORORORRRRRRRRRRORORRRRRRRORRRRRRRRRORRRRRRRRRRRRORRRRRRRRRRROOOOORRRRRORRRROROROORRRRORRRRRORORORORORRRRRRRRRRRRRRORRRRRRRORRRORRRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRORROROORRORRRRORRRRRRORRRRORRRRRRRRORRRRRRORRRRRRRRRROORRRRRRRRRRRORRORRRRRORRORRRROOROOORRRRORRROORRRRRRRRORRRRRRORRRRRRRRRRRORORRRRRORORORRRRRORRRRRRRRROORRRRORRRORRRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRORRRRRRRORRRRRRRRRRORORRRRRRRORRRRORRRRRRRRRRRORORRRRRORRRRORORRORORRRRRRROOORRRORRRRRRRRORRRRRRRRRRRRRORRRRRRRRRORRRRORRRRORRRRRRORRRRRORRRRRRRRRORRRRRROORRRORRRORRORRRRRORRRRRORRRRROORORRRRRORRRRRRRRRRRRRORORORRRORORRRROROROORRRRRRRRRRRORRRRRRORRRRRRRORRRRRRORRRRORROROROORRRRRRORRRRRRRRORRRRRRRRRRRRRRRRRRORRRORRRRRRROOORORORRRRRRRRRRRRRRORRRRORRRORRRRRROORRRRRRRRRROORRRRORROORRRRORRRRRRRRR 3 | -------------------------------------------------------------------------------- /challenge1/coins.py: 
# OVERALL COMPLEXITY: O(n)
# here we use a dictionary


def _longest_zero_sum_run(values):
    """Return the length of the longest contiguous run of `values` whose sum is 0.

    Two prefix sums that are equal delimit a zero-sum run, so it is enough to
    remember, for every prefix-sum value, the first index at which it occurs.
    """
    # prefix sum of the empty prefix is 0 and it occurs at index 0
    leftmost_index_of_value = {0: 0}
    running_sum = 0
    best = 0
    for idx, value in enumerate(values, 1):
        running_sum += value
        if running_sum in leftmost_index_of_value:
            best = max(best, idx - leftmost_index_of_value[running_sum])
        else:
            leftmost_index_of_value[running_sum] = idx
    return best


def main():
    """Read `n k` and a string of 'R'/'O' coins from stdin; print the answer.

    The answer is the length of the longest contiguous fragment that contains
    exactly k times as many 'O' coins as 'R' coins (0 if there is none).
    """
    # raw_input only exists on Python 2; fall back to input on Python 3
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # n - number of coins
    # k - required ratio: k 'O' coins for every 'R' coin
    n, k = [int(v) for v in read_line().split()]
    # coins - all the characters; strip() drops a trailing '\r' or spaces
    coins = read_line().strip()
    assert len(coins) == n

    coins_as_numbers = []
    for coin in coins:
        if coin == 'R':
            coins_as_numbers.append(k)
        elif coin == 'O':
            coins_as_numbers.append(-1)
        else:
            assert False, "unexpected coin %r" % coin
    # NEW PROBLEM: find the longest contiguous subsequence of sum 0
    # (sum zero means k 'O' coins, worth -1 each, per 'R' coin, worth k)

    res = _longest_zero_sum_run(coins_as_numbers)

    print(res)


if __name__ == '__main__':
    main()
23 | // this is so that when we compute 24 | // 25 | // seq_and_index[n].second - seq_and_index[0].second 26 | // 27 | // we get sum of elements for indexes 1 <= i <= n 28 | 29 | // We reuse seq_and_index for two things 30 | // initially it just stores values k and -1 31 | // later we compute prefix sum 32 | seq_and_index[0] = std::make_pair(0,0); 33 | for (int i=0; i 0\n", 187 | " for t in range(1, len(obs)):\n", 188 | " V.append({})\n", 189 | " predecessor.append({})\n", 190 | "\n", 191 | " for y in states:\n", 192 | " highest_probability, best_y_prev = 0.0, None\n", 193 | " for y_prev in states:\n", 194 | " probability_from_y_prev = V[t-1][y_prev] * trans_p[y_prev][y] * emit_p[y][obs[t]]\n", 195 | " if probability_from_y_prev > highest_probability:\n", 196 | " highest_probability, best_y_prev = probability_from_y_prev, y_prev\n", 197 | " V[t][y] = highest_probability\n", 198 | " predecessor[t][y] = best_y_prev\n", 199 | " \n", 200 | " print(*dptable(V), sep='')\n", 201 | " \n", 202 | " # Return the most likely sequence over the given time frame\n", 203 | " n = len(obs) - 1\n", 204 | " (prob, state) = max((V[n][y], y) for y in states)\n", 205 | " \n", 206 | " # Recover the path\n", 207 | " path = []\n", 208 | " for t in range(n,-1,-1):\n", 209 | " path.append(state)\n", 210 | " state = predecessor[t][state]\n", 211 | " path.reverse()\n", 212 | " \n", 213 | " return (prob, path)\n", 214 | "\n", 215 | "# Don't study this; it just prints a table of the steps.\n", 216 | "def dptable(V):\n", 217 | " yield \" \"\n", 218 | " yield \" \".join((\"%7d\" % i) for i in range(len(V)))\n", 219 | " yield \"\\n\"\n", 220 | " for y in V[0]:\n", 221 | " yield \"%.5s: \" % y\n", 222 | " yield \" \".join(\"%.7s\" % (\"%f\" % v[y]) for v in V)\n", 223 | " yield \"\\n\"" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 8, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 
class MaxHeap(object):
    """Binary max-heap stored in a flat list.

    The children of the element at index i live at 2*i + 1 and 2*i + 2.
    Only the first `heap_size` entries of `storage` belong to the heap.
    """

    def __init__(self, max_size=128):
        """Create an empty heap with `max_size` pre-allocated slots."""
        self.storage = [None for _ in range(max_size)]
        self.heap_size = 0

    def display(self, width=80):
        """Print the heap level by level as `value[@index]`, spread over `width` columns."""
        next_element = 0
        level = 0
        while True:
            level_elts = 2 ** level
            level_end = min(next_element + level_elts, self.heap_size)
            elts = self.storage[next_element:level_end]
            next_element = level_end
            if len(elts) == 0:
                break
            # evenly spaced columns for a full level; floor division gives
            # the same padding as true division in the loop below
            positions = [(slot + 1) * width // (level_elts + 1)
                         for slot in range(level_elts)]
            output = ""
            for j, (elt, pos) in enumerate(zip(elts, positions)):
                idx = 2 ** level + j - 1
                while len(output) <= pos:
                    output += " "
                output += "%d[@%d]" % (elt, idx)
            print(output)
            print()
            level += 1

    def fix_down(self, index):
        """Sink the element at `index` until neither child is larger than it."""
        while index < self.heap_size:
            # pick maximum child
            max_child_idx = None
            if 2 * index + 1 < self.heap_size:
                max_child_idx = 2 * index + 1

            if 2 * index + 2 < self.heap_size and \
                    self.storage[2 * index + 1] < self.storage[2 * index + 2]:
                max_child_idx = 2 * index + 2

            if max_child_idx is None or \
                    self.storage[index] > self.storage[max_child_idx]:
                # heap property satisfied
                break

            self.storage[index], self.storage[max_child_idx] = \
                self.storage[max_child_idx], self.storage[index]
            index = max_child_idx

    def fix_up(self, index):
        """Float the element at `index` up while it is not smaller than its parent."""
        assert index < self.heap_size
        while index > 0:
            parent_idx = (index - 1) // 2
            if self.storage[index] >= self.storage[parent_idx]:
                self.storage[index], self.storage[parent_idx] = \
                    self.storage[parent_idx], self.storage[index]
                index = parent_idx
            else:
                break

    def insert(self, element):
        """Add `element` to the heap.

        When every slot of `storage` is in use (always the case for a heap
        made by `wrap_list`), the list is grown by one instead of failing
        with IndexError. For a wrapped list this extends the caller's list.
        """
        new_index = self.heap_size
        if new_index >= len(self.storage):
            self.storage.append(element)
        else:
            self.storage[new_index] = element
        self.heap_size += 1
        self.fix_up(new_index)

    def extract_max(self):
        """Remove and return the largest element.

        The removed element is parked in the slot just past the shrunken
        heap, which is what lets `heap_sort` sort in place.

        Raises:
            IndexError: if the heap is empty.
        """
        if self.heap_size <= 0:
            # without this check heap_size would become negative and the
            # call would return whatever sits at the end of storage
            raise IndexError("extract_max from an empty heap")
        last = self.heap_size - 1
        self.storage[0], self.storage[last] = self.storage[last], self.storage[0]
        self.heap_size -= 1
        self.fix_down(0)
        return self.storage[self.heap_size]

    def heapify(self):
        """Rearrange the first `heap_size` elements so they form a max-heap."""
        # elements past heap_size // 2 - 1 have no children, so sinking
        # them would be a no-op
        for i in range(self.heap_size // 2 - 1, -1, -1):
            self.fix_down(i)

    @staticmethod
    def wrap_list(lst):
        """Return a heap that uses `lst` itself as storage (no copy, no heapify)."""
        h = MaxHeap(0)
        h.storage = lst
        h.heap_size = len(lst)
        return h


def heap_sort(array):
    """Sort `array` in place in ascending order using a max-heap."""
    as_heap = MaxHeap.wrap_list(array)
    as_heap.heapify()
    # every extraction moves the current maximum to the end of the heap area
    while as_heap.heap_size > 0:
        as_heap.extract_max()
"output_type": "stream", 286 | "text": [ 287 | "[1, 2, 2, 3, 4, 5, 6, 8, 10]\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "example = [3,10,2,6,8,1,2,4,5]\n", 293 | "heap_sort(example)\n", 294 | "print(example)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": { 301 | "collapsed": true 302 | }, 303 | "outputs": [], 304 | "source": [] 305 | } 306 | ], 307 | "metadata": { 308 | "kernelspec": { 309 | "display_name": "Python 3", 310 | "language": "python", 311 | "name": "python3" 312 | }, 313 | "language_info": { 314 | "codemirror_mode": { 315 | "name": "ipython", 316 | "version": 3 317 | }, 318 | "file_extension": ".py", 319 | "mimetype": "text/x-python", 320 | "name": "python", 321 | "nbconvert_exporter": "python", 322 | "pygments_lexer": "ipython3", 323 | "version": "3.4.1" 324 | } 325 | }, 326 | "nbformat": 4, 327 | "nbformat_minor": 0 328 | } 329 | -------------------------------------------------------------------------------- /lecture12/Google BFS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Google BFS question" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from collections import deque" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "Given a 2D board with emergency $E$ and some policeman $P$, what is the distance from an emergency to closest policeman. 
Nodes marked as $X$, are walls and cannot be traversed.\n", 26 | "\n", 27 | "\n", 28 | "Think a little bit about how to solve it" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 66, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "example = [\n", 40 | " \"...P...\",\n", 41 | " \"...XXX.\",\n", 42 | " \"EX..P..\",\n", 43 | " \".X.....\",\n", 44 | " \".......\",\n", 45 | " \".......\",\n", 46 | "]\n", 47 | "\n", 48 | "class MapGraph(object):\n", 49 | " def __init__(self, mmap):\n", 50 | " \"\"\"Graph abstraction for our map\"\"\"\n", 51 | " self.dim_x, self.dim_y = len(mmap), len(mmap[0])\n", 52 | " self.mmap = {(x,y):mmap[x][y] for x in range(self.dim_x) \n", 53 | " for y in range(self.dim_y)}\n", 54 | "\n", 55 | " def neighbours(self, node):\n", 56 | " \"\"\"Returns all the direct neighbors of a given node\"\"\"\n", 57 | " x,y = node\n", 58 | " # There are four directions in which we can go\n", 59 | " for dx, dy in [[0,1], [0,-1], [-1,0], [1,0]]:\n", 60 | " nx, ny = x + dx, y + dy\n", 61 | " if (0 <= nx < self.dim_x and # but we cannot got through walls\n", 62 | " 0 <= ny < self.dim_y # (at least not yet!)\n", 63 | " and self.mmap[nx,ny] != 'X'):\n", 64 | " yield nx, ny\n", 65 | " \n", 66 | " def find_all(self, letter):\n", 67 | " \"\"\"Finds all the coordinates where a given letter occurs\"\"\"\n", 68 | " res = []\n", 69 | " for x in range(self.dim_x):\n", 70 | " for y in range(self.dim_y):\n", 71 | " if self.mmap[x,y] == letter:\n", 72 | " res.append((x,y))\n", 73 | " return res\n", 74 | " \n", 75 | " def show(self, what=None):\n", 76 | " \"\"\"Displays the graph\"\"\"\n", 77 | " what = what or self.mmap\n", 78 | " for x in range(self.dim_x):\n", 79 | " for y in range(self.dim_y):\n", 80 | " print(what[x,y] if (x,y) in what else '?', end='')\n", 81 | " print(' ', end='')\n", 82 | " print('')" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 67, 88 | "metadata": { 89 | 
"collapsed": false 90 | }, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | ". . . P . . . \n", 97 | ". . . X X X . \n", 98 | "E X . . P . . \n", 99 | ". X . . . . . \n", 100 | ". . . . . . . \n", 101 | ". . . . . . . \n" 102 | ] 103 | }, 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "[(2, 0)]" 108 | ] 109 | }, 110 | "execution_count": 67, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "g = MapGraph(example)\n", 117 | "g.show()\n", 118 | "g.find_all('E')" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "The solution is maybe somewhat counterintuitive - we start our search from emergency, not policeman" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 68, 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "def bfs_from(graph, source):\n", 137 | " q = deque()\n", 138 | " # initially source is on the queue\n", 139 | " distance = {source: 0}\n", 140 | " q.appendleft(source)\n", 141 | " # while queue is not empty\n", 142 | " while len(q) > 0:\n", 143 | " # consider the node that has been in the queue for\n", 144 | " # the longest\n", 145 | " node = q.popleft()\n", 146 | " # for all neighbours\n", 147 | " for neighbour in graph.neighbours(node):\n", 148 | " # if they were NOT visted yet\n", 149 | " if neighbour not in distance:\n", 150 | " # mark their distance and put them on queue\n", 151 | " distance[neighbour] = distance[node] + 1\n", 152 | " q.append(neighbour)\n", 153 | " return distance" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 76, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | ". . . P . . . \n", 168 | ". . . X X X . \n", 169 | "E X . . P . . \n", 170 | ". X . . . . . \n", 171 | ". . . 
. . . . \n", 172 | ". . . . . . . \n", 173 | "\n", 174 | "2 3 4 5 6 7 8 \n", 175 | "1 2 3 ? ? ? 9 \n", 176 | "0 ? 4 5 6 7 8 \n", 177 | "1 ? 5 6 7 8 9 \n", 178 | "2 3 4 5 6 7 8 \n", 179 | "3 4 5 6 7 8 9 \n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "g = MapGraph(example)\n", 185 | "distances = bfs_from(g, g.find_all('E')[0])\n", 186 | "g.show()\n", 187 | "print('')\n", 188 | "g.show(distances)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 70, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "def solve_google(example):\n", 200 | " g = MapGraph(example)\n", 201 | " # compute distances from emergency to everywhere else\n", 202 | " distances = bfs_from(g, g.find_all('E')[0])\n", 203 | " # find the minimum distance policeman\n", 204 | " res = float('inf')\n", 205 | " for policeman in g.find_all('P'):\n", 206 | " res = min(res, distances[policeman])\n", 207 | " return res" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 71, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/plain": [ 220 | "5" 221 | ] 222 | }, 223 | "execution_count": 71, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "solve_google(example)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "## Success! \n", 237 | "\n", 238 | "We solve the problem correctly in $O(nm)$, which is the best solution we can hope for!\n", 239 | "\n", 240 | "## Follow up question\n", 241 | "\n", 242 | "Find the worst place for an emergency (the one furthest from all policemen)\n", 243 | "\n", 244 | "Think about how to solve it."
245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 72, 250 | "metadata": { 251 | "collapsed": true 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "example2 = [\n", 256 | " \"...P...\",\n", 257 | " \"...XXX.\",\n", 258 | " \"EX..P..\",\n", 259 | " \".X.....\",\n", 260 | " \".......\",\n", 261 | " \".......\",\n", 262 | "]" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "There are many suboptimal solutions, but it turns out we can still solve it in $O(nm)$, by starting BFS from all the policemen simultaneously!" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 73, 275 | "metadata": { 276 | "collapsed": true 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "def bfs_from_many(graph, sources):\n", 281 | " q = deque()\n", 282 | " distance = {s: 0 for s in sources} # <--- this line changed\n", 283 | " q.extendleft(sources) # <--- this line changed\n", 284 | " while len(q) > 0:\n", 285 | " node = q.popleft()\n", 286 | " for neighbour in graph.neighbours(node):\n", 287 | " if neighbour not in distance:\n", 288 | " distance[neighbour] = distance[node] + 1\n", 289 | " q.append(neighbour)\n", 290 | " return distance" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 74, 296 | "metadata": { 297 | "collapsed": false 298 | }, 299 | "outputs": [ 300 | { 301 | "name": "stdout", 302 | "output_type": "stream", 303 | "text": [ 304 | ". . . P . . . \n", 305 | ". . . X X X . \n", 306 | "E X . . P . . \n", 307 | ". X . . . . . \n", 308 | ". . . . . . . \n", 309 | ". . . . . . . \n", 310 | "\n", 311 | "3 2 1 0 1 2 3 \n", 312 | "4 3 2 ? ? ? 3 \n", 313 | "5 ? 2 1 0 1 2 \n", 314 | "6 ?
3 2 1 2 3 \n", 315 | "6 5 4 3 2 3 4 \n", 316 | "7 6 5 4 3 4 5 \n" 317 | ] 318 | } 319 | ], 320 | "source": [ 321 | "g = MapGraph(example2)\n", 322 | "distances = bfs_from_many(g, g.find_all('P'))\n", 323 | "g.show()\n", 324 | "print('')\n", 325 | "g.show(distances)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 77, 331 | "metadata": { 332 | "collapsed": true 333 | }, 334 | "outputs": [], 335 | "source": [ 336 | "def solve_google_hard(example):\n", 337 | " g = MapGraph(example)\n", 338 | " # compute distances from all the policemen\n", 339 | " distances = bfs_from_many(g, g.find_all('P'))\n", 340 | " # the worst place is the cell with the largest distance to its nearest policeman\n", 341 | " return max(distances.values())" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 78, 347 | "metadata": { 348 | "collapsed": false 349 | }, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "7" 355 | ] 356 | }, 357 | "execution_count": 78, 358 | "metadata": {}, 359 | "output_type": "execute_result" 360 | } 361 | ], 362 | "source": [ 363 | "solve_google_hard(example2)" 364 | ] 365 | } 366 | ], 367 | "metadata": { 368 | "kernelspec": { 369 | "display_name": "Python 3", 370 | "language": "python", 371 | "name": "python3" 372 | }, 373 | "language_info": { 374 | "codemirror_mode": { 375 | "name": "ipython", 376 | "version": 3 377 | }, 378 | "file_extension": ".py", 379 | "mimetype": "text/x-python", 380 | "name": "python", 381 | "nbconvert_exporter": "python", 382 | "pygments_lexer": "ipython3", 383 | "version": "3.4.1" 384 | } 385 | }, 386 | "nbformat": 4, 387 | "nbformat_minor": 0 388 | } 389 | -------------------------------------------------------------------------------- /lecture16/Bignums.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 |
"class Number(list):\n", 12 | " @staticmethod\n", 13 | " def wrap(n):\n", 14 | " return Number(reversed([ord(c) - ord('0') for c in str(n)]))\n", 15 | " \n", 16 | " def __repr__(self):\n", 17 | " return ''.join([str(d) for d in reversed(self)])" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "collapsed": false 25 | }, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "([1, 1], [2, 1])" 31 | ] 32 | }, 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "a = Number.wrap(11)\n", 40 | "b = Number.wrap(12)\n", 41 | "a, b\n", 42 | "list(a), list(b)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## Addition" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "def add(a, b):\n", 61 | " c = Number()\n", 62 | " carry = 0\n", 63 | " for i in range(max(len(a), len(b))):\n", 64 | " r = carry + (a[i] if i < len(a) else 0) + (b[i] if i < len(b) else 0)\n", 65 | " c.append(r % 10)\n", 66 | " carry = r / 10\n", 67 | " if carry > 0:\n", 68 | " c.append(carry)\n", 69 | " return c\n", 70 | "\n", 71 | "Number.__add__ = add\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "13" 85 | ] 86 | }, 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "Number.wrap(6) + Number.wrap(7)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "76" 107 | ] 108 | }, 109 | "execution_count": 5, 110 | "metadata": {}, 111 | "output_type": 
"execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "Number.wrap(6) + Number.wrap(70)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "# Substraction" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 80, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "def sub(a, b):\n", 134 | " c = Number()\n", 135 | " carry = 0\n", 136 | " for i in range(max(len(a), len(b))):\n", 137 | " r = carry + (a[i] if i < len(a) else 0) - (b[i] if i < len(b) else 0)\n", 138 | " carry = 0 \n", 139 | " while r < 0:\n", 140 | " r += 10\n", 141 | " carry -= 1\n", 142 | " c.append(r % 10)\n", 143 | " assert carry == 0, \"negative result\"\n", 144 | " while len(c) > 1 and c[-1] == 0:\n", 145 | " c.pop()\n", 146 | " return c\n", 147 | "Number.__sub__ = sub" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 81, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "1" 161 | ] 162 | }, 163 | "execution_count": 81, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "a, b = Number.wrap(7), Number.wrap(6)\n", 170 | "sub(a,b)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 82, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "64" 184 | ] 185 | }, 186 | "execution_count": 82, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "Number.wrap(70) - Number.wrap(6)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 83, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "0" 206 | ] 207 | }, 208 | "execution_count": 83, 209 | "metadata": {}, 210 | "output_type": 
"execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "Number.wrap(70) - Number.wrap(70)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "# Multiplication by digit" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 126, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "def mul_by_digit(number, digit):\n", 233 | " res = Number()\n", 234 | " carry = 0\n", 235 | " for i in range(len(number)):\n", 236 | " r = carry + number[i] * digit\n", 237 | " res.append(r % 10)\n", 238 | " carry = r / 10\n", 239 | " if carry > 0:\n", 240 | " res.append(carry)\n", 241 | " return res" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 127, 247 | "metadata": { 248 | "collapsed": false 249 | }, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "1107" 255 | ] 256 | }, 257 | "execution_count": 127, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "mul_by_digit(Number.wrap(123), 9)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "# Karatsuba Multiplication" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 116, 276 | "metadata": { 277 | "collapsed": false 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "def mul(a, b):\n", 282 | " if len(a) < len(b):\n", 283 | " a, b = b, a\n", 284 | " # now a is longer of the two\n", 285 | " assert len(a) >= len(b)\n", 286 | " if len(b) == 0:\n", 287 | " return Number.wrap(0)\n", 288 | " elif len(b) == 1:\n", 289 | " return mul_by_digit(a, b[0])\n", 290 | " \n", 291 | " mid = len(a) / 2\n", 292 | " y1, x1 = a[:mid], a[mid:]\n", 293 | " y2, x2 = b[:mid], b[mid:]\n", 294 | " H = mul(x1, x2)\n", 295 | " L = mul(y1, y2)\n", 296 | " M = mul(add(x1,y1), add(x2,y2))\n", 297 | " M = sub(M,H)\n", 298 | " M = sub(M,L)\n", 299 | " \n", 300 | " res = 
Number([0] * (2 * mid) + list(H)) + Number([0] * (mid) + list(M)) + L\n", 301 | " return res" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 124, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "1353" 315 | ] 316 | }, 317 | "execution_count": 124, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "mul(Number.wrap(123), Number.wrap(11))" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 123, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "for a in range(1112):\n", 335 | " for b in range(123):\n", 336 | " A,B = Number.wrap(a), Number.wrap(b)\n", 337 | " assert int(str(mul(A, B))) == a*b" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "# Integer square root (Newton's method)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 12, 350 | "metadata": { 351 | "collapsed": false 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "def sqrt(a):\n", 356 | " nextx, x = None, a\n", 357 | " while True:\n", 358 | " print(bin(x), x)\n", 359 | " nextx = (x + (a/x)) / 2\n", 360 | " if x == nextx:\n", 361 | " break\n", 362 | " x = nextx\n", 363 | " return x" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 13, 369 | "metadata": { 370 | "collapsed": false 371 | }, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "('0b10000011100010000100', 538756)\n", 378 | "('0b1000001110001000010', 269378)\n", 379 | "('0b100000111000100010', 134690)\n", 380 | "('0b10000011100010010', 67346)\n", 381 | "('0b1000001110001100', 33676)\n", 382 | "('0b100000111001101', 16845)\n", 383 | "('0b10000011110110', 8438)\n", 384 | "('0b1000010011010', 4250)\n", 385 | "('0b100010001100', 2188)\n", 386 | "('0b10011000001', 1217)\n", 387 |
"('0b1100111101', 829)\n", 388 | "('0b1011100011', 739)\n", 389 | "('0b1011011110', 734)\n" 390 | ] 391 | }, 392 | { 393 | "data": { 394 | "text/plain": [ 395 | "734" 396 | ] 397 | }, 398 | "execution_count": 13, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": [ 404 | "sqrt(734 * 734)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 14, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "name": "stdout", 416 | "output_type": "stream", 417 | "text": [ 418 | "('0b10000011111001001011', 540235)\n", 419 | "('0b1000001111100100110', 270118)\n", 420 | "('0b100000111110010011', 135059)\n", 421 | "('0b10000011111001011', 67531)\n", 422 | "('0b1000001111101001', 33769)\n", 423 | "('0b100000111111100', 16892)\n", 424 | "('0b10000100001101', 8461)\n", 425 | "('0b1000010100110', 4262)\n", 426 | "('0b100010010010', 2194)\n", 427 | "('0b10011000100', 1220)\n", 428 | "('0b1100111111', 831)\n", 429 | "('0b1011100100', 740)\n", 430 | "('0b1011011111', 735)\n" 431 | ] 432 | }, 433 | { 434 | "data": { 435 | "text/plain": [ 436 | "735" 437 | ] 438 | }, 439 | "execution_count": 14, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "sqrt(735 * 735 + 10)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 15, 451 | "metadata": { 452 | "collapsed": false 453 | }, 454 | "outputs": [ 455 | { 456 | "name": "stdout", 457 | "output_type": "stream", 458 | "text": [ 459 | "('0b10000011111000110111', 540215)\n", 460 | "('0b1000001111100011100', 270108)\n", 461 | "('0b100000111110001110', 135054)\n", 462 | "('0b10000011111001000', 67528)\n", 463 | "('0b1000001111100111', 33767)\n", 464 | "('0b100000111111011', 16891)\n", 465 | "('0b10000100001101', 8461)\n", 466 | "('0b1000010100110', 4262)\n", 467 | "('0b100010010010', 2194)\n", 468 | "('0b10011000100', 1220)\n", 469 | "('0b1100111111', 831)\n", 470 | 
"('0b1011100100', 740)\n", 471 | "('0b1011011111', 735)\n", 472 | "('0b1011011110', 734)\n" 473 | ] 474 | }, 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "734" 479 | ] 480 | }, 481 | "execution_count": 15, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "sqrt(735 * 735 - 10)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 11, 493 | "metadata": { 494 | "collapsed": false 495 | }, 496 | "outputs": [ 497 | { 498 | "name": "stdout", 499 | "output_type": "stream", 500 | "text": [ 501 | "200000000000000000000\n", 502 | "100000000000000000000\n", 503 | "50000000000000000001\n", 504 | "25000000000000000002\n", 505 | "12500000000000000004\n", 506 | "6250000000000000009\n", 507 | "3125000000000000020\n", 508 | "1562500000000000041\n", 509 | "781250000000000084\n", 510 | "390625000000000169\n", 511 | "195312500000000340\n", 512 | "97656250000000681\n", 513 | "48828125000001364\n", 514 | "24414062500002729\n", 515 | "12207031250005460\n", 516 | "6103515625010921\n", 517 | "3051757812521844\n", 518 | "1525878906293689\n", 519 | "762939453212380\n", 520 | "381469726737261\n", 521 | "190734863630774\n", 522 | "95367432339674\n", 523 | "47683717218412\n", 524 | "23841860706357\n", 525 | "11920934547482\n", 526 | "5960475662345\n", 527 | "2980254608357\n", 528 | "1490160858358\n", 529 | "745147536028\n", 530 | "372707969625\n", 531 | "186622291390\n", 532 | "93846987363\n", 533 | "47989057987\n", 534 | "26078337348\n", 535 | "16873768965\n", 536 | "14363242737\n", 537 | "14143837480\n", 538 | "14142135726\n", 539 | "14142135623\n" 540 | ] 541 | }, 542 | { 543 | "data": { 544 | "text/plain": [ 545 | "1.4142135623" 546 | ] 547 | }, 548 | "execution_count": 11, 549 | "metadata": {}, 550 | "output_type": "execute_result" 551 | } 552 | ], 553 | "source": [ 554 | "sqrt(2 * 10**20) / float(10**10)" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": { 
561 | "collapsed": true 562 | }, 563 | "outputs": [], 564 | "source": [] 565 | } 566 | ], 567 | "metadata": { 568 | "kernelspec": { 569 | "display_name": "Python 2", 570 | "language": "python", 571 | "name": "python2" 572 | }, 573 | "language_info": { 574 | "codemirror_mode": { 575 | "name": "ipython", 576 | "version": 2 577 | }, 578 | "file_extension": ".py", 579 | "mimetype": "text/x-python", 580 | "name": "python", 581 | "nbconvert_exporter": "python", 582 | "pygments_lexer": "ipython2", 583 | "version": "2.7.8" 584 | } 585 | }, 586 | "nbformat": 4, 587 | "nbformat_minor": 0 588 | } 589 | -------------------------------------------------------------------------------- /lecture9/Hashing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Rabin-Karp method\n", 8 | "\n", 9 | "If you think hard enough, there's nothing that differentiates pieces of text from numbers. You can think of letters as digits and of the base of the numbers as sufficiently big to accommodate all the digits. For example, take the following text:\n", 10 | "\n", 11 | "$$\n", 12 | "babacb\n", 13 | "$$\n", 14 | "\n", 15 | "It can be thought of as a number in base 26 (one digit for each English letter):\n", 16 | "\n", 17 | "$$\n", 18 | "(1,0,1,0,2,1)_{26}\n", 19 | "$$\n", 20 | "\n", 21 | "We can transform this number to base 10 using the following equation.\n", 22 | "\n", 23 | "$$\n", 24 | "1*26^5 + 0 * 26^4 + 1*26^3 + 0*26^2 + 2*26^1 + 1*26^0 = 11899005\n", 25 | "$$\n", 26 | "\n", 27 | "From the formulation above the following property should be clear.\n", 28 | "\n", 29 | "$$\n", 30 | "abba = abb * 26 + a\n", 31 | "$$\n", 32 | "\n", 33 | "In general we can write $concat(word, letter) = base * word + letter$ (1).\n", 34 | "\n", 35 | "There's also a small technicality. When we compare numbers then $0001$ and $001$ and $1$ are equivalent.
This means that we cannot map any letter to 0 if we want to be able to successfuly compare the numbers. " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Observation (1) allows us to quickly compute hashes for all the prefixes of a given word. Just like in class we are going to use modular arithmetic for our computations." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 8, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "'A' 65\n", 57 | "'a' 97\n", 58 | "'b' 98\n", 59 | "'c' 99\n", 60 | "' ' 32\n", 61 | "'c' - 'a' + 1 = 3\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "# we need to map letters to numbers. Python function ord does the job\n", 67 | "print(repr('A'), ord('A'))\n", 68 | "print(repr('a'), ord('a'))\n", 69 | "print(repr('b'), ord('b'))\n", 70 | "print(repr('c'), ord('c'))\n", 71 | "print(repr(' '), ord(' '))\n", 72 | "\n", 73 | "print('%s - %s + 1 = %d' % (repr('c'), repr('a'), ord('c') - ord('a') + 1))\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 17, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "BIG_FAT_PRIME = 2**32 - 1\n", 85 | "ENGLISH_BASE = 30 # in theory 27 is sufficient but better safe than sorry!\n", 86 | "\n", 87 | "def compute_hashes(text, base=ENGLISH_BASE, modulo = BIG_FAT_PRIME):\n", 88 | " # \n", 89 | " h = [None for _ in range(len(text) + 1)]\n", 90 | " h[0] = 0 # hash of empty word is 0\n", 91 | " for i in range(len(text)):\n", 92 | " # we only deal with english letters so we subtract 'a'\n", 93 | " # to normalize range. 
We add 1 to avoid creating zero digit.\n", 94 | " letter_as_number = (ord(text[i]) - ord('a') + 1)\n", 95 | " h[i + 1] = h[i] * base + letter_as_number\n", 96 | " h[i + 1] %= modulo\n", 97 | " # at the end of the iteration h[i+1] is the hash\n", 98 | " # of prefix of text of length (i+1) which in\n", 99 | " # Python is text[:(i+1)]\n", 100 | " return h" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "PROTIP: If you ever implement this in a lower-level programming language like C or C++, beware of integer overflows." 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 18, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "[0, 2, 61, 1832, 54961, 1648833, 49464992]" 121 | ] 122 | }, 123 | "execution_count": 18, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "compute_hashes(\"babacb\")" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 19, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "[0, 2, 61, 1832, 54964, 1648924, 49467724]" 143 | ] 144 | }, 145 | "execution_count": 19, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "compute_hashes(\"babddd\")" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 20, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "[0,\n", 165 | " 2,\n", 166 | " 61,\n", 167 | " 1832,\n", 168 | " 54964,\n", 169 | " 1648924,\n", 170 | " 49467721,\n", 171 | " 1484031649,\n", 172 | " 1571276524,\n", 173 | " 4188622771,\n", 174 | " 1104631594,\n", 175 | " 3074176759,\n", 176 | " 2030989594,\n", 177 | " 800145691,\n", 178 | " 2529534259]" 179 | ] 180 | }, 181 | "execution_count":
20, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "# for longer strings modulo matters\n", 188 | "compute_hashes(\"babddasdasdsad\")" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## hashes of substrings\n", 196 | "\n", 197 | "Now here's a crucial observation. Let's take the polynomial representation of the string $babacb$ (where $X$ is the base)\n", 198 | "\n", 199 | "$$\n", 200 | "b*X^5 + a * X^4 + b*X^3 + a*X^2 + c*X^1 + b*X^0 \n", 201 | "$$\n", 202 | "\n", 203 | "Say we want to compute the hash of $ac$, which conveniently appears at index 3 (the 4th letter) of the string we originally hashed. Moreover we have hashes of all the prefixes - it seems like we are in good shape:\n", 204 | "\n", 205 | "\\begin{align}\n", 206 | "\\text{we have }\\ \\ \\ & hash(babac) &=\\ & b*X^4 + a * X^3 + b*X^2 &+& a*X^1 + c*X^0 \\\\\n", 207 | "\\text{we have }\\ \\ \\ & hash(bab) &=\\ & b*X^2 + a * X^1 + b*X^0&&\\\\\n", 208 | "\\text{we WANT }\\ \\ \\ & hash(ac) &=\\ & && a*X^1 + c*X^0\\\\\n", 209 | "\\end{align}\n", 210 | "\n", 211 | "\n", 212 | "From above we can clearly see that:\n", 213 | "\n", 214 | "$$\n", 215 | "hash(ac) = hash(babac) - X^2 * hash(bab)\n", 216 | "$$\n", 217 | "\n", 218 | "We can generalize this to an arbitrary substring of our hashed string. Assume we hashed string $s_0, s_1, ..., s_{n-1}$ such that $h_0 = hash(\\emptyset)$, $h_1 = hash(s_0)$, $h_2 = hash(s_0, s_1)$ etc. \n", 219 | "Then we can compute $hash(s_i, ..., s_{j-1})$ using the following formula:\n", 220 | "\n", 221 | "$$\n", 222 | "hash(s_i, ..., s_{j-1}) = h_j - h_i * X ^{j - i}\n", 223 | "$$\n", 224 | "\n", 225 | "This looks very close to $O(1)$ complexity hash computation if not for $X ^{j - i}$. But since there are at most $n$ different powers of $X$ that we are interested in, we can precompute them in $O(n)$ time."
226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 37, 231 | "metadata": { 232 | "collapsed": true 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "def compute_powers(n, base=ENGLISH_BASE, modulo=BIG_FAT_PRIME):\n", 237 | " powers = [None for _ in range(n + 1)]\n", 238 | " powers[0] = 1\n", 239 | " for i in range(n):\n", 240 | " powers[i+1] = (powers[i] * base) % modulo\n", 241 | " return powers" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 38, 247 | "metadata": { 248 | "collapsed": false 249 | }, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "[1,\n", 255 | " 30,\n", 256 | " 900,\n", 257 | " 27000,\n", 258 | " 810000,\n", 259 | " 24300000,\n", 260 | " 729000000,\n", 261 | " 395163525,\n", 262 | " 3264971160,\n", 263 | " 3459854310,\n", 264 | " 716414220]" 265 | ] 266 | }, 267 | "execution_count": 38, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "compute_powers(10)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "Now we can put all those observations together into efficient detastructure that allows us to compute hashes of substrings in $O(1)$" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 77, 286 | "metadata": { 287 | "collapsed": true 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "class Hasher(object):\n", 292 | " def __init__(self, word):\n", 293 | " self.h = compute_hashes(word)\n", 294 | " self.powers = compute_powers(len(word))\n", 295 | " \n", 296 | " def substring_hash(self, i, j):\n", 297 | " result = self.h[j] - self.h[i] * self.powers[j-i]\n", 298 | " return result % BIG_FAT_PRIME" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 78, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "TEXT = \"abcxabcx\"\n", 310 | "h = Hasher(TEXT)" 311 | ] 312 | 
}, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 79, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "[ab]cxabcx 32\n", 325 | "abcx[ab]cx 32\n", 326 | "abc[xa]bcx 721\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "def highlight(word, i, j):\n", 332 | " return word[:i] + \"[\" + word[i:j] + \"]\" + word[j:]\n", 333 | "\n", 334 | "print(highlight(TEXT, 0, 2), h.substring_hash(0, 2))\n", 335 | "print(highlight(TEXT, 4, 6), h.substring_hash(4, 6))\n", 336 | "print(highlight(TEXT, 3, 5), h.substring_hash(3, 5))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "## Hasher complexity analysis.\n", 344 | "\n", 345 | "Preprocessing (`__init__`):\n", 346 | "- `compute_hashes` is $O(n)$\n", 347 | "- `compute_powers` is $O(n)$\n", 348 | "Therefore precomputing is $O(n)$.\n", 349 | "\n", 350 | "Queries (`substring_hash`) are of complexity $O(1)$ - each is just a simple formula." 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "Notice that this technique is very powerful. More powerful than we need for pattern matching. 
It should not be a surprise that we can easily use it to solve pattern matching" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 100, 363 | "metadata": { 364 | "collapsed": false 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "# hasher for text\n", 369 | "text_h = Hasher(\"to be or not to be\")" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 101, 375 | "metadata": { 376 | "collapsed": false 377 | }, 378 | "outputs": [ 379 | { 380 | "data": { 381 | "text/plain": [ 382 | "65" 383 | ] 384 | }, 385 | "execution_count": 101, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "# hash of the pattern\n", 392 | "compute_hashes(\"be\")[-1]" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 102, 398 | "metadata": { 399 | "collapsed": false 400 | }, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "65" 406 | ] 407 | }, 408 | "execution_count": 102, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "# hash of the occurence of \"be\" in original text. 
\n", 415 | "text_h.substring_hash(3, 5)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 105, 421 | "metadata": { 422 | "collapsed": false 423 | }, 424 | "outputs": [], 425 | "source": [ 426 | "def compute_matches(text, pattern):\n", 427 | " # hash of patter\n", 428 | " pattern_hash = compute_hashes(pattern)[-1]\n", 429 | " # hasher for text\n", 430 | " text_h = Hasher(text)\n", 431 | " res = []\n", 432 | " for i in range(len(text) - len(pattern) + 1):\n", 433 | " # i is potential match start index\n", 434 | " # compare hash in text with hash of pattern\n", 435 | " if text_h.substring_hash(i, i + len(pattern)) == pattern_hash:\n", 436 | " # if matching append to result list.\n", 437 | " res.append(i)\n", 438 | " return res" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 106, 444 | "metadata": { 445 | "collapsed": false 446 | }, 447 | "outputs": [ 448 | { 449 | "data": { 450 | "text/plain": [ 451 | "[3, 16]" 452 | ] 453 | }, 454 | "execution_count": 106, 455 | "metadata": {}, 456 | "output_type": "execute_result" 457 | } 458 | ], 459 | "source": [ 460 | "compute_matches(\"to be or not to be\", \"be\")" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "## Rabin-Karp complexity analysis\n", 468 | "\n", 469 | "Assume that pattern is of length $n$ and text of length $m$\n", 470 | "\n", 471 | "- pattern hash: $O(n)$\n", 472 | "- text preprocessing $O(m)$.\n", 473 | "- $m - n + 1$ iterations of the main loop with $O(1)$ hash computation in each iteration\n", 474 | "\n", 475 | "total: $O(n+m)$\n", 476 | "\n", 477 | "Catch? Relies on luck: two different strings can share the same hash, so a match reported by comparing hashes alone may be a false positive." 
478 | ] 479 | } 480 | ], 481 | "metadata": { 482 | "kernelspec": { 483 | "display_name": "Python 3", 484 | "language": "python", 485 | "name": "python3" 486 | }, 487 | "language_info": { 488 | "codemirror_mode": { 489 | "name": "ipython", 490 | "version": 3 491 | }, 492 | "file_extension": ".py", 493 | "mimetype": "text/x-python", 494 | "name": "python", 495 | "nbconvert_exporter": "python", 496 | "pygments_lexer": "ipython3", 497 | "version": "3.4.1" 498 | } 499 | }, 500 | "nbformat": 4, 501 | "nbformat_minor": 0 502 | } 503 | -------------------------------------------------------------------------------- /lecture1/Stock Exchange.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import cProfile\n", 12 | "import random\n", 13 | "import time" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Stock Exchange Problem" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Consider the following problem. We have a non-empty array representing evolution of price of a particular stock over time.\n", 28 | "```python\n", 29 | "A = [20, 3, 19, 1, 15, 6]\n", 30 | "```\n", 31 | "Given this information we want to find what is the optimal profit we can make using single buy and single sell operation (here we have perfect knowledge of the prices - you can imagine that this quantity is something that quantitative traders would like to know, to compare their decision to the best possible decision given perfect knowledge of the future).\n", 32 | "\n", 33 | "More formally we want to find two indices $b$, $s$, such that $$0 \\leq b \\leq s < |A|$$ and $$A_s - A_b$$ is maximum possible. 
Of course we cannot sell before buying.\n", 34 | "\n", 35 | "For example for the array given above, the biggest profit we can make is $16$ (make sure you can see that). Below we present three different solutions to this problem." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "# here we seed the random number generator, to ensure, that we generate \n", 47 | "# the same random instance every time we pass seed equal to a particular \n", 48 | "# value. This way the speed comparison is fair.\n", 49 | "def make_prices(n, seed):\n", 50 | " \"\"\" Return array of n random prices. \"\"\"\n", 51 | " random.seed(seed)\n", 52 | " return [ random.random() for _ in range(n) ]" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### Naive solution\n", 60 | "This solution is a direct search of values of $b$ and $s$. The complexity is $O(n^2)$ (intuitively we have two nested for loops, each of which does $O(n)$ iterations, where $n$ = len(A))." 
61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 2, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "def naive(A):\n", 72 | " \"\"\" return best gain on A, using naive method \n", 73 | " running time, due to doubly-nest loop, is O(n^2)\n", 74 | " \"\"\"\n", 75 | " n = len(A)\n", 76 | " ans = 0\n", 77 | " for i0 in range(n):\n", 78 | " for j0 in range(i0,n):\n", 79 | " ans = max(ans, A[j0]-A[i0])\n", 80 | " return ans" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "16" 94 | ] 95 | }, 96 | "execution_count": 3, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "naive([20, 3, 19, 1, 15, 6])" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 7, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | " 50025009 function calls in 14.463 seconds\n", 117 | "\n", 118 | " Ordered by: standard name\n", 119 | "\n", 120 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 121 | " 1 0.004 0.004 0.005 0.005 :1(make_prices)\n", 122 | " 1 9.050 9.050 14.458 14.458 :1(naive)\n", 123 | " 1 0.000 0.000 14.463 14.463 :1()\n", 124 | " 1 0.000 0.000 0.000 0.000 random.py:100(seed)\n", 125 | " 1 0.000 0.000 0.000 0.000 {function seed at 0x7f90fbfeb578}\n", 126 | " 1 0.000 0.000 0.000 0.000 {len}\n", 127 | " 50005000 5.213 0.000 5.213 0.000 {max}\n", 128 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 129 | " 10000 0.001 0.000 0.001 0.000 {method 'random' of '_random.Random' objects}\n", 130 | " 10002 0.195 0.000 0.195 0.000 {range}\n", 131 | "\n", 132 | "\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "# slowness alert!\n", 138 | 
"cProfile.run(\"naive(make_prices(10000, 1))\")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Divide and conquer approach\n", 146 | "Here we split our array into two simpler problems corresponding to left half of the array $L$ and right half $R$. \n", 147 | "For example if \n", 148 | "```python\n", 149 | "A = [20, 3, 19, 1, 15, 6]\n", 150 | "```\n", 151 | "we can imagine that\n", 152 | "```python\n", 153 | "L = [20, 3, 19]\n", 154 | "R = [1, 15, 6]\n", 155 | "```\n", 156 | "In order to reduce our problem to those simpler problems we need to consider three cases:\n", 157 | "1. $ b,s \\in L$ - we can solve it by solving original problem for $L$\n", 158 | "2. $ b,s \\in R$ - we can solve it by solving original problem for $R$\n", 159 | "3. $ b \\in L$ and $ s \\in R$ - we can solve it by finding minimum in $L$ and maximum in $R$ and returning the difference\n", 160 | "\n", 161 | "We need not consider the case $s \\in L$ and $b \\in R$ (why?).\n", 162 | "\n", 163 | "This way we reduced our problem to two smaller problems. This is good - we will keep changing bigger problems into smaller problems until we get a problem so small that it is trivial to solve - in this case if our array is of size 1, then the maximum profit we can make is $0$.\n", 164 | "\n", 165 | "The complexity of this solution can be calculated by solving the following equation:\n", 166 | "\n", 167 | "\\begin{align}\n", 168 | "T(1) =& 1\\\\\n", 169 | "T(n) =& T(n / 2) + T(n / 2) + O(n)\n", 170 | "\\end{align}\n", 171 | "The three summands in the equation above come from cases 1,2,3 listed above. In particular notice that case 3 requires a single read through the data and therefore has complexity $O(n)$. 
The solution to this set of equation is $T(n) = O(n \\lg{n})$" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 9, 177 | "metadata": { 178 | "collapsed": true 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "def dc(A, lo=None, hi=None):\n", 183 | " \"\"\" return best gain on A[lo:hi], using divide & conquer \n", 184 | " running time is solution to T(n) = 2*T(n/2) + Theta(n) = Theta(n log n)\n", 185 | " \"\"\"\n", 186 | " if lo is None:\n", 187 | " lo = 0\n", 188 | " if hi is None:\n", 189 | " hi = len(A)\n", 190 | " n = hi-lo\n", 191 | " # base case\n", 192 | " if n == 1:\n", 193 | " return 0\n", 194 | " # divide and conquer\n", 195 | " # divide into lo:mid and mid:hi\n", 196 | " mid = (lo+hi)//2 \n", 197 | " # recurse on left half\n", 198 | " gain_low = dc(A, lo, mid)\n", 199 | " # recurse on right half\n", 200 | " gain_high = dc(A, mid, hi)\n", 201 | " # figure out best gain for buying in left half, selling in right half\n", 202 | " buy_price = min([ A[i] for i in range(lo, mid) ])\n", 203 | " sell_price = max([ A[i] for i in range(mid, hi)])\n", 204 | " gain_cross = sell_price - buy_price\n", 205 | " # optimum is max of three cases just solved\n", 206 | " return max(gain_low, gain_high, gain_cross)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 10, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "16" 220 | ] 221 | }, 222 | "execution_count": 10, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "dc([20, 3, 19, 1, 15, 6])" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 11, 234 | "metadata": { 235 | "collapsed": false 236 | }, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | " 80001 function calls (60003 primitive calls) in 0.073 seconds\n", 243 | "\n", 244 | " Ordered by: standard name\n", 
245 | "\n", 246 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 247 | " 1 0.004 0.004 0.005 0.005 :1(make_prices)\n", 248 | " 19999/1 0.047 0.000 0.068 0.068 :1(dc)\n", 249 | " 1 0.000 0.000 0.073 0.073 :1()\n", 250 | " 1 0.000 0.000 0.000 0.000 random.py:100(seed)\n", 251 | " 1 0.000 0.000 0.000 0.000 {function seed at 0x7f90fbfeb578}\n", 252 | " 1 0.000 0.000 0.000 0.000 {len}\n", 253 | " 19998 0.008 0.000 0.008 0.000 {max}\n", 254 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 255 | " 10000 0.001 0.000 0.001 0.000 {method 'random' of '_random.Random' objects}\n", 256 | " 9999 0.004 0.000 0.004 0.000 {min}\n", 257 | " 19999 0.008 0.000 0.008 0.000 {range}\n", 258 | "\n", 259 | "\n" 260 | ] 261 | } 262 | ], 263 | "source": [ 264 | "cProfile.run(\"dc(make_prices(10000, 1))\")" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "### Solution by algorithmic thinking\n", 272 | "Notice that if we know a $k$ such that $b \\leq k \\leq s$ then we can find $b$ and $s$. Indeed, $b$ is minimum to the left of $k$ and $s$ is maximum to the right of $k$. Since we don't know which $k$ is correct, we need to try all values. Implementing that solution naively leads to $O(n^2)$ complexity. Not happy.\n", 273 | "\n", 274 | "To improve on it notice that we can precompute answer to all the questions of form *minimum to the left of $k$* (and store them in the array $B$) and *maximum to the right of $k$* (and store them in array $S$) in complexity $O(n)$. Once precomputed - we can just look them up in complexity $O(1)$, which we will do $n$ times - once for each value of $k$. The total complexity is equal to:\n", 275 | "\n", 276 | "$$\n", 277 | "\\text{work to precompute B} + \\text{work to precompute S} + \\text{work to evaluate all values of k}\n", 278 | "$$\n", 279 | "\n", 280 | "Notice that all three of those have complexity $O(n)$, so the total complexity is $O(n)$." 
281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 12, 286 | "metadata": { 287 | "collapsed": true 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "def lin(A):\n", 292 | " \"\"\" return best gain, computed by linear-time alg \n", 293 | " running time is Theta(n)\n", 294 | " \"\"\"\n", 295 | " n = len(A)\n", 296 | " # B[k] = min{ A[i0]: i0 <= k } for k = 0, 1, ..., n-1\n", 297 | " # = price to buy at if you have to buy no later than k (and sell no earlier than k)\n", 298 | " B = [A[0]] * n\n", 299 | " for k in range(1, n):\n", 300 | " B[k] = min(B[k-1],A[k])\n", 301 | " # S[k] = max{ A[j0]: j0 >= k } for k = 0, 1, ..., n-1\n", 302 | " # = price to sell at if you have to sell no earlier than k (but bought no later than k)\n", 303 | " S = [A[n-1]] * n\n", 304 | " for k in range(n-2, -1, -1):\n", 305 | " S[k] = max(S[k+1], A[k])\n", 306 | " # G[k] = S[k] - B[k] for k = 0, 1, ..., n-1\n", 307 | " # = best gain from buying no later than k, then selling no earlier than k\n", 308 | " G = [ S[k]-B[k] for k in range(n) ]\n", 309 | " # opt = max { G[k]: 0 <= k < n }\n", 310 | " # = best possible gain for given input A\n", 311 | " opt = max(G)\n", 312 | " return opt\n" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 13, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/plain": [ 325 | "16" 326 | ] 327 | }, 328 | "execution_count": 13, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "lin([20, 3, 19, 1, 15, 6])" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 14, 340 | "metadata": { 341 | "collapsed": false 342 | }, 343 | "outputs": [ 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | " 30010 function calls in 0.026 seconds\n", 349 | "\n", 350 | " Ordered by: standard name\n", 351 | "\n", 352 | " ncalls tottime percall cumtime percall 
filename:lineno(function)\n", 353 | " 1 0.004 0.004 0.006 0.006 :1(make_prices)\n", 354 | " 1 0.015 0.015 0.020 0.020 :1(lin)\n", 355 | " 1 0.000 0.000 0.026 0.026 :1()\n", 356 | " 1 0.000 0.000 0.000 0.000 random.py:100(seed)\n", 357 | " 1 0.000 0.000 0.000 0.000 {function seed at 0x7f90fbfeb578}\n", 358 | " 1 0.000 0.000 0.000 0.000 {len}\n", 359 | " 10000 0.003 0.000 0.003 0.000 {max}\n", 360 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 361 | " 10000 0.001 0.000 0.001 0.000 {method 'random' of '_random.Random' objects}\n", 362 | " 9999 0.003 0.000 0.003 0.000 {min}\n", 363 | " 4 0.001 0.000 0.001 0.000 {range}\n", 364 | "\n", 365 | "\n" 366 | ] 367 | } 368 | ], 369 | "source": [ 370 | "cProfile.run(\"lin(make_prices(10000, 1))\")" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": { 376 | "collapsed": true 377 | }, 378 | "source": [ 379 | "# Problems to think about (non-examinable, non-compulsory, strictly for fun...)\n", 380 | "1. **Maximum sum subsequence problem** - given an array A find a contiguous subsequence of maximum sum. For example for\n", 381 | "```python\n", 382 | "A = [10, -2, 10, 5, -4, 14]\n", 383 | "```\n", 384 | "the answer is 33 (the sum of the whole array)." 
385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": { 391 | "collapsed": false 392 | }, 393 | "outputs": [], 394 | "source": [ 395 | "# Hint to problem 1\n", 396 | "cyph = lambda x: chr((ord(x) + 64) % 128)\n", 397 | "''.join(map(cyph, '\\x08).4z`2%$5#%`4()3`02/\",%-`4/`4(%`34/#+`%8#(!.\\'%`02/\",%-'))" 398 | ] 399 | } 400 | ], 401 | "metadata": { 402 | "kernelspec": { 403 | "display_name": "Python 2", 404 | "language": "python", 405 | "name": "python2" 406 | }, 407 | "language_info": { 408 | "codemirror_mode": { 409 | "name": "ipython", 410 | "version": 2 411 | }, 412 | "file_extension": ".py", 413 | "mimetype": "text/x-python", 414 | "name": "python", 415 | "nbconvert_exporter": "python", 416 | "pygments_lexer": "ipython2", 417 | "version": "2.7.8" 418 | } 419 | }, 420 | "nbformat": 4, 421 | "nbformat_minor": 0 422 | } 423 | -------------------------------------------------------------------------------- /challenge1/tests/mon4b.in: -------------------------------------------------------------------------------- 1 | 21243 1242 2 | 
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 3 | -------------------------------------------------------------------------------- /lecture7/Radix Sort Performance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 26, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import random\n", 12 | "import cProfile" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Radix sort\n", 20 | "\n", 21 | "Let's look at the code from pervious notebook. It is slightly augmented - `radix_sort_by_ith_digit` was incorporated in `radix_sort` and there are a few minor tweaks. 
" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 59, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "def is_sorted(array):\n", 33 | " for i in xrange(1, len(array)):\n", 34 | " if array[i-1] > array[i]:\n", 35 | " return False\n", 36 | " return True\n", 37 | "\n", 38 | "def radix_sort(array, b):\n", 39 | " assert b > 1\n", 40 | " i = 0\n", 41 | " while True:\n", 42 | " if is_sorted(array):\n", 43 | " break\n", 44 | " \n", 45 | " buckets = [ [] for _ in xrange(b)]\n", 46 | " for num in array:\n", 47 | " # we no longer use an extra function call for computing the digit\n", 48 | " bucket_idx = (num / b**i) % b\n", 49 | " buckets[bucket_idx].append(num)\n", 50 | " \n", 51 | " # we reuse original space in the array,\n", 52 | " # rather than allocating a new one.\n", 53 | " next_index = 0\n", 54 | " for bucket in buckets:\n", 55 | " for num in bucket:\n", 56 | " array[next_index] = num\n", 57 | " next_index += 1\n", 58 | " \n", 59 | " i += 1" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### Verify that it works on a simple example" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 60, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "[1, 2, 2, 3, 4, 5, 5, 6]" 80 | ] 81 | }, 82 | "execution_count": 60, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "example = [5,3,2,5,6,1,2,4]\n", 89 | "radix_sort(example, 2)\n", 90 | "example" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### We are now moving on to bigger examples\n", 98 | "The line\n", 99 | "```python\n", 100 | "random.seed(1)\n", 101 | "```\n", 102 | "ensures that we always generate the same example given test size (for fairness" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 61, 108 
| "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "def generate_test(test_size):\n", 114 | " random.seed(1)\n", 115 | " example = [ random.randint(0,2**30) for _ in range(test_size)]\n", 116 | " return example" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Let's try it for radix sort on different bases." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 62, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | " 30000065 function calls in 19.327 seconds\n", 138 | "\n", 139 | " Ordered by: standard name\n", 140 | "\n", 141 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 142 | " 31 0.153 0.005 0.153 0.005 :1(is_sorted)\n", 143 | " 1 17.945 17.945 19.315 19.315 :7(radix_sort)\n", 144 | " 1 0.012 0.012 19.327 19.327 :1()\n", 145 | " 31 0.000 0.000 0.000 0.000 {len}\n", 146 | " 30000000 1.217 0.000 1.217 0.000 {method 'append' of 'list' objects}\n", 147 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 148 | "\n", 149 | "\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "# BASE 2\n", 155 | "example = generate_test(1000000)\n", 156 | "cProfile.run(\"radix_sort(example, 2)\")\n", 157 | "assert is_sorted(example)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 64, 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | " 2000009 function calls in 1.696 seconds\n", 172 | "\n", 173 | " Ordered by: standard name\n", 174 | "\n", 175 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 176 | " 3 0.150 0.050 0.150 0.050 :1(is_sorted)\n", 177 | " 1 1.363 1.363 1.673 1.673 :7(radix_sort)\n", 178 | " 1 0.022 0.022 1.696 1.696 :1()\n", 179 | " 3 
0.000 0.000 0.000 0.000 {len}\n", 180 | " 2000000 0.160 0.000 0.160 0.000 {method 'append' of 'list' objects}\n", 181 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 182 | "\n", 183 | "\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "# BASE 2^16\n", 189 | "example = generate_test(1000000)\n", 190 | "cProfile.run(\"radix_sort(example, 2**16)\")\n", 191 | "assert is_sorted(example)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "Intuitively it makes sense. We only need two iterations for $b=2^{16}$, while we need 30 for $b=2$. We cannot really have a single iteration, as $b=2^{32}$ is more than the length of the longest array we expect to ever sort.\n", 199 | "\n", 200 | "Now let's try to run the standard sorting algorithm that is implemented by Python (Timsort, a hybrid of merge sort and insertion sort). It is a comparison-based sort and is therefore $O(n\\ lg\\ n)$. Radix sort is $O(n)$. We therefore expect our code to be faster." 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 65, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | " 3 function calls in 0.547 seconds\n", 215 | "\n", 216 | " Ordered by: standard name\n", 217 | "\n", 218 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 219 | " 1 0.000 0.000 0.547 0.547 :1()\n", 220 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 221 | " 1 0.547 0.547 0.547 0.547 {method 'sort' of 'list' objects}\n", 222 | "\n", 223 | "\n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "example = generate_test(1000000)\n", 229 | "cProfile.run(\"example.sort()\")\n", 230 | "assert is_sorted(example)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "In reality our code is 3 times slower. What a shame! 
Let's not give up yet..." 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "### A bit of bit magic.\n", 245 | "\n", 246 | "Two of the basic operations in radix sort are division and modulo. In particular, if our base is $b$, then in the $i$-th iteration of the radix sort algorithm one of the most common operations is indexing:\n", 247 | "\n", 248 | "```python\n", 249 | "# determine appropriate bucket.\n", 250 | "(num / b**i) % b\n", 251 | "```\n", 252 | "\n", 253 | "This is great, but it consists of expensive modulo and division operations (they can take up multiple processor cycles). \n", 254 | "\n", 255 | "Let's assume that b is a power of 2, i.e. $b=2^k$. Notice that `num / b**i` is equivalent to `num >> (k * i)`. In order to understand why this is the case, first convince yourself that division by 2 is equivalent to shaving off the rightmost bit." 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 66, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [ 265 | { 266 | "name": "stdout", 267 | "output_type": "stream", 268 | "text": [ 269 | "(6, 6)\n", 270 | "(8, 8)\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "print(100 / 16, 100 >> 4)\n", 276 | "print(128 / 16, 128 >> 4)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "Similarly, notice that for $b=2^k$ we have `num % b` equivalent to `num & (b-1)`. To understand that, notice that the $k$ lowest bits of `num` correspond to the remainder mod $b$." 
284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 67, 289 | "metadata": { 290 | "collapsed": false 291 | }, 292 | "outputs": [ 293 | { 294 | "name": "stdout", 295 | "output_type": "stream", 296 | "text": [ 297 | "(4, 4)\n", 298 | "(0, 0)\n", 299 | "(11, 11)\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "print(100 % 16, 100 & 15)\n", 305 | "print(128 % 16, 128 & 15)\n", 306 | "print(11 % 16, 11 & 15)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "Both `&` and `>>` are very efficient and only take one processor cycle. We can augment the implementation of radix sort from above to use them instead of `%` and `/`." 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 68, 319 | "metadata": { 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "def fast_radix_sort(array, k):\n", 325 | " \"\"\"Fast radix sort with base 2**k.\n", 326 | " \n", 327 | " This implementation uses bitwise operations\"\"\"\n", 328 | " assert k > 0\n", 329 | " i = 0\n", 330 | " \n", 331 | " b=2**k\n", 332 | " b_m1 = b - 1\n", 333 | " \n", 334 | " while True:\n", 335 | " if is_sorted(array):\n", 336 | " break\n", 337 | " shift = k * i\n", 338 | " buckets = [ [] for _ in xrange(b)]\n", 339 | " for num in array:\n", 340 | " bucket_idx = (num >> shift) & b_m1\n", 341 | " buckets[bucket_idx].append(num)\n", 342 | " \n", 343 | " next_index = 0\n", 344 | " for bucket in buckets:\n", 345 | " for num in bucket:\n", 346 | " array[next_index] = num\n", 347 | " next_index += 1\n", 348 | " \n", 349 | " i += 1" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 69, 355 | "metadata": { 356 | "collapsed": false 357 | }, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/plain": [ 362 | "[1, 2, 2, 3, 4, 5, 5, 6]" 363 | ] 364 | }, 365 | "execution_count": 69, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 
371 | "example = [5,3,2,5,6,1,2,4]\n", 372 | "fast_radix_sort(example, 16)\n", 373 | "example" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 71, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [ 383 | { 384 | "name": "stdout", 385 | "output_type": "stream", 386 | "text": [ 387 | " 2000009 function calls in 1.413 seconds\n", 388 | "\n", 389 | " Ordered by: standard name\n", 390 | "\n", 391 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 392 | " 3 0.154 0.051 0.154 0.051 :1(is_sorted)\n", 393 | " 1 1.081 1.081 1.390 1.390 :1(fast_radix_sort)\n", 394 | " 1 0.023 0.023 1.413 1.413 :1()\n", 395 | " 3 0.000 0.000 0.000 0.000 {len}\n", 396 | " 2000000 0.155 0.000 0.155 0.000 {method 'append' of 'list' objects}\n", 397 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 398 | "\n", 399 | "\n" 400 | ] 401 | } 402 | ], 403 | "source": [ 404 | "example = generate_test(1000000)\n", 405 | "cProfile.run(\"fast_radix_sort(example, 16)\")\n", 406 | "assert is_sorted(example)" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "*Good news* We got a 0.2s speed-up. Yay!\n", 414 | "\n", 415 | "*Bad news* We are still nowhere near the performance of `.sort`. Why is that?\n", 416 | "\n", 417 | "The answer is in our choice of programming language. Python is interpreted and is known to have slowdowns up to 100x compared to low-level languages like C/C++. That's why many of Python's routines are secretly implemented in C, `.sort` being one of them. In order to make the comparison fair we should also be allowed to write our implementation in C. Thankfully there's a Cython python extension, which makes it easy to interface with Python and compiles python-like code to C or C++. It should be pretty straightforward, but don't worry if you don't understand the details of the implementation below." 
418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 72, 423 | "metadata": { 424 | "collapsed": false 425 | }, 426 | "outputs": [ 427 | { 428 | "name": "stdout", 429 | "output_type": "stream", 430 | "text": [ 431 | "The Cython extension is already loaded. To reload it, use:\n", 432 | " %reload_ext Cython\n" 433 | ] 434 | } 435 | ], 436 | "source": [ 437 | "%load_ext Cython" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 73, 443 | "metadata": { 444 | "collapsed": false 445 | }, 446 | "outputs": [], 447 | "source": [ 448 | "%%cython --cplus\n", 449 | "\n", 450 | "from libcpp.vector cimport vector\n", 451 | "cimport cython\n", 452 | "\n", 453 | "\n", 454 | "cdef c_is_sorted(list array):\n", 455 | " \"\"\"Equivalent to implementation of is_sorted from above.\n", 456 | " \n", 457 | " However this one is compiled to pure C++ (thanks to cdef).\n", 458 | " We cannot call it directly for Python.\n", 459 | " \n", 460 | " The reasons it is slightly different from above is the fact that\n", 461 | " \n", 462 | " for current_num in array\n", 463 | " \n", 464 | " is super-efficient in Cython.\"\"\"\n", 465 | " cdef unsigned int lastnum = 0\n", 466 | " cdef bint first_iter = True\n", 467 | " \n", 468 | " for current_num in array:\n", 469 | " if not first_iter:\n", 470 | " if lastnum > current_num:\n", 471 | " return False\n", 472 | " else:\n", 473 | " first_iter = False\n", 474 | " lastnum = current_num\n", 475 | " return True\n", 476 | "\n", 477 | "def c_radix_sort(list array, int k):\n", 478 | " assert k > 0\n", 479 | " # Just like in C Cython requires us \n", 480 | " # to forward declare the variables\n", 481 | " cdef int i = 0\n", 482 | " cdef int b = 2 ** k\n", 483 | " cdef int b_m1 = b - 1\n", 484 | " cdef int shift = 0\n", 485 | " cdef int next_index = 0\n", 486 | " cdef int num = 0\n", 487 | " # vector[vector[int]] is a list of lists of integers.\n", 488 | " # (actually it is very efficient dynamically resizeable\n", 
489 | " # array)\n", 490 | " cdef vector[vector[int]] buckets\n", 491 | " \n", 492 | " # initialize list with b empty arrays\n", 493 | " for _ in xrange(b):\n", 494 | " buckets.push_back(vector[int]())\n", 495 | "\n", 496 | " \n", 497 | " # The code below barely changed compared to origninal\n", 498 | " # the only difference is the fast that we access buckets\n", 499 | " # slightly differently to be compliant with vector API.\n", 500 | " while True:\n", 501 | " if c_is_sorted(array):\n", 502 | " break\n", 503 | " shift = i * k\n", 504 | "\n", 505 | " for bucket_idx in xrange(b):\n", 506 | " buckets[bucket_idx].clear()\n", 507 | " \n", 508 | " for num in array:\n", 509 | " bucket_idx = (num >> shift) & b_m1\n", 510 | " buckets[bucket_idx].push_back(num)\n", 511 | " \n", 512 | " next_index = 0\n", 513 | " for bucket_idx in xrange(b):\n", 514 | " for in_bucket_idx in xrange(buckets[bucket_idx].size()):\n", 515 | " array[next_index] = buckets[bucket_idx][in_bucket_idx]\n", 516 | " next_index += 1\n", 517 | " \n", 518 | " i += 1\n" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 74, 524 | "metadata": { 525 | "collapsed": false 526 | }, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "[1, 2, 2, 3, 4, 5, 5, 6]" 532 | ] 533 | }, 534 | "execution_count": 74, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "example = [5,3,2,5,6,1,2,4]\n", 541 | "c_radix_sort(example, 16)\n", 542 | "example" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 75, 548 | "metadata": { 549 | "collapsed": false 550 | }, 551 | "outputs": [ 552 | { 553 | "name": "stdout", 554 | "output_type": "stream", 555 | "text": [ 556 | " 3 function calls in 0.128 seconds\n", 557 | "\n", 558 | " Ordered by: standard name\n", 559 | "\n", 560 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 561 | " 1 0.000 0.000 0.128 0.128 :1()\n", 562 | " 1 0.128 0.128 0.128 
0.128 {_cython_magic_a617e99601e7e788cc896c9cdd2003a9.c_radix_sort}\n", 563 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 564 | "\n", 565 | "\n" 566 | ] 567 | } 568 | ], 569 | "source": [ 570 | "example = generate_test(1000000)\n", 571 | "cProfile.run(\"c_radix_sort(example, 16)\")\n", 572 | "assert is_sorted(example)" 573 | ] 574 | }, 575 | { 576 | "cell_type": "markdown", 577 | "metadata": {}, 578 | "source": [ 579 | "## Victory!\n", 580 | "\n", 581 | "Our implementation of radix sort is 4x faster than the default Python sort. \n", 582 | "\n", 583 | "This is expected as it has lower complexity and a very low constant of proportionality. \n", 584 | "\n", 585 | "Bear in mind that Python sort is more general though - it would be nontrivial to use radix sort to sort long strings for example." 586 | ] 587 | } 588 | ], 589 | "metadata": { 590 | "kernelspec": { 591 | "display_name": "Python 2", 592 | "language": "python", 593 | "name": "python2" 594 | }, 595 | "language_info": { 596 | "codemirror_mode": { 597 | "name": "ipython", 598 | "version": 2 599 | }, 600 | "file_extension": ".py", 601 | "mimetype": "text/x-python", 602 | "name": "python", 603 | "nbconvert_exporter": "python", 604 | "pygments_lexer": "ipython2", 605 | "version": "2.7.8" 606 | } 607 | }, 608 | "nbformat": 4, 609 | "nbformat_minor": 0 610 | } 611 | -------------------------------------------------------------------------------- /lecture1/Fast exponentiation and fibonacci sequence.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import cProfile\n", 13 | "from operator import mul" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Fast Exponentiation" 21 | ] 22 | }, 23 | { 24 | "cell_type": 
"markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "We can quickly compute $a^b \\ mod\\ m$ using, the following trick best illustrated by example.\n", 28 | "\n", 29 | "$$\n", 30 | "5^{13} = 5 * 5^{12} = 5 * (5^6)^2 = 5 * ((5^3)^2)^2 = 5 * ((5*5^2)^2)^2 = 5 * ((5*5*5)^2)^2\n", 31 | "$$\n", 32 | "\n", 33 | "In that example even though we would naively need 11 multiplications to calculate results we managed to get away with 5.\n", 34 | "In general we can write out \n", 35 | "\n", 36 | "$$\n", 37 | "fexp(a,b,m) =\n", 38 | "\\begin{cases}\n", 39 | "a & \\text{if}\\ b=1\\\\\n", 40 | "fexp(a,b/2,m)^2 &\\text{if}\\ b\\ \\text{even}\\\\\n", 41 | "a \\cdot fexp(a,b-1,m) &\\text{otherwise}\n", 42 | "\\end{cases}\n", 43 | "$$\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "def fexp_recursive(a, b, m, mul_op=mul):\n", 55 | " # We can easiely handle b = 0, here, but we choose not to\n", 56 | " # this will be helpful later when we deal with matrices...\n", 57 | " assert b >= 1\n", 58 | " if b == 1:\n", 59 | " return a\n", 60 | " elif b % 2 == 0:\n", 61 | " conquered = fexp_recursive(a, b / 2, m, mul_op=mul_op)\n", 62 | " return mul_op(conquered, conquered) % m\n", 63 | " else:\n", 64 | " b_one_less = fexp_recursive(a, b - 1, m, mul_op=mul_op) \n", 65 | " return mul_op(a, b_one_less) % m" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "64" 79 | ] 80 | }, 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "fexp_recursive(2, 6, 1000)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "4" 101 | ] 102 | }, 
103 | "execution_count": 4, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "fexp_recursive(2, 6, 10)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [ 119 | { 120 | "name": "stdout", 121 | "output_type": "stream", 122 | "text": [ 123 | " 89 function calls (46 primitive calls) in 0.000 seconds\n", 124 | "\n", 125 | " Ordered by: standard name\n", 126 | "\n", 127 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 128 | " 44/1 0.000 0.000 0.000 0.000 :1(fexp_recursive)\n", 129 | " 1 0.000 0.000 0.000 0.000 :1()\n", 130 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 131 | " 43 0.000 0.000 0.000 0.000 {operator.mul}\n", 132 | "\n", 133 | "\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "cProfile.run(\"fexp_recursive(2, 10000000000, 10)\")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Iterative approach\n", 146 | "Very similar algorithm can be written out without recursion by looking at binary representation of b and noticing that if $i-th$ bit is one, then we need to multiply the result by $$a^{2^i}$$\n", 147 | "Don't worry if you don't fully understand the code below. It is included here, to show you that there are multiple ways of approaching implementation of this kind of solution. Also iterative algorithms are sometimes preferred - we will get back to that point below." 
148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": { 154 | "collapsed": true 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "def fexp_iterative(a, b, m, mul_op=mul):\n", 159 | " assert b >= 1\n", 160 | " result = a\n", 161 | " multiplier = a\n", 162 | " b -= 1\n", 163 | " while b > 0:\n", 164 | " if b % 2 == 1:\n", 165 | " result = mul_op(result, multiplier) % m\n", 166 | " multiplier = mul_op(multiplier, multiplier) % m\n", 167 | " b /= 2\n", 168 | " return result" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 7, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "64" 182 | ] 183 | }, 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "fexp_iterative(2, 6, 1000)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 8, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "4" 204 | ] 205 | }, 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "fexp_iterative(2, 6, 10)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 9, 218 | "metadata": { 219 | "collapsed": false 220 | }, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | " 57 function calls in 0.000 seconds\n", 227 | "\n", 228 | " Ordered by: standard name\n", 229 | "\n", 230 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 231 | " 1 0.000 0.000 0.000 0.000 :1(fexp_iterative)\n", 232 | " 1 0.000 0.000 0.000 0.000 :1()\n", 233 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 234 | " 54 0.000 0.000 0.000 0.000 {operator.mul}\n", 235 | "\n", 236 | "\n" 237 | ] 238 | } 239 | ], 
240 | "source": [ 241 | "cProfile.run(\"fexp_iterative(2, 10000000000, 10)\")" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "# Fibonacci sequence" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "The Fibonacci Sequence is the series of numbers: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, ... The next number is found by adding up the two numbers before it. I.e. 3 is found by adding the two numbers before it (1+2). Here we will explore 3 different algorithms for computing the $n^{th}$ Fibonacci number and analyze their time complexity. We denote the $n^{th}$ Fibonacci number as $F_{n}$. Code for the following 3 algorithms is in recitation1.py which is available on the Stellar site under recitation materials. \n" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "### Naive Recursion\n", 263 | "\n", 264 | "By definition, $F_{n} = F_{n - 1} + F_{n - 2}$. As this is the ``naive'' algorithm, let's not try to be too clever and instead simply write an algorithm using only this definition!\n", 265 | "\n", 266 | "Now to analyze the runtime. Formally this algorithm can be analyzed by solving the recurrence, $T(n) = T(n - 1) + T(n - 2) + \\Theta(1)$. This is a tough recursion to solve! Let us separately find an upper and lower bound instead of a $\\Theta$ relation. \n", 267 | "\n", 268 | "It is clear that the recurrence $T(n) = 2T(n - 1) + \\Theta(1)$ is strictly greater than our original, so let us use it to find an upper bound. Each recursive call results in two child recursive calls until the base case is reached. Therefore, there will be $\\Theta(2^{i})$ recursive calls made at the $i^{th}$ level of recursion. Since, the subproblem size only decreases by one on each call, there will be $\\Theta(n)$ levels of recursion before the base case is reached. 
Therefore this recurrence solves to $\\Theta(2^{n})$ and we can conclude that our algorithm is $O(2^{n})$.\n", 269 | "\n", 270 | "The recurrence $T(n) = 2T(n - 2) + \\Theta(1)$ is strictly less than our original. Using similar logic as above we can see that this recurrence solves to $\\Theta(2^{\\frac{n}{2}})$ and we conclude that our algorithm is $\\Omega(2^{\\frac{n}{2}})$.\n", 271 | "\n", 272 | "Challenge Problem: Find a tight asymptotic bound on this algorithm's runtime. Hint: Draw a tree diagramming the recursive calls and look for the pattern!\n", 273 | "\n" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 10, 279 | "metadata": { 280 | "collapsed": true 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "def fibonacci_recursive_slow(n, m):\n", 285 | " assert n >= 0\n", 286 | " if n == 0:\n", 287 | " return 0\n", 288 | " elif n == 1:\n", 289 | " return 1\n", 290 | " else:\n", 291 | " return (fibonacci_recursive_slow(n - 1, m) + fibonacci_recursive_slow(n - 2, m)) % m " 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 11, 297 | "metadata": { 298 | "collapsed": false 299 | }, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "55" 305 | ] 306 | }, 307 | "execution_count": 11, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "fibonacci_recursive_slow(10, 1000)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 12, 319 | "metadata": { 320 | "collapsed": false 321 | }, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | " 2692539 function calls (3 primitive calls) in 0.888 seconds\n", 328 | "\n", 329 | " Ordered by: standard name\n", 330 | "\n", 331 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 332 | "2692537/1 0.888 0.000 0.888 0.888 :1(fibonacci_recursive_slow)\n", 333 | " 1 0.000 0.000 0.888 0.888 :1()\n", 334 | " 1 0.000 0.000 0.000 
0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 335 | "\n", 336 | "\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "cProfile.run(\"fibonacci_recursive_slow(30, 1000)\")" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "### Memoized Recursion\n", 349 | "It's often the case that we can improve the efficiency of algorithms by exploiting natural ``structures'' present in the problem. Notice in the naive algorithm that we often compute the same thing multiple times! This occurs because we have overlapping subproblems. For example, both $F_{n - 1}$ and $F_{n - 2}$ depend on the solution to $F_{n - 3}$. We can take advantage of this structure by memoizing (storing) the solutions to subproblems as we go. Therefore instead of recalculating them we can simply look them up! Look in recitation1.py for python code.\n", 350 | "\n", 351 | "This improved algorithm has a time complexity of $\\Theta(n)$. This can be seen from the fact that we in total solve for $\\Theta(n)$ $F_{i}$s, each of which take only constant time to compute. 
\n" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 13, 357 | "metadata": { 358 | "collapsed": true 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "cache = {}\n", 363 | "def fibonacci_recursive_fast(n, m):\n", 364 | " if not (n,m) in cache:\n", 365 | " assert n >= 0\n", 366 | " if n == 0:\n", 367 | " result = 0\n", 368 | " elif n == 1:\n", 369 | " result = 1\n", 370 | " else:\n", 371 | " result = (fibonacci_recursive_fast(n - 1, m) + fibonacci_recursive_fast(n - 2, m)) % m\n", 372 | " cache[(n,m)] = result\n", 373 | " return cache[(n,m)]" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 14, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "55" 387 | ] 388 | }, 389 | "execution_count": 14, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "fibonacci_recursive_fast(10, 1000)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 15, 401 | "metadata": { 402 | "collapsed": false 403 | }, 404 | "outputs": [ 405 | { 406 | "name": "stdout", 407 | "output_type": "stream", 408 | "text": [ 409 | " 1783 function calls (3 primitive calls) in 0.004 seconds\n", 410 | "\n", 411 | " Ordered by: standard name\n", 412 | "\n", 413 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 414 | " 1781/1 0.004 0.000 0.004 0.004 :2(fibonacci_recursive_fast)\n", 415 | " 1 0.000 0.000 0.004 0.004 :1()\n", 416 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 417 | "\n", 418 | "\n" 419 | ] 420 | } 421 | ], 422 | "source": [ 423 | "cProfile.run(\"fibonacci_recursive_fast(900, 1000)\")" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "### Iterative versus recursive solutions\n", 431 | "The code below should give a runtime error on a standard Python interpreter - because its 
exceeding the default stack limit. This kind of limitation is why we often opt for iterative versions of the algorithm. Don't worry though, it turns out that for every recursive solution there exists an iterative equivalent. Indeed - we can emulate the recursion stack with a stack data structure. Such a solution is often tedious to implement and the constant factor of the runtime becomes large. That's why we often seek a *natural order of calculation*, i.e. an order in which we compute the subproblems such that by the time we need a particular result it has already been computed. For example, in the case of Fibonacci the natural order of computation is to compute $F_1$, then $F_2$, then $F_3$ etc. Notice how the resulting solution is even simpler than the recursive one!" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 16, 437 | "metadata": { 438 | "collapsed": true 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "# fibonacci_recursive_fast(9000, 1000)" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 17, 448 | "metadata": { 449 | "collapsed": true 450 | }, 451 | "outputs": [], 452 | "source": [ 453 | "def fibonnaci_iterative(n, m):\n", 454 | " assert n >= 0\n", 455 | " if n == 0:\n", 456 | " return 0\n", 457 | " f_current, f_previous = 1, 0\n", 458 | " for _ in range(n - 1):\n", 459 | " f_current, f_previous = f_current + f_previous % m, f_current\n", 460 | " return f_current" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 18, 466 | "metadata": { 467 | "collapsed": false 468 | }, 469 | "outputs": [ 470 | { 471 | "data": { 472 | "text/plain": [ 473 | "55" 474 | ] 475 | }, 476 | "execution_count": 18, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "fibonnaci_iterative(10, 1000)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 19, 488 | "metadata": { 489 | "collapsed": false 490 | }, 491 | "outputs": [ 492 | { 
493 | "name": "stdout", 494 | "output_type": "stream", 495 | "text": [ 496 | " 4 function calls in 1.025 seconds\n", 497 | "\n", 498 | " Ordered by: standard name\n", 499 | "\n", 500 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 501 | " 1 0.901 0.901 1.025 1.025 :1(fibonnaci_iterative)\n", 502 | " 1 0.000 0.000 1.025 1.025 :1()\n", 503 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 504 | " 1 0.124 0.124 0.124 0.124 {range}\n", 505 | "\n", 506 | "\n" 507 | ] 508 | } 509 | ], 510 | "source": [ 511 | "cProfile.run(\"fibonnaci_iterative(10000000, 1000)\")" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": {}, 517 | "source": [ 518 | "### Matrix exponentiation\n", 519 | "\n", 520 | "Take a moment to think back to the recursive squaring algorithm from lecture. In a similar fashion, we can compute the $n^{th}$ Fibonacci number in logarithmic time by repeatedly squaring the matrix \n", 521 | "\n", 522 | "\n", 523 | "\\begin{align}\n", 524 | "\\begin{bmatrix}\n", 525 | " 1 & 1 \\\\\n", 526 | " 1 & 0\n", 527 | "\\end{bmatrix}\n", 528 | "\\end{align}\n", 529 | "\n", 530 | "In fact\n", 531 | "\n", 532 | "\n", 533 | "\\begin{align}\n", 534 | "\\begin{bmatrix}\n", 535 | " 1 & 1 \\\\\n", 536 | " 1 & 0\n", 537 | "\\end{bmatrix} ^{n} \n", 538 | "= \n", 539 | "\\begin{bmatrix}\n", 540 | " F_{n + 1} & F_{n} \\\\\n", 541 | " F_{n} & F_{n - 1}\n", 542 | "\\end{bmatrix}\n", 543 | "\\end{align}\n", 544 | "\n", 545 | "To give a rough proof of why this is the case, let us use induction on $n$. Our claim is trivially true in the base case $n = 1$. Now assuming that our claim holds for this matrix to the $n^{th}$ power, we must show that our claim is also true for this matrix to the $(n + 1)^{th}$ power. 
\n", 546 | "\n", 547 | "\n", 548 | "\\begin{align}\n", 549 | "\\begin{bmatrix}\n", 550 | " 1 & 1 \\\\\n", 551 | " 1 & 0\n", 552 | "\\end{bmatrix}\n", 553 | "*\n", 554 | " \\begin{bmatrix}\n", 555 | " 1 & 1 \\\\\n", 556 | " 1 & 0 \n", 557 | "\\end{bmatrix} ^{n} \n", 558 | "= \n", 559 | "\\begin{bmatrix}\n", 560 | " 1 & 1 \\\\\n", 561 | " 1 & 0 \\\\\n", 562 | "\\end{bmatrix}\n", 563 | "*\n", 564 | "\\begin{bmatrix}\n", 565 | " F_{n + 1} & F_{n} \\\\\n", 566 | " F_{n} & F_{n - 1}\n", 567 | "\\end{bmatrix}\n", 568 | " = \n", 569 | " \\begin{bmatrix}\n", 570 | " F_{n + 1} + F_{n} & F_{n} + F_{n - 1} \\\\\n", 571 | " F_{n + 1} & F_{n}\n", 572 | "\\end{bmatrix}\n", 573 | "=\n", 574 | "\\begin{bmatrix}\n", 575 | " F_{n + 2} & F_{n+1} \\\\\n", 576 | " F_{n + 1} & F_{n }\n", 577 | "\\end{bmatrix}\n", 578 | "\\end{align}\n", 579 | "\n", 580 | "Success! \n", 581 | "\n", 582 | "The runtime analysis for this algorithm is identical to that for modular exponentiation using repeated squaring. In particular we do not include cost of matrix multiply in our analysis, because matrix has constant size." 
583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 23, 588 | "metadata": { 589 | "collapsed": false 590 | }, 591 | "outputs": [], 592 | "source": [ 593 | "F = np.array([[1, 1],\n", 594 | " [1, 0]])\n", 595 | "\n", 596 | "def fibonnaci_matrix(n, m):\n", 597 | " Fn = fexp_recursive(F, n, m, mul_op=np.dot)\n", 598 | " return Fn[0][1]" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 24, 604 | "metadata": { 605 | "collapsed": false 606 | }, 607 | "outputs": [ 608 | { 609 | "data": { 610 | "text/plain": [ 611 | "55" 612 | ] 613 | }, 614 | "execution_count": 24, 615 | "metadata": {}, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "fibonnaci_matrix(10, 1000)" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 25, 626 | "metadata": { 627 | "collapsed": false 628 | }, 629 | "outputs": [ 630 | { 631 | "name": "stdout", 632 | "output_type": "stream", 633 | "text": [ 634 | " 90 function calls (47 primitive calls) in 0.001 seconds\n", 635 | "\n", 636 | " Ordered by: standard name\n", 637 | "\n", 638 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 639 | " 44/1 0.000 0.000 0.001 0.001 :1(fexp_recursive)\n", 640 | " 1 0.000 0.000 0.001 0.001 :4(fibonnaci_matrix)\n", 641 | " 1 0.000 0.000 0.001 0.001 :1()\n", 642 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 643 | " 43 0.000 0.000 0.000 0.000 {numpy.core.multiarray.dot}\n", 644 | "\n", 645 | "\n" 646 | ] 647 | } 648 | ], 649 | "source": [ 650 | "cProfile.run(\"fibonnaci_matrix(10000000000, 1000)\")" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "# Problems to think about (non-examinable, non-compulsory, strictly for fun...)\n", 658 | "\n", 659 | "1. 
Give an example of another operation besides multiplication and matrix multiply that can be efficiently composed using fast exponentiation.\n", 660 | "\n", 661 | "2. Compute the n-th item of the tribonacci sequence using the three methods presented above:\n", 662 | "\n", 663 | "\\begin{align}\n", 664 | "s_n =\n", 665 | "\\begin{cases}\n", 666 | "1 & \\text{if}\\ n \\in \\{ 1,2,3 \\} \\\\\n", 667 | "2s_{n-1} + 2s_{n-2} + s_{n-3} & \\text{otherwise}\n", 668 | "\\end{cases}\n", 669 | "\\end{align}\n" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": null, 675 | "metadata": { 676 | "collapsed": true 677 | }, 678 | "outputs": [], 679 | "source": [] 680 | } 681 | ], 682 | "metadata": { 683 | "kernelspec": { 684 | "display_name": "Python 2", 685 | "language": "python", 686 | "name": "python2" 687 | }, 688 | "language_info": { 689 | "codemirror_mode": { 690 | "name": "ipython", 691 | "version": 2 692 | }, 693 | "file_extension": ".py", 694 | "mimetype": "text/x-python", 695 | "name": "python", 696 | "nbconvert_exporter": "python", 697 | "pygments_lexer": "ipython2", 698 | "version": "2.7.8" 699 | } 700 | }, 701 | "nbformat": 4, 702 | "nbformat_minor": 0 703 | } 704 | -------------------------------------------------------------------------------- /lecture2/binary_search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 75, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "from scipy.spatial.distance import cosine as cosine_similarity\n", 13 | "import random\n", 14 | "import cProfile" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Binary search simulation\n", 22 | "\n", 23 | "To understand the kind of problems that computers need to deal with on a daily basis, I prepared a little simulation.\n", 24 | "\n", 25 | "You are given 
a hand of 20 sorted cards and your task is to find a particular card. Have fun!" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 76, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def generated_ordered_cards():\n", 37 | " cards = []\n", 38 | " cards.extend(unichr(x) for x in range(127185, 127185 + 14))\n", 39 | " cards.extend(unichr(x) for x in range(127169, 127169 + 14))\n", 40 | " cards.extend(unichr(x) for x in range(127153, 127153 + 14)) \n", 41 | " cards.extend(unichr(x) for x in range(127137, 127137 + 14))\n", 42 | " return cards" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 77, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "DOMAIN = generated_ordered_cards()\n", 54 | "NUM_SAMPLES = 20\n", 55 | "\n", 56 | "class Problem(object):\n", 57 | " def __init__(self):\n", 58 | " self.elements = set()\n", 59 | " while len(self.elements) < NUM_SAMPLES:\n", 60 | " self.elements.add(random.choice(DOMAIN))\n", 61 | " self.elements = sorted(list(self.elements), key=lambda x: DOMAIN.index(x))\n", 62 | " self.hide_all()\n", 63 | " self.query = random.choice(self.elements)\n", 64 | " \n", 65 | " def ask(self, position):\n", 66 | " assert 0 <= position < NUM_SAMPLES\n", 67 | " self.visible[position] = True\n", 68 | " return self\n", 69 | " \n", 70 | " def hide_all(self):\n", 71 | " self.visible = [False for _ in range(NUM_SAMPLES)]\n", 72 | " \n", 73 | " def _repr_html_(self):\n", 74 | " els_html = []\n", 75 | " for el_idx in range(len(self.elements)):\n", 76 | " if self.visible[el_idx]:\n", 77 | " els_html.append(\"%s\" % (self.elements[el_idx]))\n", 78 | " else:\n", 79 | " els_html.append(\"%d\" % (el_idx,))\n", 80 | " header_html = u\"

Find %s! (♣ < ♦ < ♥ < ♠)


\" % (self.query,)\n", 81 | " table_html = \"%s
\" % (\"\".join(els_html))\n", 82 | " return header_html + table_html" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 81, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/html": [ 95 | "

Find 🂷! (♣ < ♦ < ♥ < ♠)


012345678910111213141516171819
" 96 | ], 97 | "text/plain": [ 98 | "<__main__.Problem at 0x7ff69c3e8090>" 99 | ] 100 | }, 101 | "execution_count": 81, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "p = Problem()\n", 108 | "p" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 86, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/html": [ 121 | "

Find 🂷! (♣ < ♦ < ♥ < ♠)


0123456789🃈11🂱🂶🂷🂦16171819
" 122 | ], 123 | "text/plain": [ 124 | "<__main__.Problem at 0x7ff69c3e8090>" 125 | ] 126 | }, 127 | "execution_count": 86, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "p.ask(14)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# Binary search implemetation - GLOVE vectors\n", 146 | "\n", 147 | "Glove vectors try to assign vectors of numbers to words in surprising ways. One interesting property they have is\n", 148 | "ability to solve many analogies. For example:\n", 149 | "\n", 150 | "$$\n", 151 | "V_{\\text{berlin}} - V_{\\text{germany}} \\approx V_{\\text{paris}} - V_{\\text{france}}\n", 152 | "$$\n", 153 | "\n", 154 | "Today we will try to write an algorithm that can quickly find vectors from Glove database" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 92, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "class Glove(object):\n", 166 | " def __init__(self, path):\n", 167 | " self.word_vector = []\n", 168 | " \n", 169 | " with open(path) as f:\n", 170 | " for line in f:\n", 171 | " if len(line) < 1:\n", 172 | " break\n", 173 | " line = line.split(' ')\n", 174 | " word, vector = line[0], np.array([float(x) for x in line[1:]])\n", 175 | " self.word_vector.append((word, vector))\n", 176 | " self.word_vector.sort()\n", 177 | " \n", 178 | " def __call__(self, key):\n", 179 | " return self.find_vector(key)\n", 180 | " \n", 181 | " def find_vector(self, key):\n", 182 | " for word,vector in self.word_vector:\n", 183 | " if word == key:\n", 184 | " return vector\n", 185 | " raise KeyError(key)\n", 186 | " \n", 187 | " def find_closest_word(self, key_vector,blacklist=[]):\n", 188 | " best_similarity = float('inf')\n", 189 | " best_word = None\n", 190 | " for word, vector in 
self.word_vector:\n", 191 | " if word in blacklist:\n", 192 | " continue\n", 193 | " similarity = cosine_similarity(vector, key_vector)\n", 194 | " if best_similarity > similarity:\n", 195 | " best_similarity = similarity\n", 196 | " best_word = word\n", 197 | " return best_word" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 93, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "# Download from http://nlp.stanford.edu/projects/glove/\n", 209 | "glove = Glove(\"/home/sidor/projects/Dali/data/glove/glove.6B.300d.txt\")\n" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 52, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "# this call is quite slow...\n", 221 | "def analogy(thisword, tothis, islikethis):\n", 222 | " key_vector = glove(tothis) - glove(thisword) + glove(islikethis)\n", 223 | " best_word = glove.find_closest_word(key_vector, blacklist=[thisword, tothis,islikethis])\n", 224 | " print(\"%s is to %s like %s to <%s>\" % (thisword, tothis, islikethis, best_word,))" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 88, 230 | "metadata": { 231 | "collapsed": false 232 | }, 233 | "outputs": [ 234 | { 235 | "name": "stdout", 236 | "output_type": "stream", 237 | "text": [ 238 | "germany is to berlin like france to \n" 239 | ] 240 | } 241 | ], 242 | "source": [ 243 | "analogy(\"germany\", \"berlin\", \"france\")" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 46, 249 | "metadata": { 250 | "collapsed": false 251 | }, 252 | "outputs": [ 253 | { 254 | "name": "stdout", 255 | "output_type": "stream", 256 | "text": [ 257 | "movie is to movies like school to \n" 258 | ] 259 | } 260 | ], 261 | "source": [ 262 | "analogy(\"movie\", \"movies\", \"school\")" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 57, 268 | "metadata": { 269 | 
"collapsed": false 270 | }, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | "movie is to actor like school to \n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "analogy(\"movie\", \"actor\", \"school\")" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 55, 287 | "metadata": { 288 | "collapsed": false 289 | }, 290 | "outputs": [ 291 | { 292 | "name": "stdout", 293 | "output_type": "stream", 294 | "text": [ 295 | "smaller is to small like bigger to \n" 296 | ] 297 | } 298 | ], 299 | "source": [ 300 | "analogy(\"smaller\", \"small\", \"bigger\")" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 56, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [ 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "smaller is to small like big to \n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "analogy(\"smaller\", \"small\", \"big\")" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 60, 325 | "metadata": { 326 | "collapsed": false 327 | }, 328 | "outputs": [ 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "one is to two like two to \n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "analogy(\"one\", \"two\", \"two\")" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 59, 344 | "metadata": { 345 | "collapsed": false 346 | }, 347 | "outputs": [ 348 | { 349 | "name": "stdout", 350 | "output_type": "stream", 351 | "text": [ 352 | "king is to queen like man to \n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "analogy(\"king\", \"queen\", \"man\")" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "### Finding a vector given a word\n", 365 | "Notice that implementation of find_word above is quite naive:\n", 366 | " \n", 367 | "```python\n", 368 | " def 
find_vector(self, key):\n", 369 | " for word,vector in self.word_vector:\n", 370 | " if word == key:\n", 371 | " return vector\n", 372 | " raise KeyError(key)\n", 373 | "```\n", 374 | "\n", 375 | "Every query to look up a word takes about 100ms. This is not very good if we want to run millions of such queries per second!" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 94, 381 | "metadata": { 382 | "collapsed": false 383 | }, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | " 4 function calls in 0.135 seconds\n", 390 | "\n", 391 | " Ordered by: standard name\n", 392 | "\n", 393 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 394 | " 1 0.000 0.000 0.135 0.135 :14(__call__)\n", 395 | " 1 0.135 0.135 0.135 0.135 :17(find_vector)\n", 396 | " 1 0.000 0.000 0.135 0.135 :1()\n", 397 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 398 | "\n", 399 | "\n" 400 | ] 401 | } 402 | ], 403 | "source": [ 404 | "cProfile.run('glove(\"zebra\")')" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "### Implementation using binary search\n", 412 | "\n", 413 | "Here we implement the binary search algorithm. The list of words `glove.word_vector` is sorted alphabetically. Now we wish to find an index `i` such that `glove.word_vector[i][0] == key` for some `key` word.\n", 414 | "\n", 415 | "Let $l$ be the lowest index in our array (zero) and $h$ be the highest index in our array (in this case 400000 - 1). To find the index at which a particular word is stored we find the middle $m = (h + l) / 2$. If the key at index $m$ (denoted $k_m$) equals the desired key $k^*$ we are done; if it is less than $k^*$ we know that $k^*$ is at some index in range $[m+1, h]$, otherwise it is in range $[l, m-1]$. This idea can be recursively applied to all the subranges. 
Notice that with each query range size is halved, so the complexity of the solution is $O(\\log{(h - l)})$" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 95, 421 | "metadata": { 422 | "collapsed": false 423 | }, 424 | "outputs": [], 425 | "source": [ 426 | "def find_vector(self, key, lo=None, hi=None, debug=False):\n", 427 | " # Make sure by default we search over entire table\n", 428 | " if lo is None:\n", 429 | " lo = 0\n", 430 | " if hi is None:\n", 431 | " hi = len(self.word_vector) - 1\n", 432 | " \n", 433 | " if lo > hi:\n", 434 | " raise KeyError(key)\n", 435 | " \n", 436 | " mid = (hi + lo) / 2\n", 437 | " word, vector = self.word_vector[mid]\n", 438 | " if debug:\n", 439 | " print(\"Looking for %s in range(%d, %d). Middle is %s\" % (key, lo, hi, word))\n", 440 | "\n", 441 | " if word == key:\n", 442 | " return vector\n", 443 | " elif key < word:\n", 444 | " return self.find_vector(key, lo, mid - 1, debug=debug)\n", 445 | " else: # key > word\n", 446 | " return self.find_vector(key, mid + 1, hi, debug=debug)\n", 447 | " \n", 448 | "Glove.find_vector = find_vector" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 96, 454 | "metadata": { 455 | "collapsed": false 456 | }, 457 | "outputs": [ 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "Looking for zebra in range(0, 399999). Middle is jurnal\n", 463 | "Looking for zebra in range(200000, 399999). Middle is ramdass\n", 464 | "Looking for zebra in range(300000, 399999). Middle is syme\n", 465 | "Looking for zebra in range(350000, 399999). Middle is vadims\n", 466 | "Looking for zebra in range(375000, 399999). Middle is wilbarger\n", 467 | "Looking for zebra in range(387500, 399999). Middle is yevtushenko\n", 468 | "Looking for zebra in range(393750, 399999). Middle is zeughaus\n", 469 | "Looking for zebra in range(393750, 396873). Middle is zaa\n", 470 | "Looking for zebra in range(395312, 396873). 
Middle is zaremba\n", 471 | "Looking for zebra in range(396093, 396873). Middle is zehava\n", 472 | "Looking for zebra in range(396093, 396482). Middle is zayu\n", 473 | "Looking for zebra in range(396288, 396482). Middle is zeami\n", 474 | "Looking for zebra in range(396386, 396482). Middle is zeder\n", 475 | "Looking for zebra in range(396386, 396433). Middle is zebu\n", 476 | "Looking for zebra in range(396386, 396408). Middle is zebic\n", 477 | "Looking for zebra in range(396398, 396408). Middle is zebras\n", 478 | "Looking for zebra in range(396398, 396402). Middle is zebra\n" 479 | ] 480 | } 481 | ], 482 | "source": [ 483 | "vec = glove.find_vector(\"zebra\", debug=True)" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 72, 489 | "metadata": { 490 | "collapsed": false 491 | }, 492 | "outputs": [ 493 | { 494 | "name": "stdout", 495 | "output_type": "stream", 496 | "text": [ 497 | "Looking for ronrivest in range(0, 399999). Middle is jurnal\n", 498 | "Looking for ronrivest in range(200000, 399999). Middle is ramdass\n", 499 | "Looking for ronrivest in range(300000, 399999). Middle is syme\n", 500 | "Looking for ronrivest in range(300000, 349998). Middle is sensationalism\n", 501 | "Looking for ronrivest in range(300000, 324998). Middle is rs6\n", 502 | "Looking for ronrivest in range(300000, 312498). Middle is rescue\n", 503 | "Looking for ronrivest in range(306250, 312498). Middle is riveras\n", 504 | "Looking for ronrivest in range(309375, 312498). Middle is romero\n", 505 | "Looking for ronrivest in range(310937, 312498). Middle is rostekhnadzor\n", 506 | "Looking for ronrivest in range(310937, 311716). Middle is rosaleda\n", 507 | "Looking for ronrivest in range(310937, 311325). Middle is ronstadt\n", 508 | "Looking for ronrivest in range(310937, 311130). Middle is ronayne\n", 509 | "Looking for ronrivest in range(311034, 311130). Middle is rongbuk\n", 510 | "Looking for ronrivest in range(311083, 311130). 
Middle is ronin\n", 511 | "Looking for ronrivest in range(311107, 311130). Middle is ronni\n", 512 | "Looking for ronrivest in range(311119, 311130). Middle is rono\n", 513 | "Looking for ronrivest in range(311125, 311130). Middle is ronsard\n", 514 | "Looking for ronrivest in range(311125, 311126). Middle is ronon\n", 515 | "Looking for ronrivest in range(311126, 311126). Middle is ronquillo\n", 516 | "Not found!\n" 517 | ] 518 | } 519 | ], 520 | "source": [ 521 | "try:\n", 522 | " vec = glove.find_vector(\"ronrivest\", debug=True)\n", 523 | "except KeyError:\n", 524 | " print(\"Not found!\")" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 97, 530 | "metadata": { 531 | "collapsed": false 532 | }, 533 | "outputs": [ 534 | { 535 | "name": "stdout", 536 | "output_type": "stream", 537 | "text": [ 538 | " 21 function calls (5 primitive calls) in 0.000 seconds\n", 539 | "\n", 540 | " Ordered by: standard name\n", 541 | "\n", 542 | " ncalls tottime percall cumtime percall filename:lineno(function)\n", 543 | " 1 0.000 0.000 0.000 0.000 :14(__call__)\n", 544 | " 17/1 0.000 0.000 0.000 0.000 :1(find_vector)\n", 545 | " 1 0.000 0.000 0.000 0.000 :1()\n", 546 | " 1 0.000 0.000 0.000 0.000 {len}\n", 547 | " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", 548 | "\n", 549 | "\n" 550 | ] 551 | } 552 | ], 553 | "source": [ 554 | "cProfile.run('glove(\"zebra\")')" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": { 560 | "collapsed": true 561 | }, 562 | "source": [ 563 | "### Extra problems (not graded)\n", 564 | "1. Here's another way of thinking about binary search - we are looking for the highest $x$ such that property $p(x)$ is true. For example, if we are looking for the index of a key in an array, $p(x) = k_x \\leq k^*$. Can you give examples of other properties that we can binary search over? What makes a property suitable for use in binary search?\n", 565 | "\n", 566 | "2. 
Binary search can **not** be used to minimize a quadratic function. Can you find a similar algorithm that can?" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": { 573 | "collapsed": false 574 | }, 575 | "outputs": [], 576 | "source": [ 577 | "# Hint to problem 2\n", 578 | "cyph = lambda x: chr((ord(x) + 64) % 128)\n", 579 | "''.join(map(cyph, \"\\t.34%!$`/&`30,)44).'`2!.'%`).`47/`0)%#%3l`#/.3)$%2`30,)44).'`)4`).`4(2%%`0)%#%3n\"))" 580 | ] 581 | } 582 | ], 583 | "metadata": { 584 | "kernelspec": { 585 | "display_name": "Python 2", 586 | "language": "python", 587 | "name": "python2" 588 | }, 589 | "language_info": { 590 | "codemirror_mode": { 591 | "name": "ipython", 592 | "version": 2 593 | }, 594 | "file_extension": ".py", 595 | "mimetype": "text/x-python", 596 | "name": "python", 597 | "nbconvert_exporter": "python", 598 | "pygments_lexer": "ipython2", 599 | "version": "2.7.8" 600 | } 601 | }, 602 | "nbformat": 4, 603 | "nbformat_minor": 0 604 | } 605 | -------------------------------------------------------------------------------- /lecture7/Understanding Radix Sort.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Understanding radix sort\n", 8 | "\n", 9 | "First building block we will need is the `is_sorted` function. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "def is_sorted(array):\n", 21 | " \"\"\"Takes a sequence and returns true if an only if sequence is sorted.\"\"\"\n", 22 | " # check all the n-1 pairs of adjacent elements for \n", 23 | " # order violation\n", 24 | " for i in xrange(1, len(array)):\n", 25 | " if array[i-1] > array[i]:\n", 26 | " return False\n", 27 | " # if no violations, then by transitivity of < the sequence is sorted.\n", 28 | " return True" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "([1, 4, 6, 7, 8], True)\n", 43 | "([1, 4, 7, 6, 8], False)\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "# Verify implementation on few test cases\n", 49 | "example1 = [1,4,6,7,8]\n", 50 | "example2 = [1,4,7,6,8]\n", 51 | "print(example1, is_sorted(example1))\n", 52 | "print(example2, is_sorted(example2))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Stable sorting\n", 60 | "\n", 61 | "Python sort is stable. 
It means that if we have two elements that have the same value of *sorting key* they will appear in the output in the same order that they appeared in the input.\n", 62 | "\n", 63 | "Let's see an example: we have a list of pairs $(a,b)$ and we want to sort them in the nondecreasing order by $a$ and by nondecreasing $b$ if $a$'s are the same" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "[(3, 1), (3, 2), (1, 1), (1, 2), (2, 2), (2, 1)]" 77 | ] 78 | }, 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "example = [ (3,1),(3,2),(1,1),(1,2), (2,2),(2,1)]\n", 86 | "example" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "We can achieve that by sorting first by $b$ and then **stable-sorting** by $a$." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "[(3, 1), (1, 1), (2, 1), (3, 2), (1, 2), (2, 2)]" 107 | ] 108 | }, 109 | "execution_count": 4, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "sorted_idx2 = sorted(example, key=lambda x: x[1])\n", 116 | "sorted_idx2" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "[(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)]" 130 | ] 131 | }, 132 | "execution_count": 5, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "sorted_idx12 = sorted(sorted_idx2, key=lambda x: x[0])\n", 139 | "sorted_idx12" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": 
{}, 145 | "source": [ 146 | "### Unstable sort example\n", 147 | "\n", 148 | "A sort does not have to be stable. For example heap sort isn't." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 6, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "class MaxHeap(object):\n", 160 | " def __init__(self, array, key):\n", 161 | " self.storage = array\n", 162 | " self.heap_size = len(array)\n", 163 | " self.key = key\n", 164 | " self.heapify()\n", 165 | " \n", 166 | " def fix_down(self, index):\n", 167 | " while index < self.heap_size:\n", 168 | " # pick maximum child\n", 169 | " max_child_idx = None\n", 170 | " if 2 * index + 1 < self.heap_size:\n", 171 | " max_child_idx = 2 * index + 1\n", 172 | "\n", 173 | " if 2 * index + 2 < self.heap_size and \\\n", 174 | " self.key(self.storage[2 * index + 1]) < self.key(self.storage[2 * index + 2]):\n", 175 | " max_child_idx = 2 * index + 2\n", 176 | "\n", 177 | " if max_child_idx is None or \\\n", 178 | " self.key(self.storage[index]) > self.key(self.storage[max_child_idx]):\n", 179 | " # heap property satisfied\n", 180 | " break\n", 181 | " \n", 182 | " self.storage[index], self.storage[max_child_idx] = self.storage[max_child_idx], self.storage[index]\n", 183 | " index = max_child_idx\n", 184 | " \n", 185 | " def fix_up(self, index):\n", 186 | " assert index < self.heap_size\n", 187 | " while index > 0:\n", 188 | " parent_idx = (index - 1) // 2\n", 189 | " if self.key(self.storage[index]) >= self.key(self.storage[parent_idx]):\n", 190 | " self.storage[index], self.storage[parent_idx] = self.storage[parent_idx], self.storage[index]\n", 191 | " index = parent_idx\n", 192 | " else:\n", 193 | " break\n", 194 | " def extract_max(self):\n", 195 | " self.storage[0], self.storage[self.heap_size - 1] = self.storage[self.heap_size -1], self.storage[0]\n", 196 | " self.heap_size -= 1\n", 197 | " self.fix_down(0)\n", 198 | " return self.storage[self.heap_size]\n", 199 
| " \n", 200 | " def heapify(self):\n", 201 | " for i in range(self.heap_size - 1, -1, -1):\n", 202 | " self.fix_down(i)\n", 203 | " \n", 204 | "def heap_sort(array, key=lambda x:x):\n", 205 | " h = MaxHeap(array[:], key)\n", 206 | " while h.heap_size > 0:\n", 207 | " h.extract_max()\n", 208 | " return h.storage" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 7, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "[(2, 1), (1, 1), (3, 1), (2, 2), (3, 2), (1, 2)]" 222 | ] 223 | }, 224 | "execution_count": 7, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "sorted_idx2 = heap_sort(example, key=lambda x: x[1])\n", 231 | "sorted_idx2" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 8, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "[(1, 2), (1, 1), (2, 1), (2, 2), (3, 1), (3, 2)]\n", 246 | "Notice that secondary sorting criterion is violated.\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "sorted_idx12 = heap_sort(sorted_idx2, key=lambda x: x[0])\n", 252 | "print(sorted_idx12)\n", 253 | "print(\"Notice that secondary sorting criterion is violated.\")" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "## Counting Sort\n", 261 | "\n", 262 | "In order to keep complexity at $O(n)$, we will need to devise a procedure that sorts without using comparisons.\n", 263 | "\n", 264 | "Assume we only have elements $0, 1, ..., (k-1)$ in the array. We know that all zeros come before all ones etc. We can therefore put all the numbers in $k$ different buckets and later read them off." 
265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 9, 270 | "metadata": { 271 | "collapsed": false 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "def count_sort(array, k, key=lambda x: x):\n", 276 | " \"\"\"Stable sorts array by using key to determine ordering of elements.\n", 277 | " \n", 278 | " Assumes all elements are in range(0, k)\"\"\"\n", 279 | " # initialize array \n", 280 | " buckets = [[] for _ in range(k)]\n", 281 | " # for every key store all the elements\n", 282 | " # with that key\n", 283 | " for element in array:\n", 284 | " buckets[key(element)].append(element)\n", 285 | " output = []\n", 286 | " # red numbers from buckets in order\n", 287 | " for bucket in buckets:\n", 288 | " for element in bucket:\n", 289 | " output.append(element)\n", 290 | " return output" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 10, 296 | "metadata": { 297 | "collapsed": false 298 | }, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "[1, 2, 2, 3, 4, 5, 5]" 304 | ] 305 | }, 306 | "execution_count": 10, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "count_sort([4,3,2,5,5,1,2], 10)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "### Count sort complexity analysis\n", 320 | "\n", 321 | "We have the following steps:\n", 322 | "- allocate space for $b$ buckets: $O(b)$\n", 323 | "- loop through all the elements in the input array and put them in buckets: $O(n)$\n", 324 | "- read the elements back out of the buckets: $O(n + b)$ (every bucket is visited, even the empty ones)\n", 325 | "\n", 326 | "Therefore the total complexity is $O(n+b)$\n" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "## Radix sort idea\n", 334 | "\n", 335 | "Imagine that you want to compare two long numbers. For example 85823421348134214 and 85823421348452456. 
The algorithm you would use is to compare the first digit and if it is the same then compare the next digit etc. We can say that first digit is the primary comparison criterion, second digit is the secondary sorting criterion etc. This is almost correct, but we actually need to make sure that we add extra zeros at the beginning of the number that is shorter (because shorter numbers come before longer numbers). \n", 336 | "\n", 337 | "Radix sort uses this idea directly for sorting. It first sorts the numbers by last digit. Then it *stable-sorts* them by the second to last digit (making second to last digit primary sorting criterion and the last digit secondary sorting criterion) and so on. At the end of that process we end up with numbers sorted in exactly the order we discussed above.\n", 338 | "\n", 339 | "To implement that idea let's first look at how we would obtain the digits. " 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 11, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "def ith_digit(number, i):\n", 351 | " \"\"\"Returns the i-th digit from the end. \n", 352 | " \n", 353 | " i=0 resuts the very last digit.\"\"\"\n", 354 | " for _ in range(i):\n", 355 | " number /= 10\n", 356 | " return number % 10" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 12, 362 | "metadata": { 363 | "collapsed": false 364 | }, 365 | "outputs": [ 366 | { 367 | "name": "stdout", 368 | "output_type": "stream", 369 | "text": [ 370 | "3\n", 371 | "2\n", 372 | "1\n", 373 | "0\n", 374 | "0\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "print(ith_digit(123, 0))\n", 380 | "print(ith_digit(123, 1))\n", 381 | "print(ith_digit(123, 2))\n", 382 | "print(ith_digit(123, 3))\n", 383 | "print(ith_digit(123, 4))" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "Sweet! 
We have a function that returns i-th digit and it even yields additional zeros in the front - just what we needed.\n", 391 | "\n", 392 | "\n", 393 | "## Radix Sort using digit $i$ from the end\n", 394 | "\n", 395 | "The idea is to use count sort with the digit being the key. \n", 396 | "\n", 397 | "For example if we sort `[123, 42, 73]` by the last digit, bucket nr 2 will have one number `[42]`, while bucket nr 3 will have two numbers `[123, 73]`, and the remaining eight buckets will be empty. It is important that bucket nr 3 has `[123, 73]` not `[73, 123]` - this way if we read out the numbers in order they appear in the buckets we will get a stable sort." 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 13, 403 | "metadata": { 404 | "collapsed": true 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "def radix_sort_by_ith_digit(array, i):\n", 409 | " return count_sort(array, \n", 410 | " 10, # we have 10 different digits.\n", 411 | " key=lambda number: ith_digit(number, i)) # use i-th digit as a key." 
412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 14, 417 | "metadata": { 418 | "collapsed": false 419 | }, 420 | "outputs": [ 421 | { 422 | "data": { 423 | "text/plain": [ 424 | "[42, 123, 73]" 425 | ] 426 | }, 427 | "execution_count": 14, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [ 433 | "# sort by the last digit\n", 434 | "pass1 = radix_sort_by_ith_digit([123,42,73], 0)\n", 435 | "pass1" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 15, 441 | "metadata": { 442 | "collapsed": false 443 | }, 444 | "outputs": [ 445 | { 446 | "data": { 447 | "text/plain": [ 448 | "[123, 42, 73]" 449 | ] 450 | }, 451 | "execution_count": 15, 452 | "metadata": {}, 453 | "output_type": "execute_result" 454 | } 455 | ], 456 | "source": [ 457 | "# sort result of previous pass by the second to last digit\n", 458 | "pass2 = radix_sort_by_ith_digit(pass1, 1)\n", 459 | "pass2" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 16, 465 | "metadata": { 466 | "collapsed": false 467 | }, 468 | "outputs": [ 469 | { 470 | "data": { 471 | "text/plain": [ 472 | "[42, 73, 123]" 473 | ] 474 | }, 475 | "execution_count": 16, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | } 479 | ], 480 | "source": [ 481 | "# sort result of previous pass by the third to last digit\n", 482 | "# none of the numbers are longer than third digit, so we are done.\n", 483 | "pass3 = radix_sort_by_ith_digit(pass2, 2)\n", 484 | "pass3" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "What happened above is exactly radix sort! Sort iteratively by digits further and further from the end until the sequence ends up sorted." 
492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 17, 497 | "metadata": { 498 | "collapsed": true 499 | }, 500 | "outputs": [], 501 | "source": [ 502 | "def radix_sort(array):\n", 503 | " \"\"\"Returns array sorted by i-th digit from the end.\n", 504 | " \n", 505 | " The sorting procedure is stable.\"\"\"\n", 506 | " i = 0\n", 507 | " while True:\n", 508 | " if is_sorted(array):\n", 509 | " # we stop once the array is sorted\n", 510 | " # the latest this can happen is when \n", 511 | " # we run the number of passes eqaul to\n", 512 | " # the length of the longest number\n", 513 | " break\n", 514 | " # stable sort by i-th digit.\n", 515 | " array = radix_sort_by_ith_digit(array, i)\n", 516 | " i += 1\n", 517 | " return array" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 18, 523 | "metadata": { 524 | "collapsed": false 525 | }, 526 | "outputs": [ 527 | { 528 | "data": { 529 | "text/plain": [ 530 | "[42, 73, 123]" 531 | ] 532 | }, 533 | "execution_count": 18, 534 | "metadata": {}, 535 | "output_type": "execute_result" 536 | } 537 | ], 538 | "source": [ 539 | "radix_sort([123,42,73])" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 19, 545 | "metadata": { 546 | "collapsed": false 547 | }, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "[42, 73, 123, 512, 524, 5214, 123123, 142124]" 553 | ] 554 | }, 555 | "execution_count": 19, 556 | "metadata": {}, 557 | "output_type": "execute_result" 558 | } 559 | ], 560 | "source": [ 561 | "# harder example\n", 562 | "radix_sort([123,42,73, 123123, 142124, 524, 512, 5214])" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### Radix sort with different numeric base. \n", 570 | "\n", 571 | "Let's try to improve our algorithm slightly. Notice that the fact that we use digits in base $10$ is kind of arbitrary. How hard would it be to use any $b \\geq 2$? 
In theory all we should be required to do is to change the digit extraction procedure and the number of buckets. \n", 572 | "\n", 573 | "Let's start with the digits." 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 20, 579 | "metadata": { 580 | "collapsed": true 581 | }, 582 | "outputs": [], 583 | "source": [ 584 | "def ith_digit(number, b, i):\n", 585 | " \"\"\"Returns the i-th digit from the end (base b).\n", 586 | " \n", 587 | " i=0 resuts the very last digit.\"\"\"\n", 588 | " for _ in range(i):\n", 589 | " number /= b # changed 10 to b\n", 590 | " return number % b # changed 10 to b" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 21, 596 | "metadata": { 597 | "collapsed": false 598 | }, 599 | "outputs": [ 600 | { 601 | "name": "stdout", 602 | "output_type": "stream", 603 | "text": [ 604 | "7 mod 2\n", 605 | "1\n", 606 | "1\n", 607 | "1\n", 608 | "0\n", 609 | "0\n" 610 | ] 611 | } 612 | ], 613 | "source": [ 614 | "print(\"7 mod 2\")\n", 615 | "print(ith_digit(7, 2, 0))\n", 616 | "print(ith_digit(7, 2, 1))\n", 617 | "print(ith_digit(7, 2, 2))\n", 618 | "print(ith_digit(7, 2, 3))\n", 619 | "print(ith_digit(7, 2, 4))" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": 22, 625 | "metadata": { 626 | "collapsed": false 627 | }, 628 | "outputs": [ 629 | { 630 | "name": "stdout", 631 | "output_type": "stream", 632 | "text": [ 633 | "7 mod 3\n", 634 | "1\n", 635 | "2\n", 636 | "0\n", 637 | "0\n", 638 | "0\n" 639 | ] 640 | } 641 | ], 642 | "source": [ 643 | "print(\"7 mod 3\")\n", 644 | "print(ith_digit(7, 3, 0))\n", 645 | "print(ith_digit(7, 3, 1))\n", 646 | "print(ith_digit(7, 3, 2))\n", 647 | "print(ith_digit(7, 3, 3))\n", 648 | "print(ith_digit(7, 3, 4))" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | "Now we are ready to augment to radix_sort." 
656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": 23, 661 | "metadata": { 662 | "collapsed": true 663 | }, 664 | "outputs": [], 665 | "source": [ 666 | "def radix_sort_by_ith_digit(array, b, i):\n", 667 | " \"\"\"Returns array sorted by i-th digit from the end (base b).\n", 668 | " \n", 669 | " The sorting procedure is stable.\"\"\"\n", 670 | " return count_sort(array, b, key=lambda number: ith_digit(number, b, i))" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 24, 676 | "metadata": { 677 | "collapsed": true 678 | }, 679 | "outputs": [], 680 | "source": [ 681 | "def radix_sort(array, b):\n", 682 | " \"\"\"Returns array sorted by i-th digit from the end.\n", 683 | " \n", 684 | " The sorting procedure is stable.\"\"\"\n", 685 | " i = 0\n", 686 | " while True:\n", 687 | " if is_sorted(array):\n", 688 | " # we stop once the array is sorted\n", 689 | " # the latest this can happen is when \n", 690 | " # we run the number of passes eqaul to\n", 691 | " # the length of the longest number\n", 692 | " break\n", 693 | " print(\"Iteration %d\" % (i,))\n", 694 | " # stable sort by i-th digit.\n", 695 | " array = radix_sort_by_ith_digit(array, b, i)\n", 696 | " i += 1\n", 697 | " return array" 698 | ] 699 | }, 700 | { 701 | "cell_type": "markdown", 702 | "metadata": {}, 703 | "source": [ 704 | "Let's try sorting in base $b=2$" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 25, 710 | "metadata": { 711 | "collapsed": false 712 | }, 713 | "outputs": [ 714 | { 715 | "name": "stdout", 716 | "output_type": "stream", 717 | "text": [ 718 | "Iteration 0\n", 719 | "Iteration 1\n", 720 | "Iteration 2\n", 721 | "Iteration 3\n", 722 | "Iteration 4\n", 723 | "Iteration 5\n", 724 | "Iteration 6\n" 725 | ] 726 | }, 727 | { 728 | "data": { 729 | "text/plain": [ 730 | "[42, 73, 123]" 731 | ] 732 | }, 733 | "execution_count": 25, 734 | "metadata": {}, 735 | "output_type": "execute_result" 736 | } 737 | ], 738 
| "source": [ 739 | "radix_sort([123,42,73], 2)" 740 | ] 741 | }, 742 | { 743 | "cell_type": "markdown", 744 | "metadata": {}, 745 | "source": [ 746 | "Whoa! 7 iterations? That is a lot to sort just 3 numbers. How about if we increase the base? Maybe $b=1000$?" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": 26, 752 | "metadata": { 753 | "collapsed": false 754 | }, 755 | "outputs": [ 756 | { 757 | "name": "stdout", 758 | "output_type": "stream", 759 | "text": [ 760 | "Iteration 0\n" 761 | ] 762 | }, 763 | { 764 | "data": { 765 | "text/plain": [ 766 | "[42, 73, 123]" 767 | ] 768 | }, 769 | "execution_count": 26, 770 | "metadata": {}, 771 | "output_type": "execute_result" 772 | } 773 | ], 774 | "source": [ 775 | "radix_sort([123,42,73], 1000)" 776 | ] 777 | }, 778 | { 779 | "cell_type": "markdown", 780 | "metadata": {}, 781 | "source": [ 782 | "Much better - we only have one iteration. Notice however that we have many more buckets than numbers - even though in theory we decrease the number of iterations, now every iteration is dominated by looping through every bucket. In this example $1000$ buckets visited in one iteration are much worse than two buckets visited in $7$ iterations (total of $14$ accesses). Actually array accesses contribute another 3 operations per iteration ($7 * 3 = 21$) adding up to a total of $35$ operations, but this is still much less than $1000$." 
783 | ] 784 | }, 785 | { 786 | "cell_type": "code", 787 | "execution_count": 27, 788 | "metadata": { 789 | "collapsed": false 790 | }, 791 | "outputs": [ 792 | { 793 | "name": "stdout", 794 | "output_type": "stream", 795 | "text": [ 796 | "Iteration 0\n", 797 | "Iteration 1\n", 798 | "Iteration 2\n" 799 | ] 800 | }, 801 | { 802 | "data": { 803 | "text/plain": [ 804 | "[42, 73, 123]" 805 | ] 806 | }, 807 | "execution_count": 27, 808 | "metadata": {}, 809 | "output_type": "execute_result" 810 | } 811 | ], 812 | "source": [ 813 | "# much healthier choice\n", 814 | "radix_sort([123,42,73], 5)" 815 | ] 816 | }, 817 | { 818 | "cell_type": "markdown", 819 | "metadata": {}, 820 | "source": [ 821 | "## Radix sort complexity analysis\n", 822 | "\n", 823 | "Let $b$ be the base and $n$ the size of the array. Moreover let's assume that all the numbers in the array are less than or equal to $a$.\n", 824 | "\n", 825 | "\n", 826 | "Single iteration of count sort is $O(n + b)$.\n", 827 | "\n", 828 | "How many iterations are there? At most as many as the number of digits in the longest number: $O(\\log_b a)$\n", 829 | "\n", 830 | "Therefore the total complexity of the algorithm is $O((n+b) \\log_b a)$.\n", 831 | "\n", 832 | "In theory we often assume that both $b$ and $a$ are constants - they are after all independent of $n$ - they won't influence the run time as $n$ grows. That's why some theorists say that Radix Sort is $O(n)$." 833 | ] 834 | }, 835 | { 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "## Exercises\n", 840 | "\n", 841 | "1. We said that the best possible algorithm that does sorting has complexity $O(n\\ lg\\ n)$. How is it possible that radix sort takes only $O(n)$ time? \n", 842 | "\n", 843 | "2. Can you come up with a sorting problem where it would be hard to use Radix Sort?" 844 | ] 845 | }, 846 | { 847 | "cell_type": "markdown", 848 | "metadata": {}, 849 | "source": [ 850 | "# Be sure to check out the Performance of Radix Sort notebook!" 
851 | ] 852 | }, 853 | { 854 | "cell_type": "markdown", 855 | "metadata": {}, 856 | "source": [ 857 | "# Aside: implementation of count sort from the lectures\n", 858 | "\n", 859 | "This implementation has the same time and space complexity, but is faster in practice." 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": 28, 865 | "metadata": { 866 | "collapsed": true 867 | }, 868 | "outputs": [], 869 | "source": [ 870 | "def count_sort_from_the_lecture(array, k, key=lambda x: x):\n", 871 | " # initialize array \n", 872 | " count = [0 for _ in range(k)]\n", 873 | " # for every key count the number of times\n", 874 | " # it occurs\n", 875 | " for element in array:\n", 876 | " count[key(element)] += 1\n", 877 | " # compute cumulative count of occurences\n", 878 | " for i in range(1, k):\n", 879 | " count[i] += count[i-1]\n", 880 | " # create output array\n", 881 | " output = [None for _ in range(len(array))]\n", 882 | " # fill in output array computing slots using\n", 883 | " # counts array\n", 884 | " for i in range(len(array) - 1, -1, -1):\n", 885 | " output[count[key(array[i])] - 1] = array[i]\n", 886 | " count[key(array[i])] -= 1\n", 887 | " return output" 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": 29, 893 | "metadata": { 894 | "collapsed": false 895 | }, 896 | "outputs": [ 897 | { 898 | "data": { 899 | "text/plain": [ 900 | "[1, 2, 2, 3, 4, 5, 5]" 901 | ] 902 | }, 903 | "execution_count": 29, 904 | "metadata": {}, 905 | "output_type": "execute_result" 906 | } 907 | ], 908 | "source": [ 909 | "count_sort_from_the_lecture([4,3,2,5,5,1,2], 10)" 910 | ] 911 | } 912 | ], 913 | "metadata": { 914 | "kernelspec": { 915 | "display_name": "Python 2", 916 | "language": "python", 917 | "name": "python2" 918 | }, 919 | "language_info": { 920 | "codemirror_mode": { 921 | "name": "ipython", 922 | "version": 2 923 | }, 924 | "file_extension": ".py", 925 | "mimetype": "text/x-python", 926 | "name": "python", 927 | 
"nbconvert_exporter": "python", 928 | "pygments_lexer": "ipython2", 929 | "version": "2.7.8" 930 | } 931 | }, 932 | "nbformat": 4, 933 | "nbformat_minor": 0 934 | } 935 | --------------------------------------------------------------------------------