├── challenge1
    ├── .gitignore
    ├── tests
    │   ├── mon0.out
    │   ├── mon1a.out
    │   ├── mon1b.out
    │   ├── mon1c.out
    │   ├── mon1d.out
    │   ├── mon2a.out
    │   ├── mon2b.out
    │   ├── mon2c.out
    │   ├── mon3c.out
    │   ├── mon3d.out
    │   ├── mon5c.out
    │   ├── mon8b.out
    │   ├── mon10a.out
    │   ├── mon10b.out
    │   ├── mon3a.out
    │   ├── mon3b.out
    │   ├── mon4a.out
    │   ├── mon4b.out
    │   ├── mon5a.out
    │   ├── mon5b.out
    │   ├── mon6a.out
    │   ├── mon6b.out
    │   ├── mon7a.out
    │   ├── mon7b.out
    │   ├── mon8a.out
    │   ├── mon9a.out
    │   ├── mon9b.out
    │   ├── mon10c.out
    │   ├── mon0.in
    │   ├── mon1b.in
    │   ├── mon1c.in
    │   ├── mon1a.in
    │   ├── mon1d.in
    │   ├── mon2c.in
    │   ├── mon2a.in
    │   ├── mon2b.in
    │   ├── mon3c.in
    │   ├── mon3a.in
    │   ├── mon3b.in
    │   ├── mon3d.in
    │   └── mon4b.in
    ├── url.txt
    ├── README.md
    ├── run_tests.sh
    ├── coins.py
    └── coins.cpp
├── hello.txt
├── .gitignore
├── challenge2
    ├── pilots_nlog2n.py
    ├── .gitignore
    ├── url.txt
    ├── README.md
    ├── download_test.sh
    ├── run_tests.sh
    ├── pilots_n3.py
    ├── pilots_n2.py
    ├── pilots.cpp
    ├── pilots_nlogn.py
    └── pilots_n.py
├── Lecture 17.ipynb
├── lecture4
    └── Heaps.ipynb
├── lecture12
    └── Google BFS.ipynb
├── lecture16
    └── Bignums.ipynb
├── lecture9
    └── Hashing.ipynb
├── lecture1
    ├── Stock Exchange.ipynb
    └── Fast exponentiation and fibonacci sequence.ipynb
├── lecture7
    ├── Radix Sort Performance.ipynb
    └── Understanding Radix Sort.ipynb
└── lecture2
    └── binary_search.ipynb


/challenge1/.gitignore:
--------------------------------------------------------------------------------
1 | coins
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon0.out:
--------------------------------------------------------------------------------
1 | 8
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1a.out:
--------------------------------------------------------------------------------
1 | 12
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1b.out:
--------------------------------------------------------------------------------
1 | 12
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1c.out:
--------------------------------------------------------------------------------
1 | 0
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1d.out:
--------------------------------------------------------------------------------
1 | 3
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon2a.out:
--------------------------------------------------------------------------------
1 | 24
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon2b.out:
--------------------------------------------------------------------------------
1 | 30
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon2c.out:
--------------------------------------------------------------------------------
1 | 0
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3c.out:
--------------------------------------------------------------------------------
1 | 0
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3d.out:
--------------------------------------------------------------------------------
1 | 5
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon5c.out:
--------------------------------------------------------------------------------
1 | 0
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon8b.out:
--------------------------------------------------------------------------------
1 | 0
2 | 


--------------------------------------------------------------------------------
/hello.txt:
--------------------------------------------------------------------------------
1 | hi
2 | how are you?
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon10a.out:
--------------------------------------------------------------------------------
1 | 405963
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon10b.out:
--------------------------------------------------------------------------------
1 | 37260
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3a.out:
--------------------------------------------------------------------------------
1 | 960
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3b.out:
--------------------------------------------------------------------------------
1 | 1243
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon4a.out:
--------------------------------------------------------------------------------
1 | 10032
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon4b.out:
--------------------------------------------------------------------------------
1 | 9944
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon5a.out:
--------------------------------------------------------------------------------
1 | 9120
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon5b.out:
--------------------------------------------------------------------------------
1 | 47898
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon6a.out:
--------------------------------------------------------------------------------
1 | 120258
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon6b.out:
--------------------------------------------------------------------------------
1 | 4806
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon7a.out:
--------------------------------------------------------------------------------
1 | 14240
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon7b.out:
--------------------------------------------------------------------------------
1 | 332673
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon8a.out:
--------------------------------------------------------------------------------
1 | 141284
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon9a.out:
--------------------------------------------------------------------------------
1 | 11859
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon9b.out:
--------------------------------------------------------------------------------
1 | 82660
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon10c.out:
--------------------------------------------------------------------------------
1 | 1000000
2 | 


--------------------------------------------------------------------------------
/challenge2/pilots_nlog2n.py:
--------------------------------------------------------------------------------
1 | # LAZYYYYYY
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon0.in:
--------------------------------------------------------------------------------
1 | 15 3
2 | RORROOROOROOORO
3 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1b.in:
--------------------------------------------------------------------------------
1 | 14 2
2 | ORROOOOOOORROO
3 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1c.in:
--------------------------------------------------------------------------------
1 | 12 6
2 | OORROROOROOR
3 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1a.in:
--------------------------------------------------------------------------------
1 | 17 3
2 | OROOOOOROOOOORRRR
3 | 


--------------------------------------------------------------------------------
/challenge2/.gitignore:
--------------------------------------------------------------------------------
1 | pilots
2 | tests.zip
3 | tests/
4 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon1d.in:
--------------------------------------------------------------------------------
1 | 20 2
2 | RRORRROROORRROORRRRO
3 | 


--------------------------------------------------------------------------------
/challenge2/url.txt:
--------------------------------------------------------------------------------
1 | http://main.edu.pl/en/archive/oi/17/pil
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon2c.in:
--------------------------------------------------------------------------------
1 | 29 3
2 | RRRRRRRRRRRRRRRRORRRRRRRRRRRR
3 | 


--------------------------------------------------------------------------------
/challenge1/url.txt:
--------------------------------------------------------------------------------
1 | http://main.edu.pl/en/archive/pa/2010/mon
2 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon2a.in:
--------------------------------------------------------------------------------
1 | 30 3
2 | OORROOOOORRROROOOOOROOOOOOOROR
3 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon2b.in:
--------------------------------------------------------------------------------
1 | 33 4
2 | ORROOOOOOOROORROROOOROROOOOOOOOOO
3 | 


--------------------------------------------------------------------------------
/challenge1/README.md:
--------------------------------------------------------------------------------
 1 | # The coins challenge
 2 | 
 3 | To run c++ solution run
 4 | ```bash
 5 | g++ coins.cpp -O2 -o coins
 6 | ./run_tests.sh ./coins
 7 | ```
 8 | 
 9 | To run Python solution run
10 | 
11 | ```bash
12 | ./run_tests.sh python2 coins.py
13 | ```
14 | 


--------------------------------------------------------------------------------
/challenge2/README.md:
--------------------------------------------------------------------------------
 1 | # The pilots challenge
 2 | 
 3 | To run c++ solution run
 4 | ```bash
 5 | g++ pilots.cpp -O2 -o pilots
 6 | ./run_tests.sh ./pilots
 7 | ```
 8 | 
 9 | To run Python solution run
10 | 
11 | ```bash
12 | ./run_tests.sh python2 pilots_n.py
13 | ```
14 | 


--------------------------------------------------------------------------------
/challenge2/download_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Downloads the test archive for the pilots task and unpacks it into the
 4 | # tests/ directory next to this script (the directory run_tests.sh reads).
 5 | 
 6 | # abort on the first failing command
 7 | set -e
 8 | # treat use of an undefined variable as an error
 9 | set -u
10 | # a pipeline fails if any command in it fails
11 | set -o pipefail
12 | 
13 | # work next to the script so the result does not depend on the caller's cwd
14 | SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
15 | cd "$SCRIPT_DIR"
16 | 
17 | # --fail: exit non-zero on an HTTP error instead of saving the error page
18 | # --location: follow redirects
19 | curl --fail --location -o tests.zip "http://main.edu.pl/en/user.phtml?op=tests&c=1700&task=633"
20 | unzip tests.zip -d tests
21 | 


--------------------------------------------------------------------------------
/challenge1/run_tests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # stop script on error and print it
 4 | set -e
 5 | # inform me of undefined variables
 6 | set -u
 7 | # handle cascading failures well
 8 | set -o pipefail
 9 | 
10 | SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
11 | 
12 | for filename in $SCRIPT_DIR/tests/*.in; do
13 |     output_filename=${filename::-3}.out
14 |     time $@ < $filename | diff -bs - $output_filename
15 | done
16 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3c.in:
--------------------------------------------------------------------------------
1 | 421 3
2 | RRRRRORRRRRORRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRORRRRRRORRRRRRRRRRRRRORRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRRRRRRRRORRRRRRORRORRRRRORRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRORRORRRRRRRRRRRRRRRRRRRRRRRORRRRR
3 | 


--------------------------------------------------------------------------------
/challenge2/run_tests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # stop script on error and print it
 4 | set -e
 5 | # inform me of undefined variables
 6 | set -u
 7 | # handle cascading failures well
 8 | set -o pipefail
 9 | 
10 | SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
11 | 
12 | for filename in `ls $SCRIPT_DIR/tests/*.in | sort --version-sort -f`; do
13 |     output_filename=${filename::-3}.out
14 |     time $@ < $filename | diff -bs - $output_filename
15 | done
16 | 


--------------------------------------------------------------------------------
/challenge2/pilots_n3.py:
--------------------------------------------------------------------------------
 1 | # n^3 solution
 2 | 
 3 | # read in the first line of input
 4 | k, n = [int(x) for x in raw_input().split(' ')]
 5 | # read in the second line of input
 6 | A    = [int(x) for x in raw_input().split(' ')]
 7 | assert len(A) == n
 8 | 
 9 | # check all the subsequences and keep the length
10 | best_sequence_length = 0
11 | 
12 | for start in range(0, n):
13 |     for end in range(start + 1, n + 1):
14 |         current_seq = A[start:end]
15 |         # line below is O(n)
16 |         difference =  max(current_seq) - min(current_seq)
17 |         if difference <= k:
18 |             best_sequence_length = max(best_sequence_length,
19 |                                        len(current_seq))
20 | 
21 | print best_sequence_length
22 | 


--------------------------------------------------------------------------------
/challenge2/pilots_n2.py:
--------------------------------------------------------------------------------
 1 | # n^3 solution
 2 | 
 3 | # read in the first line of input
 4 | k, n = [int(x) for x in raw_input().split(' ')]
 5 | # read in the second line of input
 6 | A    = [int(x) for x in raw_input().split(' ')]
 7 | assert len(A) == n
 8 | 
 9 | # check all the subsequences and keep the length
10 | best_sequence_length = 0
11 | 
12 | for start in range(0, n):
13 |     minimum_so_far = A[start]
14 |     maximum_so_far = A[start]
15 |     for end in range(start + 1, n + 1):
16 |         minimum_so_far = min(minimum_so_far, A[end - 1])
17 |         maximum_so_far = max(maximum_so_far, A[end - 1])
18 |         # line below is O(n)
19 |         difference =  maximum_so_far - minimum_so_far
20 |         if difference <= k:
21 |             best_sequence_length = max(best_sequence_length,
22 |                                        end - start)
23 |         else:
24 |             # smartness (difference will newer decrease!)
25 |             break
26 | 
27 | print best_sequence_length
28 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3a.in:
--------------------------------------------------------------------------------
1 | 993 7
2 | OOOOOROOOOOOOORORROOROOOOOOOROOOOOOOOOOOOOOOOOOOOROOOOROOOOOROROROOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROROORROOROROOOOOROOROOOOOOOOOOOOOOOOOOOOOOOOOOOROOROOROOOROOOOOOOOROOOROOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOORROOOOROOOOOOOOOOOOOOOOOOOOOOOOROOOOROOOOORROOOROOROOROOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOROROORROOOROOROORROOOROOOOOOOOOOOOOOOOOOOOOROROORORROROOOOOROOOOOROOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOORROROOOOROOOOROOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOROOOOROOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOORROOOOOOROOOOROOOOOOOROOOOOOOOOOOOOOOOOOOOOROOOROOOOOOROOOROOOOROOOOOOOROOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOROOOROOOOOOOOOOOOOOOOOOOOOOOROOOOROOOOORROOROROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOROOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROROOOORROOOOOROORROOOOOROOOOOOOOOOOOOOOOOOOOOROOOOOOOROOOOROOOOOOROOROOOOOOOOOOOOOOOOOOOOOOOOOOOROOROOOOROOOOROOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOROROOOOOOOOOOROROOOOOOORRORROOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOROOOOORORROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
3 | 


--------------------------------------------------------------------------------
/challenge2/pilots.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <cstdio>
 3 | #include <algorithm>
 4 | #include <queue>
 5 | #define ST first
 6 | #define ND second
 7 | using namespace std;
 8 | typedef pair<int,int> PI;
 9 | 
10 | 
11 | int t,n,T[3000007]; //12 MB
12 | deque<int> MX, MN; // positions (indices into T) of the window max / min candidates
13 | //TOTAL: 12 MB
14 | 
15 | // Returns true iff some x consecutive values of T have max - min <= t.
16 | bool test(int x) {
17 |     MX.clear(); MN.clear();
18 |     for (int i = 0; i < n; ++i) {
19 |         // keep T increasing along MN and decreasing along MX (front = min / max)
20 |         while (!MN.empty() && T[MN.back()] >= T[i]) MN.pop_back();
21 |         while (!MX.empty() && T[MX.back()] <= T[i]) MX.pop_back();
22 |         MN.push_back(i); MX.push_back(i);
23 |         if (i >= x) {  // indices <= i - x have left the window ending at i
24 |             while (MN.front() <= i - x) MN.pop_front();
25 |             while (MX.front() <= i - x) MX.pop_front();
26 |         }
27 |         if (i + 1 >= x && T[MX.front()] - T[MN.front()] <= t) return true;
28 |     }
29 |     return false;
30 | }
31 | 	
32 | 
33 | 
34 | 
35 | 
36 | // Reads t and n, then n values into T, and prints the length of the longest
37 | // run of consecutive values whose max - min is at most t.
38 | int main() {
39 |     if (scanf("%d%d", &t, &n) != 2) return 1;
40 |     for (int i = 0; i < n; ++i) {
41 |         if (scanf("%d", T + i) != 1) return 1;
42 |     }
43 |     // Binary search on the window length: if a window of length x qualifies,
44 |     // so does every shorter one, so test() is monotone in x.
45 |     // best starts at 0 so the output is well defined even when no length
46 |     // qualifies (for example n == 0).
47 |     int lo = 1, hi = n, best = 0;
48 |     while (lo <= hi) {
49 |         int mid = lo + (hi - lo) / 2;
50 |         if (test(mid)) { best = mid; lo = mid + 1; }
51 |         else hi = mid - 1;
52 |     }
53 |     printf("%d\n", best);
54 |     return 0;
55 | }
56 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3b.in:
--------------------------------------------------------------------------------
1 | 1243 1242
2 | OOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
3 | 


--------------------------------------------------------------------------------
/challenge2/pilots_nlogn.py:
--------------------------------------------------------------------------------
 1 | # n * log n solution
 2 | 
 3 | from bintrees import FastAVLTree
 4 | 
 5 | # read in the first line of input
 6 | k, n = [int(x) for x in raw_input().split(' ')]
 7 | # read in the second line of input
 8 | A    = [int(x) for x in raw_input().split(' ')]
 9 | assert len(A) == n
10 | 
11 | # check all the subsequences and keep the length
12 | best_sequence_length = 0
13 | 
14 | 
15 | min_tree = FastAVLTree()
16 | max_tree = FastAVLTree()
17 | 
18 | def ok():
19 |     if max_tree.is_empty():
20 |         return True
21 |     else:
22 |         difference = max_tree.max_item()[0][0] \
23 |                      - min_tree.min_item()[0][0]
24 |         return  difference <= k
25 | 
26 | start, end = 0, 0
27 | 
28 | while end < len(A):
29 |     # extend the sequence until violation reached.
30 |     while end < len(A) and ok():
31 |         best_sequence_length = max(best_sequence_length,
32 |                                    end - start)
33 |         min_tree.insert((A[end], end), None)
34 |         max_tree.insert((A[end], end), None)
35 |         end += 1
36 | 
37 |     if ok():
38 |         best_sequence_length = max(best_sequence_length,
39 |                            end - start)
40 | 
41 |     # move starting position by 1.
42 |     min_tree.remove((A[start], start))
43 |     max_tree.remove((A[start], start))
44 |     start += 1
45 | 
46 | print best_sequence_length
47 | 


--------------------------------------------------------------------------------
/challenge2/pilots_n.py:
--------------------------------------------------------------------------------
 1 | # n solution
 2 | 
 3 | from collections import deque
 4 | 
 5 | # read in the first line of input
 6 | k, n = [int(x) for x in raw_input().split(' ')]
 7 | # read in the second line of input
 8 | A    = [int(x) for x in raw_input().split(' ')]
 9 | assert len(A) == n
10 | 
11 | # check all the subsequences and keep the length
12 | best_sequence_length = 0
13 | 
14 | 
15 | min_extravaganza = deque()
16 | max_extravaganza = deque()
17 | 
18 | def ok():
19 |     if len(min_extravaganza) == 0:
20 |         return True
21 |     else:
22 |         difference = A[max_extravaganza[0]] - A[min_extravaganza[0]]
23 |         return  difference <= k
24 | 
25 | start, end = 0, 0
26 | 
27 | while end < len(A):
28 |     while end < len(A) and ok():
29 |         best_sequence_length = max(best_sequence_length,
30 |                                    end - start)
31 | 
32 |         while len(min_extravaganza) > 0 and \
33 |                 A[min_extravaganza[-1]] >= A[end]:
34 |             min_extravaganza.pop()
35 |         min_extravaganza.append(end)
36 | 
37 |         while len(max_extravaganza) > 0 and \
38 |                 A[max_extravaganza[-1]] <= A[end]:
39 |             max_extravaganza.pop()
40 |         max_extravaganza.append(end)
41 | 
42 |         end += 1
43 | 
44 |     if ok():
45 |         best_sequence_length = max(best_sequence_length,
46 |                                    end - start)
47 | 
48 |     if min_extravaganza[0] == start:
49 |         min_extravaganza.popleft()
50 |     if max_extravaganza[0] == start:
51 |         max_extravaganza.popleft()
52 |     start += 1
53 | 
54 | print best_sequence_length
55 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon3d.in:
--------------------------------------------------------------------------------
1 | 1835 4
2 | ROOORRRORRRRRORRRRORRROOORORRRRRRRRRRRRRRRRRRRRRRRRORORRRRRRRRRRRORORRORROORRROORORRRRRRRRORROORRRRRRRRRORRRRRRORRRRRRRRORRORRRORRRRRRRRRRRRORRRRRROORORRRRORRROORORRROORRORRRRRRRRRRORRRRRRRRRRRRRRRORRRRORRRRRRRRRRRRRRORRRRRRROORRORRROOORRRRORRRRRRRRRORRRRORRORRORRRORRRORROORRRORORORRRRRRRORRRRRORRRRRORRRRORRRRRRRORRRORRORRRRRRORRORRRRRRRRRRRRRRRRRORRRROORRRORRRRRRRRRRRRRRRRRRRRRORRRRRRRRORRRRRORRRRORRRRRRORRORORRORRROORRRRRRRRORRORRRRRORRRRORRRRRRRRRRRRRRRRRRROORORRRRRRRRRORRRRRRORRORRRRORORROROROROROOORRRRRRRRRRRRRORRORRRRRROORRRRRRRRRRRORROORRRRORRRRRRRRROORROORRRRRRRROORORORRORRORRORRRRRRROORRRRRRRORRRORRRRRRORRROROORRRRRRORROORRRRROOROORRRRRORRRRRRRRRRORRRRRRRRRRORRRORRRRORRRRRRRRRRRRORRRRRRROOORRRRRRRRORRRORRRRORRRORRRRRRORRORROOROOORRRRRRRRRRRORORRRRRRRRRRORRRORRRRORRRRRROROORORRRRRRROROORRRRRRRRRROORORRRRRRRRRRRORRRRRRRRROORRRRORORRRRRRRRRRRORRRRRRRRRORRRORRRROORRRRRRRORRRRRRRORORRRORRRRRORRRRRRRRRRRRRRRRRRRRORRRORORRRRRRRRORRRRRORRRRRRRRRORRRRORRRORRRRRRORRRRRRRRRRRRRRORORRRRRRORORROROORRRORRORRORRRRRORRRROOORRRRRRORORORRRRRROORORRRRRRRRRRRRRRRRRORRRRRRRRRRORORORRRRRRRRRRORORRRRRRRORRRRRRRRRORRRRRRRRRRRRORRRRRRRRRRROOOOORRRRRORRRROROROORRRRORRRRRORORORORORRRRRRRRRRRRRRORRRRRRRORRRORRRRRRRRRRRRRRRRRRRRRORRRRRRRRRRRRRORROROORRORRRRORRRRRRORRRRORRRRRRRRORRRRRRORRRRRRRRRROORRRRRRRRRRRORRORRRRRORRORRRROOROOORRRRORRROORRRRRRRRORRRRRRORRRRRRRRRRRORORRRRRORORORRRRRORRRRRRRRROORRRRORRRORRRRRRRRORRRRRRRRRRRRRRRRRRRRRRRRORRRRRRRORRRRRRRRRRORORRRRRRRORRRRORRRRRRRRRRRORORRRRRORRRRORORRORORRRRRRROOORRRORRRRRRRRORRRRRRRRRRRRRORRRRRRRRRORRRRORRRRORRRRRRORRRRRORRRRRRRRRORRRRRROORRRORRRORRORRRRRORRRRRORRRRROORORRRRRORRRRRRRRRRRRRORORORRRORORRRROROROORRRRRRRRRRRORRRRRRORRRRRRRORRRRRRORRRRORROROROORRRRRRORRRRRRRRORRRRRRRRRRRRRRRRRRORRRORRRRRRROOORORORRRRRRRRRRRRRRORRRRORRRORRRRRROORRRRRRRRRROORRRRORROORRRRORRRRRRRRR
3 | 


--------------------------------------------------------------------------------
/challenge1/coins.py:
--------------------------------------------------------------------------------
 1 | # OVERALL COMPLEXITY: O(n)
 2 | # here we use a dictionary
 3 | 
 4 | def main():
 5 |     # n - number of coins
 6 |     # k - we need k as many tails as heads
 7 |     n, k = [int(v) for v in raw_input().split()]
 8 |     # coins - buffer to hold all the characters
 9 |     coins = list(raw_input())
10 |     assert len(coins) == n
11 |     coins_as_numbers = []
12 |     for coin in coins:
13 |         if coin == 'R':
14 |             coins_as_numbers.append(k)
15 |         elif coin == 'O':
16 |             coins_as_numbers.append(-1)
17 |         else:
18 |             assert False
19 |     # NEW PROBLEM: find longest contiguous subsequence of
20 |     # sum 0 (sum zero implies k times as many heads)
21 | 
22 | 
23 |     # for prefix sums it is important to add an extra element
24 |     # for empty sequence at the beginning.
25 |     prefix_sums = [0]
26 | 
27 |     # compute the prefix sums
28 |     for coin in coins_as_numbers:
29 |         prefix_sums.append(prefix_sums[-1] + coin)
30 | 
31 |     # new problem: find a pair of elements in the array,
32 |     # that have the same value and that are furthest
33 |     # away possible.
34 | 
35 |     # this dictionary maps from array values,
36 |     # to the index of prefix_sums, where the value
37 |     # occurs for the first time
38 |     # Specifically leftmost_index_of_value[S]
39 |     # is equal to smallest such i that prefix_sums[i] = S
40 |     # NOTE: it is build iteratively left to right
41 |     leftmost_index_of_value = {}
42 |     for s_idx, s in enumerate(prefix_sums):
43 |         if s not in leftmost_index_of_value:
44 |             leftmost_index_of_value[s] = s_idx
45 | 
46 |     res = 0
47 |     for s_idx, s in enumerate(prefix_sums):
48 |         res = max(res, s_idx - leftmost_index_of_value[s])
49 | 
50 |     print(res)
51 | 
52 | if __name__ == '__main__':
53 |     main()
54 | 


--------------------------------------------------------------------------------
/challenge1/coins.cpp:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <cstdio>
 3 | 
 4 | // OVERALL COMPLEXITY: O(n lg n)
 5 | // (due to sorting)
 6 | 
 7 | // maximum number of coins
 8 | const int MAXN = 1000005;
 9 | 
10 | // n - number of coins
11 | // k - required ratio: 'O' coins must be exactly k times as many as 'R' coins
12 | int n, k;
13 | // c - buffer to hold all the characters
14 | char c[MAXN];
15 | // array of (prefix_sum[i], i) values
16 | std::pair<long long, int> seq_and_index[MAXN];
17 | 
18 | // Reads n, k and the coin string, then prints the length of the longest
19 | // contiguous segment in which 'O' occurs exactly k times as often as 'R'.
20 | int main() {
21 |     scanf("%d%d", &n, &k);
22 |     scanf("%s", c);
23 | 
24 |     // Score each 'R' as +k and every other character ('O') as -1: a segment
25 |     // sums to zero exactly when it holds k times as many 'O' as 'R'.
26 |     //
27 |     // seq_and_index[i] = (score of the first i coins, i). Entry 0 is the
28 |     // empty prefix, so the score of coins a+1..b is
29 |     //
30 |     //     seq_and_index[b].first - seq_and_index[a].first
31 |     long long running = 0;
32 |     seq_and_index[0] = std::make_pair(running, 0);
33 |     for (int i = 1; i <= n; ++i) {
34 |         running += (c[i - 1] == 'R') ? k : -1;
35 |         seq_and_index[i] = std::make_pair(running, i);
36 |     }
37 | 
38 |     // A segment scores zero exactly when the prefix scores at its two ends
39 |     // are equal, so the answer is the widest index gap between two equal
40 |     // scores. Sorting by score brings equal scores next to each other.
41 |     std::sort(seq_and_index, seq_and_index + n + 1);
42 | 
43 |     int res = 0;
44 |     // smallest / largest original index inside the current run of equal scores
45 |     int lowest = 0, highest = 0;
46 |     for (int i = 0; i <= n; ++i) {
47 |         const int idx = seq_and_index[i].second;
48 |         const bool starts_group =
49 |             i == 0 || seq_and_index[i].first != seq_and_index[i - 1].first;
50 |         if (starts_group) {
51 |             lowest = highest = idx;
52 |         } else {
53 |             lowest = std::min(lowest, idx);
54 |             highest = std::max(highest, idx);
55 |         }
56 |         res = std::max(res, highest - lowest);
57 |     }
58 |     printf("%d\n", res);
59 | }
60 | 


--------------------------------------------------------------------------------
/Lecture 17.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# LCS "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 9,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 61,
 24 |    "metadata": {
 25 |     "collapsed": false
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "def LCS(s1, s2):\n",
 30 |     "    # initialize state\n",
 31 |     "    n, m = len(s1) + 1, len(s2) + 1\n",
 32 |     "    dp =   [\n",
 33 |     "        [ 0 for _ in range(m)] \n",
 34 |     "        for _ in range(n)\n",
 35 |     "    ]\n",
 36 |     "    pred = [\n",
 37 |     "        [ (-1,-1) for _ in range(m)]\n",
 38 |     "        for _ in range(n)\n",
 39 |     "    ]\n",
 40 |     "    \n",
 41 |     "    for i in range(1,n):\n",
 42 |     "        for j in range(1,m):\n",
 43 |     "            # CASE 1: ignore character in s1\n",
 44 |     "            dp[i][j], pred[i][j] = dp[i-1][j], (i-1,j)\n",
 45 |     "            # CASE 2: ignore character in s2\n",
 46 |     "            if dp[i][j] < dp[i][j-1]:\n",
 47 |     "                dp[i][j], pred[i][j] = dp[i][j-1], (i,j-1)\n",
 48 |     "            # CASE 3: extend\n",
 49 |     "            if s1[i-1] == s2[j-1] and dp[i][j] < dp[i-1][j-1] + 1:\n",
 50 |     "                dp[i][j], pred[i][j] = dp[i-1][j-1] + 1, (i-1,j-1)\n",
 51 |     "    \n",
 52 |     "    print (np.array(dp))\n",
 53 |     "\n",
 54 |     "    # recover the solution\n",
 55 |     "    x,y = (n-1,m-1)\n",
 56 |     "    solution = []\n",
 57 |     "    while pred[x][y] != (-1,-1):\n",
 58 |     "        if pred[x][y] == (x-1, y-1):\n",
 59 |     "            solution.append(s1[x-1])\n",
 60 |     "        (x,y) = pred[x][y]\n",
 61 |     "    solution.reverse()\n",
 62 |     "    \n",
 63 |     "    return solution"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 62,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [
 73 |     {
 74 |      "name": "stdout",
 75 |      "output_type": "stream",
 76 |      "text": [
 77 |       "[[0 0 0 0 0]\n",
 78 |       " [0 1 1 1 1]\n",
 79 |       " [0 1 2 2 2]\n",
 80 |       " [0 1 2 2 2]\n",
 81 |       " [0 1 2 3 3]]\n"
 82 |      ]
 83 |     },
 84 |     {
 85 |      "data": {
 86 |       "text/plain": [
 87 |        "['b', 'a', 'b']"
 88 |       ]
 89 |      },
 90 |      "execution_count": 62,
 91 |      "metadata": {},
 92 |      "output_type": "execute_result"
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "LCS(\"bacb\", \"babd\")"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 63,
102 |    "metadata": {
103 |     "collapsed": false
104 |    },
105 |    "outputs": [
106 |     {
107 |      "name": "stdout",
108 |      "output_type": "stream",
109 |      "text": [
110 |       "[[0 0 0 0 0 0 0 0 0 0]\n",
111 |       " [0 0 0 1 1 1 1 1 1 1]\n",
112 |       " [0 0 0 1 2 2 2 2 2 2]\n",
113 |       " [0 1 1 1 2 2 2 2 2 2]\n",
114 |       " [0 1 2 2 2 2 2 2 2 2]\n",
115 |       " [0 1 2 3 3 3 3 3 3 3]\n",
116 |       " [0 1 2 3 4 4 4 4 4 4]\n",
117 |       " [0 1 2 3 4 4 4 5 5 5]\n",
118 |       " [0 1 2 3 4 4 4 5 6 6]\n",
119 |       " [0 1 2 3 4 4 5 5 6 7]]\n"
120 |      ]
121 |     },
122 |     {
123 |      "data": {
124 |       "text/plain": [
125 |        "['t', 'h', 'i', 's', 'l', 'c', 's']"
126 |       ]
127 |      },
128 |      "execution_count": 63,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "LCS(\"isthislcs\", \"thisislcs\")"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "# Viterbi algorithm"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 1,
147 |    "metadata": {
148 |     "collapsed": true
149 |    },
150 |    "outputs": [],
151 |    "source": [
152 |     "states = ('Healthy', 'Fever')\n",
153 |     " \n",
154 |     "observations = ('normal', 'cold', 'dizzy')\n",
155 |     " \n",
156 |     "start_probability = {'Healthy': 0.6, 'Fever': 0.4}\n",
157 |     " \n",
158 |     "transition_probability = {\n",
159 |     "   'Healthy' : {'Healthy': 0.7, 'Fever': 0.3},\n",
160 |     "   'Fever' : {'Healthy': 0.4, 'Fever': 0.6}\n",
161 |     "   }\n",
162 |     " \n",
163 |     "emission_probability = {\n",
164 |     "   'Healthy' : {'normal': 0.5, 'cold': 0.4, 'dizzy': 0.1},\n",
165 |     "   'Fever' : {'normal': 0.1, 'cold': 0.3, 'dizzy': 0.6}\n",
166 |     "}"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 7,
172 |    "metadata": {
173 |     "collapsed": false
174 |    },
175 |    "outputs": [],
176 |    "source": [
177 |     "def viterbi(obs, states, start_p, trans_p, emit_p):\n",
178 |     "    V = [{}]\n",
179 |     "    predecessor = [{}]\n",
180 |     "    \n",
181 |     "    # Initialize base cases (t == 0)\n",
182 |     "    for y in states:\n",
183 |     "        V[0][y] = start_p[y] * emit_p[y][obs[0]]\n",
184 |     "        predecessor[0][y] = None\n",
185 |     "    \n",
186 |     "    # Run Viterbi for t > 0\n",
187 |     "    for t in range(1, len(obs)):\n",
188 |     "        V.append({})\n",
189 |     "        predecessor.append({})\n",
190 |     "\n",
191 |     "        for y in states:\n",
192 |     "            highest_probability, best_y_prev = 0.0, None\n",
193 |     "            for y_prev in states:\n",
194 |     "                probability_from_y_prev = V[t-1][y_prev] * trans_p[y_prev][y] * emit_p[y][obs[t]]\n",
195 |     "                if probability_from_y_prev > highest_probability:\n",
196 |     "                    highest_probability, best_y_prev = probability_from_y_prev, y_prev\n",
197 |     "            V[t][y] = highest_probability\n",
198 |     "            predecessor[t][y] = best_y_prev\n",
199 |     "    \n",
200 |     "    print(*dptable(V), sep='')\n",
201 |     "    \n",
202 |     "    # Return the most likely sequence over the given time frame\n",
203 |     "    n = len(obs) - 1\n",
204 |     "    (prob, state) = max((V[n][y], y) for y in states)\n",
205 |     "    \n",
206 |     "    # Recover the path\n",
207 |     "    path = []\n",
208 |     "    for t in range(n,-1,-1):\n",
209 |     "        path.append(state)\n",
210 |     "        state = predecessor[t][state]\n",
211 |     "    path.reverse()\n",
212 |     "    \n",
213 |     "    return (prob, path)\n",
214 |     "\n",
215 |     "# Don't study this; it just prints a table of the steps.\n",
216 |     "def dptable(V):\n",
217 |     "    yield \"    \"\n",
218 |     "    yield \" \".join((\"%7d\" % i) for i in range(len(V)))\n",
219 |     "    yield \"\\n\"\n",
220 |     "    for y in V[0]:\n",
221 |     "        yield \"%.5s: \" % y\n",
222 |     "        yield \" \".join(\"%.7s\" % (\"%f\" % v[y]) for v in V)\n",
223 |     "        yield \"\\n\""
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 8,
229 |    "metadata": {
230 |     "collapsed": false
231 |    },
232 |    "outputs": [
233 |     {
234 |      "name": "stdout",
235 |      "output_type": "stream",
236 |      "text": [
237 |       "          0       1       2\n",
238 |       "Fever: 0.04000 0.02700 0.01512\n",
239 |       "Healt: 0.30000 0.08400 0.00588\n",
240 |       "\n"
241 |      ]
242 |     },
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "(0.01512, ['Healthy', 'Healthy', 'Fever'])"
247 |       ]
248 |      },
249 |      "execution_count": 8,
250 |      "metadata": {},
251 |      "output_type": "execute_result"
252 |     }
253 |    ],
254 |    "source": [
255 |     "viterbi(observations, states, start_probability, transition_probability, emission_probability)"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": null,
261 |    "metadata": {
262 |     "collapsed": true
263 |    },
264 |    "outputs": [],
265 |    "source": []
266 |   }
267 |  ],
268 |  "metadata": {
269 |   "kernelspec": {
270 |    "display_name": "Python 3",
271 |    "language": "python",
272 |    "name": "python3"
273 |   },
274 |   "language_info": {
275 |    "codemirror_mode": {
276 |     "name": "ipython",
277 |     "version": 3
278 |    },
279 |    "file_extension": ".py",
280 |    "mimetype": "text/x-python",
281 |    "name": "python",
282 |    "nbconvert_exporter": "python",
283 |    "pygments_lexer": "ipython3",
284 |    "version": "3.4.1"
285 |   }
286 |  },
287 |  "nbformat": 4,
288 |  "nbformat_minor": 0
289 | }
290 | 


--------------------------------------------------------------------------------
/lecture4/Heaps.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 10,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "class MaxHeap(object):\n",
 12 |     "    def __init__(self, max_size=128):\n",
 13 |     "        self.storage = [None for _ in range(max_size)]\n",
 14 |     "        self.heap_size = 0\n",
 15 |     "    \n",
 16 |     "    def display(self, width=80):\n",
 17 |     "        next_element = 0\n",
 18 |     "        for i in range(0, 1000):\n",
 19 |     "            level_elts = 2**i\n",
 20 |     "            elts = self.storage[next_element:min(next_element + level_elts, self.heap_size)]\n",
 21 |     "            next_element = min(next_element + level_elts, self.heap_size)\n",
 22 |     "            if len(elts) == 0:\n",
 23 |     "                break\n",
 24 |     "            positions = [ (i+1) * width / (level_elts + 1) for i in range(level_elts)]\n",
 25 |     "            output = \"\"\n",
 26 |     "            for j, (elt, pos) in enumerate(zip(elts, positions)):\n",
 27 |     "                idx = 2**i + j - 1\n",
 28 |     "                while len(output) <= pos:\n",
 29 |     "                    output += \" \"\n",
 30 |     "                output += \"%d[@%d]\" % (elt,idx)\n",
 31 |     "            print(output)\n",
 32 |     "            print()\n",
 33 |     "    \n",
 34 |     "    def fix_down(self, index):\n",
 35 |     "        while index < self.heap_size:\n",
 36 |     "            # pick maximum child\n",
 37 |     "            max_child_idx = None\n",
 38 |     "            if  2 * index + 1 < self.heap_size:\n",
 39 |     "                max_child_idx = 2 * index + 1\n",
 40 |     "\n",
 41 |     "            if 2 * index + 2 < self.heap_size and \\\n",
 42 |     "                    self.storage[2 * index + 1] < self.storage[2 * index + 2]:\n",
 43 |     "                max_child_idx = 2 * index + 2\n",
 44 |     "\n",
 45 |     "            if max_child_idx is None or \\\n",
 46 |     "                    self.storage[index] > self.storage[max_child_idx]:\n",
 47 |     "                # heap property satisfied\n",
 48 |     "                break\n",
 49 |     "            \n",
 50 |     "            self.storage[index], self.storage[max_child_idx] = self.storage[max_child_idx], self.storage[index]\n",
 51 |     "            index = max_child_idx\n",
 52 |     "            \n",
 53 |     "    def fix_up(self, index):\n",
 54 |     "        assert index < self.heap_size\n",
 55 |     "        while index > 0:\n",
 56 |     "            parent_idx = (index - 1) // 2\n",
 57 |     "            if self.storage[index] >= self.storage[parent_idx]:\n",
 58 |     "                self.storage[index],  self.storage[parent_idx] =  self.storage[parent_idx], self.storage[index]\n",
 59 |     "                index = parent_idx\n",
 60 |     "            else:\n",
 61 |     "                break\n",
 62 |     "    \n",
 63 |     "    def insert(self, element):\n",
 64 |     "        self.heap_size += 1\n",
 65 |     "        new_index = self.heap_size - 1\n",
 66 |     "        self.storage[new_index] = element\n",
 67 |     "        self.fix_up(new_index)\n",
 68 |     "\n",
 69 |     "    def extract_max(self):\n",
 70 |     "        self.storage[0], self.storage[self.heap_size - 1] = self.storage[self.heap_size -1], self.storage[0]\n",
 71 |     "        self.heap_size -= 1\n",
 72 |     "        self.fix_down(0)\n",
 73 |     "        return self.storage[self.heap_size]\n",
 74 |     "        \n",
 75 |     "    def heapify(self):\n",
 76 |     "        for i in range(self.heap_size - 1, -1, -1):\n",
 77 |     "            self.fix_down(i)\n",
 78 |     "    \n",
 79 |     "    @staticmethod\n",
 80 |     "    def wrap_list(lst):\n",
 81 |     "        h = MaxHeap(0)\n",
 82 |     "        h.storage = lst\n",
 83 |     "        h.heap_size = len(lst)\n",
 84 |     "        return h"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 13,
 90 |    "metadata": {
 91 |     "collapsed": false
 92 |    },
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "                                         3[@0]\n",
 99 |       "\n",
100 |       "                           4[@1]                      5[@2]\n",
101 |       "\n",
102 |       "                 8[@3]           6[@4]           1[@5]           10[@6]\n",
103 |       "\n",
104 |       "         9[@7]    5[@8]\n",
105 |       "\n"
106 |      ]
107 |     }
108 |    ],
109 |    "source": [
110 |     "example = [3,4,5,8,6,1,10,9,5]\n",
111 |     "h = MaxHeap.wrap_list(example)\n",
112 |     "h.display()"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 14,
118 |    "metadata": {
119 |     "collapsed": false
120 |    },
121 |    "outputs": [
122 |     {
123 |      "name": "stdout",
124 |      "output_type": "stream",
125 |      "text": [
126 |       "                                         3[@0]\n",
127 |       "\n",
128 |       "                           8[@1]                      5[@2]\n",
129 |       "\n",
130 |       "                 9[@3]           6[@4]           1[@5]           10[@6]\n",
131 |       "\n",
132 |       "         4[@7]    5[@8]\n",
133 |       "\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "h.fix_down(1)\n",
139 |     "h.display()"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 15,
145 |    "metadata": {
146 |     "collapsed": false
147 |    },
148 |    "outputs": [
149 |     {
150 |      "name": "stdout",
151 |      "output_type": "stream",
152 |      "text": [
153 |       "                                         8[@0]\n",
154 |       "\n",
155 |       "                           3[@1]                      5[@2]\n",
156 |       "\n",
157 |       "                 9[@3]           6[@4]           1[@5]           10[@6]\n",
158 |       "\n",
159 |       "         4[@7]    5[@8]\n",
160 |       "\n"
161 |      ]
162 |     }
163 |    ],
164 |    "source": [
165 |     "h.fix_up(1)\n",
166 |     "h.display()"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 16,
172 |    "metadata": {
173 |     "collapsed": false
174 |    },
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "                                         10[@0]\n",
181 |       "\n",
182 |       "                           8[@1]                      2[@2]\n",
183 |       "\n",
184 |       "                 6[@3]           3[@4]           1[@5]           2[@6]\n",
185 |       "\n",
186 |       "         4[@7]    5[@8]\n",
187 |       "\n"
188 |      ]
189 |     }
190 |    ],
191 |    "source": [
192 |     "example = [3,10,2,6,8,1,2,4,5]\n",
193 |     "h = MaxHeap.wrap_list(example)\n",
194 |     "h.heapify()\n",
195 |     "h.display()"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 17,
201 |    "metadata": {
202 |     "collapsed": false
203 |    },
204 |    "outputs": [],
205 |    "source": [
206 |     "h = MaxHeap(128)\n",
207 |     "h.display()"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": 18,
213 |    "metadata": {
214 |     "collapsed": false
215 |    },
216 |    "outputs": [
217 |     {
218 |      "name": "stdout",
219 |      "output_type": "stream",
220 |      "text": [
221 |       "                                         10[@0]\n",
222 |       "\n",
223 |       "                           5[@1]                      4[@2]\n",
224 |       "\n",
225 |       "                 1[@3]\n",
226 |       "\n"
227 |      ]
228 |     }
229 |    ],
230 |    "source": [
231 |     "h.insert(1)\n",
232 |     "h.insert(4)\n",
233 |     "h.insert(5)\n",
234 |     "h.insert(10)\n",
235 |     "h.display()"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 19,
241 |    "metadata": {
242 |     "collapsed": false
243 |    },
244 |    "outputs": [
245 |     {
246 |      "name": "stdout",
247 |      "output_type": "stream",
248 |      "text": [
249 |       "10\n",
250 |       "5\n",
251 |       "4\n",
252 |       "1\n"
253 |      ]
254 |     }
255 |    ],
256 |    "source": [
257 |     "while h.heap_size > 0:\n",
258 |     "    print(h.extract_max())"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "execution_count": 22,
264 |    "metadata": {
265 |     "collapsed": true
266 |    },
267 |    "outputs": [],
268 |    "source": [
269 |     "def heap_sort(array):\n",
270 |     "    as_heap = MaxHeap.wrap_list(array)\n",
271 |     "    as_heap.heapify()\n",
272 |     "    while as_heap.heap_size > 0:\n",
273 |     "        as_heap.extract_max()"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 23,
279 |    "metadata": {
280 |     "collapsed": false
281 |    },
282 |    "outputs": [
283 |     {
284 |      "name": "stdout",
285 |      "output_type": "stream",
286 |      "text": [
287 |       "[1, 2, 2, 3, 4, 5, 6, 8, 10]\n"
288 |      ]
289 |     }
290 |    ],
291 |    "source": [
292 |     "example = [3,10,2,6,8,1,2,4,5]\n",
293 |     "heap_sort(example)\n",
294 |     "print(example)"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": null,
300 |    "metadata": {
301 |     "collapsed": true
302 |    },
303 |    "outputs": [],
304 |    "source": []
305 |   }
306 |  ],
307 |  "metadata": {
308 |   "kernelspec": {
309 |    "display_name": "Python 3",
310 |    "language": "python",
311 |    "name": "python3"
312 |   },
313 |   "language_info": {
314 |    "codemirror_mode": {
315 |     "name": "ipython",
316 |     "version": 3
317 |    },
318 |    "file_extension": ".py",
319 |    "mimetype": "text/x-python",
320 |    "name": "python",
321 |    "nbconvert_exporter": "python",
322 |    "pygments_lexer": "ipython3",
323 |    "version": "3.4.1"
324 |   }
325 |  },
326 |  "nbformat": 4,
327 |  "nbformat_minor": 0
328 | }
329 | 


--------------------------------------------------------------------------------
/lecture12/Google BFS.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Google BFS question"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "from collections import deque"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "markdown",
 23 |    "metadata": {},
 24 |    "source": [
 25 |     "Given a 2D board with an emergency $E$ and some policemen $P$, what is the distance from the emergency to the closest policeman? Nodes marked as $X$ are walls and cannot be traversed.\n",
 26 |     "\n",
 27 |     "\n",
 28 |     "Think a little bit about how to solve it"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 66,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "example = [\n",
 40 |     "    \"...P...\",\n",
 41 |     "    \"...XXX.\",\n",
 42 |     "    \"EX..P..\",\n",
 43 |     "    \".X.....\",\n",
 44 |     "    \".......\",\n",
 45 |     "    \".......\",\n",
 46 |     "]\n",
 47 |     "\n",
 48 |     "class MapGraph(object):\n",
 49 |     "    def __init__(self, mmap):\n",
 50 |     "        \"\"\"Graph abstraction for our map\"\"\"\n",
 51 |     "        self.dim_x, self.dim_y = len(mmap), len(mmap[0])\n",
 52 |     "        self.mmap = {(x,y):mmap[x][y] for x in range(self.dim_x) \n",
 53 |     "                                      for y in range(self.dim_y)}\n",
 54 |     "\n",
 55 |     "    def neighbours(self, node):\n",
 56 |     "        \"\"\"Returns all the direct neighbors of a given node\"\"\"\n",
 57 |     "        x,y = node\n",
 58 |     "        # There are four directions in which we can go\n",
 59 |     "        for dx, dy in [[0,1], [0,-1], [-1,0], [1,0]]:\n",
 60 |     "            nx, ny = x + dx, y + dy\n",
 61 |     "            if (0 <= nx < self.dim_x and # but we cannot got through walls\n",
 62 |     "                    0 <= ny < self.dim_y # (at least not yet!)\n",
 63 |     "                    and self.mmap[nx,ny] != 'X'):\n",
 64 |     "                yield nx, ny\n",
 65 |     "    \n",
 66 |     "    def find_all(self, letter):\n",
 67 |     "        \"\"\"Finds all the coordinates where a given letter occurs\"\"\"\n",
 68 |     "        res = []\n",
 69 |     "        for x in range(self.dim_x):\n",
 70 |     "            for y in range(self.dim_y):\n",
 71 |     "                if self.mmap[x,y] == letter:\n",
 72 |     "                    res.append((x,y))\n",
 73 |     "        return res\n",
 74 |     "    \n",
 75 |     "    def show(self, what=None):\n",
 76 |     "        \"\"\"Displays the graph\"\"\"\n",
 77 |     "        what = what or self.mmap\n",
 78 |     "        for x in range(self.dim_x):\n",
 79 |     "            for y in range(self.dim_y):\n",
 80 |     "                print(what[x,y] if (x,y) in what else '?', end='')\n",
 81 |     "                print(' ', end='')\n",
 82 |     "            print('')"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 67,
 88 |    "metadata": {
 89 |     "collapsed": false
 90 |    },
 91 |    "outputs": [
 92 |     {
 93 |      "name": "stdout",
 94 |      "output_type": "stream",
 95 |      "text": [
 96 |       ". . . P . . . \n",
 97 |       ". . . X X X . \n",
 98 |       "E X . . P . . \n",
 99 |       ". X . . . . . \n",
100 |       ". . . . . . . \n",
101 |       ". . . . . . . \n"
102 |      ]
103 |     },
104 |     {
105 |      "data": {
106 |       "text/plain": [
107 |        "[(2, 0)]"
108 |       ]
109 |      },
110 |      "execution_count": 67,
111 |      "metadata": {},
112 |      "output_type": "execute_result"
113 |     }
114 |    ],
115 |    "source": [
116 |     "g = MapGraph(example)\n",
117 |     "g.show()\n",
118 |     "g.find_all('E')"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "The solution is perhaps somewhat counterintuitive: we start our search from the emergency, not from the policemen."
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 68,
131 |    "metadata": {
132 |     "collapsed": true
133 |    },
134 |    "outputs": [],
135 |    "source": [
136 |     "def bfs_from(graph, source):\n",
137 |     "    q = deque()\n",
138 |     "    # initially source is on the queue\n",
139 |     "    distance = {source: 0}\n",
140 |     "    q.appendleft(source)\n",
141 |     "    # while queue is not empty\n",
142 |     "    while len(q) > 0:\n",
143 |     "        # consider the node that has been in the queue for\n",
144 |     "        # the longest\n",
145 |     "        node = q.popleft()\n",
146 |     "        # for all neighbours\n",
147 |     "        for neighbour in graph.neighbours(node):\n",
148 |     "            # if they were NOT visited yet\n",
149 |     "            if neighbour not in distance:\n",
150 |     "                # mark their distance and put them on queue\n",
151 |     "                distance[neighbour] = distance[node] + 1\n",
152 |     "                q.append(neighbour)\n",
153 |     "    return distance"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 76,
159 |    "metadata": {
160 |     "collapsed": false
161 |    },
162 |    "outputs": [
163 |     {
164 |      "name": "stdout",
165 |      "output_type": "stream",
166 |      "text": [
167 |       ". . . P . . . \n",
168 |       ". . . X X X . \n",
169 |       "E X . . P . . \n",
170 |       ". X . . . . . \n",
171 |       ". . . . . . . \n",
172 |       ". . . . . . . \n",
173 |       "\n",
174 |       "2 3 4 5 6 7 8 \n",
175 |       "1 2 3 ? ? ? 9 \n",
176 |       "0 ? 4 5 6 7 8 \n",
177 |       "1 ? 5 6 7 8 9 \n",
178 |       "2 3 4 5 6 7 8 \n",
179 |       "3 4 5 6 7 8 9 \n"
180 |      ]
181 |     }
182 |    ],
183 |    "source": [
184 |     "g = MapGraph(example)\n",
185 |     "distances = bfs_from(g, g.find_all('E')[0])\n",
186 |     "g.show()\n",
187 |     "print('')\n",
188 |     "g.show(distances)"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 70,
194 |    "metadata": {
195 |     "collapsed": false
196 |    },
197 |    "outputs": [],
198 |    "source": [
199 |     "def solve_google(example):\n",
200 |     "    g = MapGraph(example)\n",
201 |     "    # compute distances from the emergency to everywhere else\n",
202 |     "    distances = bfs_from(g, g.find_all('E')[0])\n",
203 |     "    # find the minimum distance policeman\n",
204 |     "    res = float('inf')\n",
205 |     "    for policeman in g.find_all('P'):\n",
206 |     "        res = min(res, distances[policeman])\n",
207 |     "    return res"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": 71,
213 |    "metadata": {
214 |     "collapsed": false
215 |    },
216 |    "outputs": [
217 |     {
218 |      "data": {
219 |       "text/plain": [
220 |        "5"
221 |       ]
222 |      },
223 |      "execution_count": 71,
224 |      "metadata": {},
225 |      "output_type": "execute_result"
226 |     }
227 |    ],
228 |    "source": [
229 |     "solve_google(example)"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "markdown",
234 |    "metadata": {},
235 |    "source": [
236 |     "## Success! \n",
237 |     "\n",
238 |     "We solved the problem correctly in $O(nm)$, which is the best solution we could have hoped for!\n",
239 |     "\n",
240 |     "## Follow up question\n",
241 |     "\n",
242 |     "Find the worst place for an emergency (the cell whose closest policeman is as far away as possible).\n",
243 |     "\n",
244 |     "Think about how to solve it."
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": 72,
250 |    "metadata": {
251 |     "collapsed": true
252 |    },
253 |    "outputs": [],
254 |    "source": [
255 |     "example2 = [\n",
256 |     "    \"...P...\",\n",
257 |     "    \"...XXX.\",\n",
258 |     "    \"EX..P..\",\n",
259 |     "    \".X.....\",\n",
260 |     "    \".......\",\n",
261 |     "    \".......\",\n",
262 |     "]"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "markdown",
267 |    "metadata": {},
268 |    "source": [
269 |     "There are many suboptimal solutions, but it turns out we can still solve it in $O(nm)$ by starting BFS from all the policemen simultaneously!"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 73,
275 |    "metadata": {
276 |     "collapsed": true
277 |    },
278 |    "outputs": [],
279 |    "source": [
280 |     "def bfs_from_many(graph, sources):\n",
281 |     "    q = deque()\n",
282 |     "    distance = {s: 0 for s in sources} # <--- this line changed\n",
283 |     "    q.extendleft(sources)              # <--- this line changed\n",
284 |     "    while len(q) > 0:\n",
285 |     "        node = q.popleft()\n",
286 |     "        for neighbour in graph.neighbours(node):\n",
287 |     "            if neighbour not in distance:\n",
288 |     "                distance[neighbour] = distance[node] + 1\n",
289 |     "                q.append(neighbour)\n",
290 |     "    return distance"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": 74,
296 |    "metadata": {
297 |     "collapsed": false
298 |    },
299 |    "outputs": [
300 |     {
301 |      "name": "stdout",
302 |      "output_type": "stream",
303 |      "text": [
304 |       ". . . P . . . \n",
305 |       ". . . X X X . \n",
306 |       "E X . . P . . \n",
307 |       ". X . . . . . \n",
308 |       ". . . . . . . \n",
309 |       ". . . . . . . \n",
310 |       "\n",
311 |       "3 2 1 0 1 2 3 \n",
312 |       "4 3 2 ? ? ? 3 \n",
313 |       "5 ? 2 1 0 1 2 \n",
314 |       "6 ? 3 2 1 2 3 \n",
315 |       "6 5 4 3 2 3 4 \n",
316 |       "7 6 5 4 3 4 5 \n"
317 |      ]
318 |     }
319 |    ],
320 |    "source": [
321 |     "g = MapGraph(example2)\n",
322 |     "distances = bfs_from_many(g, g.find_all('P'))\n",
323 |     "g.show()\n",
324 |     "print('')\n",
325 |     "g.show(distances)"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": 77,
331 |    "metadata": {
332 |     "collapsed": true
333 |    },
334 |    "outputs": [],
335 |    "source": [
336 |     "def solve_google_hard(example):\n",
337 |     "    g = MapGraph(example)\n",
338 |     "    # compute distances from all the policemen\n",
339 |     "    distances = bfs_from_many(g, g.find_all('P'))\n",
340 |     "    # find the largest distance to the nearest policeman\n",
341 |     "    return max(distances.values())"
342 |    ]
343 |   },
344 |   {
345 |    "cell_type": "code",
346 |    "execution_count": 78,
347 |    "metadata": {
348 |     "collapsed": false
349 |    },
350 |    "outputs": [
351 |     {
352 |      "data": {
353 |       "text/plain": [
354 |        "7"
355 |       ]
356 |      },
357 |      "execution_count": 78,
358 |      "metadata": {},
359 |      "output_type": "execute_result"
360 |     }
361 |    ],
362 |    "source": [
363 |     "solve_google_hard(example2)"
364 |    ]
365 |   }
366 |  ],
367 |  "metadata": {
368 |   "kernelspec": {
369 |    "display_name": "Python 3",
370 |    "language": "python",
371 |    "name": "python3"
372 |   },
373 |   "language_info": {
374 |    "codemirror_mode": {
375 |     "name": "ipython",
376 |     "version": 3
377 |    },
378 |    "file_extension": ".py",
379 |    "mimetype": "text/x-python",
380 |    "name": "python",
381 |    "nbconvert_exporter": "python",
382 |    "pygments_lexer": "ipython3",
383 |    "version": "3.4.1"
384 |   }
385 |  },
386 |  "nbformat": 4,
387 |  "nbformat_minor": 0
388 | }
389 | 


--------------------------------------------------------------------------------
/lecture16/Bignums.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "class Number(list):\n",
 12 |     "    @staticmethod\n",
 13 |     "    def wrap(n):\n",
 14 |     "        return Number(reversed([ord(c) - ord('0') for c in str(n)]))\n",
 15 |     "    \n",
 16 |     "    def __repr__(self):\n",
 17 |     "        return ''.join([str(d) for d in reversed(self)])"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {
 24 |     "collapsed": false
 25 |    },
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/plain": [
 30 |        "([1, 1], [2, 1])"
 31 |       ]
 32 |      },
 33 |      "execution_count": 2,
 34 |      "metadata": {},
 35 |      "output_type": "execute_result"
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "a = Number.wrap(11)\n",
 40 |     "b = Number.wrap(12)\n",
 41 |     "a, b\n",
 42 |     "list(a), list(b)"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "markdown",
 47 |    "metadata": {},
 48 |    "source": [
 49 |     "## Addition"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 3,
 55 |    "metadata": {
 56 |     "collapsed": false
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "def add(a, b):\n",
 61 |     "    c = Number()\n",
 62 |     "    carry = 0\n",
 63 |     "    for i in range(max(len(a), len(b))):\n",
 64 |     "        r = carry + (a[i] if i < len(a) else 0) + (b[i] if i < len(b) else 0)\n",
 65 |     "        c.append(r % 10)\n",
 66 |     "        carry = r / 10\n",
 67 |     "    if carry > 0:\n",
 68 |     "        c.append(carry)\n",
 69 |     "    return c\n",
 70 |     "\n",
 71 |     "Number.__add__ = add\n"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "metadata": {
 78 |     "collapsed": false
 79 |    },
 80 |    "outputs": [
 81 |     {
 82 |      "data": {
 83 |       "text/plain": [
 84 |        "13"
 85 |       ]
 86 |      },
 87 |      "execution_count": 4,
 88 |      "metadata": {},
 89 |      "output_type": "execute_result"
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "Number.wrap(6) + Number.wrap(7)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 5,
 99 |    "metadata": {
100 |     "collapsed": false
101 |    },
102 |    "outputs": [
103 |     {
104 |      "data": {
105 |       "text/plain": [
106 |        "76"
107 |       ]
108 |      },
109 |      "execution_count": 5,
110 |      "metadata": {},
111 |      "output_type": "execute_result"
112 |     }
113 |    ],
114 |    "source": [
115 |     "Number.wrap(6) + Number.wrap(70)"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "# Subtraction"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 80,
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "def sub(a, b):\n",
134 |     "    c = Number()\n",
135 |     "    carry = 0\n",
136 |     "    for i in range(max(len(a), len(b))):\n",
137 |     "        r = carry + (a[i] if i < len(a) else 0) - (b[i] if i < len(b) else 0)\n",
138 |     "        carry = 0 \n",
139 |     "        while r < 0:\n",
140 |     "            r += 10\n",
141 |     "            carry -= 1\n",
142 |     "        c.append(r % 10)\n",
143 |     "    assert carry == 0, \"negative result\"\n",
144 |     "    while len(c) > 1 and c[-1] == 0:\n",
145 |     "        c.pop()\n",
146 |     "    return c\n",
147 |     "Number.__sub__ = sub"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 81,
153 |    "metadata": {
154 |     "collapsed": false
155 |    },
156 |    "outputs": [
157 |     {
158 |      "data": {
159 |       "text/plain": [
160 |        "1"
161 |       ]
162 |      },
163 |      "execution_count": 81,
164 |      "metadata": {},
165 |      "output_type": "execute_result"
166 |     }
167 |    ],
168 |    "source": [
169 |     "a, b = Number.wrap(7), Number.wrap(6)\n",
170 |     "sub(a,b)"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 82,
176 |    "metadata": {
177 |     "collapsed": false
178 |    },
179 |    "outputs": [
180 |     {
181 |      "data": {
182 |       "text/plain": [
183 |        "64"
184 |       ]
185 |      },
186 |      "execution_count": 82,
187 |      "metadata": {},
188 |      "output_type": "execute_result"
189 |     }
190 |    ],
191 |    "source": [
192 |     "Number.wrap(70) - Number.wrap(6)"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": 83,
198 |    "metadata": {
199 |     "collapsed": false
200 |    },
201 |    "outputs": [
202 |     {
203 |      "data": {
204 |       "text/plain": [
205 |        "0"
206 |       ]
207 |      },
208 |      "execution_count": 83,
209 |      "metadata": {},
210 |      "output_type": "execute_result"
211 |     }
212 |    ],
213 |    "source": [
214 |     "Number.wrap(70) - Number.wrap(70)"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "markdown",
219 |    "metadata": {},
220 |    "source": [
221 |     "# Multiplication by digit"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 126,
227 |    "metadata": {
228 |     "collapsed": false
229 |    },
230 |    "outputs": [],
231 |    "source": [
232 |     "def mul_by_digit(number, digit):\n",
233 |     "    res = Number()\n",
234 |     "    carry = 0\n",
235 |     "    for i in range(len(number)):\n",
236 |     "        r = carry + number[i] * digit\n",
237 |     "        res.append(r % 10)\n",
238 |     "        carry = r / 10\n",
239 |     "    if carry > 0:\n",
240 |     "        res.append(carry)\n",
241 |     "    return res"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": 127,
247 |    "metadata": {
248 |     "collapsed": false
249 |    },
250 |    "outputs": [
251 |     {
252 |      "data": {
253 |       "text/plain": [
254 |        "1107"
255 |       ]
256 |      },
257 |      "execution_count": 127,
258 |      "metadata": {},
259 |      "output_type": "execute_result"
260 |     }
261 |    ],
262 |    "source": [
263 |     "mul_by_digit(Number.wrap(123), 9)"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "markdown",
268 |    "metadata": {},
269 |    "source": [
270 |     "# Karatsuba Multiplication"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 116,
276 |    "metadata": {
277 |     "collapsed": false
278 |    },
279 |    "outputs": [],
280 |    "source": [
281 |     "def mul(a, b):\n",
282 |     "    if len(a) < len(b):\n",
283 |     "        a, b = b, a\n",
284 |     "    # now a is longer of the two\n",
285 |     "    assert len(a) >= len(b)\n",
286 |     "    if len(b) == 0:\n",
287 |     "        return Number.wrap(0)\n",
288 |     "    elif len(b) == 1:\n",
289 |     "        return mul_by_digit(a, b[0])\n",
290 |     "    \n",
291 |     "    mid = len(a) / 2\n",
292 |     "    y1, x1 = a[:mid], a[mid:]\n",
293 |     "    y2, x2 = b[:mid], b[mid:]\n",
294 |     "    H = mul(x1, x2)\n",
295 |     "    L = mul(y1, y2)\n",
296 |     "    M = mul(add(x1,y1), add(x2,y2))\n",
297 |     "    M = sub(M,H)\n",
298 |     "    M = sub(M,L)\n",
299 |     "    \n",
300 |     "    res = Number([0] * (2 * mid) + list(H)) + Number([0] * (mid) + list(M))   + L\n",
301 |     "    return res"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "code",
306 |    "execution_count": 124,
307 |    "metadata": {
308 |     "collapsed": false
309 |    },
310 |    "outputs": [
311 |     {
312 |      "data": {
313 |       "text/plain": [
314 |        "1353"
315 |       ]
316 |      },
317 |      "execution_count": 124,
318 |      "metadata": {},
319 |      "output_type": "execute_result"
320 |     }
321 |    ],
322 |    "source": [
323 |     "mul(Number.wrap(123), Number.wrap(11))"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": 123,
329 |    "metadata": {
330 |     "collapsed": false
331 |    },
332 |    "outputs": [],
333 |    "source": [
334 |     "for a in range(1112):\n",
335 |     "    for b in range(123):\n",
336 |     "        A,B = Number.wrap(a), Number.wrap(b)\n",
337 |     "        assert int(str(mul(A, B))) == a*b"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "markdown",
342 |    "metadata": {},
343 |    "source": [
344 |     "# Square root (Newton's method)"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "code",
349 |    "execution_count": 12,
350 |    "metadata": {
351 |     "collapsed": false
352 |    },
353 |    "outputs": [],
354 |    "source": [
355 |     "def sqrt(a):\n",
356 |     "    nextx, x = None, a\n",
357 |     "    while True:\n",
358 |     "        print(bin(x), x)\n",
359 |     "        nextx = (x + (a/x)) / 2\n",
360 |     "        if x == nextx:\n",
361 |     "            break\n",
362 |     "        x = nextx\n",
363 |     "    return x"
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "code",
368 |    "execution_count": 13,
369 |    "metadata": {
370 |     "collapsed": false
371 |    },
372 |    "outputs": [
373 |     {
374 |      "name": "stdout",
375 |      "output_type": "stream",
376 |      "text": [
377 |       "('0b10000011100010000100', 538756)\n",
378 |       "('0b1000001110001000010', 269378)\n",
379 |       "('0b100000111000100010', 134690)\n",
380 |       "('0b10000011100010010', 67346)\n",
381 |       "('0b1000001110001100', 33676)\n",
382 |       "('0b100000111001101', 16845)\n",
383 |       "('0b10000011110110', 8438)\n",
384 |       "('0b1000010011010', 4250)\n",
385 |       "('0b100010001100', 2188)\n",
386 |       "('0b10011000001', 1217)\n",
387 |       "('0b1100111101', 829)\n",
388 |       "('0b1011100011', 739)\n",
389 |       "('0b1011011110', 734)\n"
390 |      ]
391 |     },
392 |     {
393 |      "data": {
394 |       "text/plain": [
395 |        "734"
396 |       ]
397 |      },
398 |      "execution_count": 13,
399 |      "metadata": {},
400 |      "output_type": "execute_result"
401 |     }
402 |    ],
403 |    "source": [
404 |     "sqrt(734 * 734)"
405 |    ]
406 |   },
407 |   {
408 |    "cell_type": "code",
409 |    "execution_count": 14,
410 |    "metadata": {
411 |     "collapsed": false
412 |    },
413 |    "outputs": [
414 |     {
415 |      "name": "stdout",
416 |      "output_type": "stream",
417 |      "text": [
418 |       "('0b10000011111001001011', 540235)\n",
419 |       "('0b1000001111100100110', 270118)\n",
420 |       "('0b100000111110010011', 135059)\n",
421 |       "('0b10000011111001011', 67531)\n",
422 |       "('0b1000001111101001', 33769)\n",
423 |       "('0b100000111111100', 16892)\n",
424 |       "('0b10000100001101', 8461)\n",
425 |       "('0b1000010100110', 4262)\n",
426 |       "('0b100010010010', 2194)\n",
427 |       "('0b10011000100', 1220)\n",
428 |       "('0b1100111111', 831)\n",
429 |       "('0b1011100100', 740)\n",
430 |       "('0b1011011111', 735)\n"
431 |      ]
432 |     },
433 |     {
434 |      "data": {
435 |       "text/plain": [
436 |        "735"
437 |       ]
438 |      },
439 |      "execution_count": 14,
440 |      "metadata": {},
441 |      "output_type": "execute_result"
442 |     }
443 |    ],
444 |    "source": [
445 |     "sqrt(735 * 735 + 10)"
446 |    ]
447 |   },
448 |   {
449 |    "cell_type": "code",
450 |    "execution_count": 15,
451 |    "metadata": {
452 |     "collapsed": false
453 |    },
454 |    "outputs": [
455 |     {
456 |      "name": "stdout",
457 |      "output_type": "stream",
458 |      "text": [
459 |       "('0b10000011111000110111', 540215)\n",
460 |       "('0b1000001111100011100', 270108)\n",
461 |       "('0b100000111110001110', 135054)\n",
462 |       "('0b10000011111001000', 67528)\n",
463 |       "('0b1000001111100111', 33767)\n",
464 |       "('0b100000111111011', 16891)\n",
465 |       "('0b10000100001101', 8461)\n",
466 |       "('0b1000010100110', 4262)\n",
467 |       "('0b100010010010', 2194)\n",
468 |       "('0b10011000100', 1220)\n",
469 |       "('0b1100111111', 831)\n",
470 |       "('0b1011100100', 740)\n",
471 |       "('0b1011011111', 735)\n",
472 |       "('0b1011011110', 734)\n"
473 |      ]
474 |     },
475 |     {
476 |      "data": {
477 |       "text/plain": [
478 |        "734"
479 |       ]
480 |      },
481 |      "execution_count": 15,
482 |      "metadata": {},
483 |      "output_type": "execute_result"
484 |     }
485 |    ],
486 |    "source": [
487 |     "sqrt(735 * 735 - 10)"
488 |    ]
489 |   },
490 |   {
491 |    "cell_type": "code",
492 |    "execution_count": 11,
493 |    "metadata": {
494 |     "collapsed": false
495 |    },
496 |    "outputs": [
497 |     {
498 |      "name": "stdout",
499 |      "output_type": "stream",
500 |      "text": [
501 |       "200000000000000000000\n",
502 |       "100000000000000000000\n",
503 |       "50000000000000000001\n",
504 |       "25000000000000000002\n",
505 |       "12500000000000000004\n",
506 |       "6250000000000000009\n",
507 |       "3125000000000000020\n",
508 |       "1562500000000000041\n",
509 |       "781250000000000084\n",
510 |       "390625000000000169\n",
511 |       "195312500000000340\n",
512 |       "97656250000000681\n",
513 |       "48828125000001364\n",
514 |       "24414062500002729\n",
515 |       "12207031250005460\n",
516 |       "6103515625010921\n",
517 |       "3051757812521844\n",
518 |       "1525878906293689\n",
519 |       "762939453212380\n",
520 |       "381469726737261\n",
521 |       "190734863630774\n",
522 |       "95367432339674\n",
523 |       "47683717218412\n",
524 |       "23841860706357\n",
525 |       "11920934547482\n",
526 |       "5960475662345\n",
527 |       "2980254608357\n",
528 |       "1490160858358\n",
529 |       "745147536028\n",
530 |       "372707969625\n",
531 |       "186622291390\n",
532 |       "93846987363\n",
533 |       "47989057987\n",
534 |       "26078337348\n",
535 |       "16873768965\n",
536 |       "14363242737\n",
537 |       "14143837480\n",
538 |       "14142135726\n",
539 |       "14142135623\n"
540 |      ]
541 |     },
542 |     {
543 |      "data": {
544 |       "text/plain": [
545 |        "1.4142135623"
546 |       ]
547 |      },
548 |      "execution_count": 11,
549 |      "metadata": {},
550 |      "output_type": "execute_result"
551 |     }
552 |    ],
553 |    "source": [
554 |     "sqrt(2 * 10**20) / float(10**10)"
555 |    ]
556 |   },
557 |   {
558 |    "cell_type": "code",
559 |    "execution_count": null,
560 |    "metadata": {
561 |     "collapsed": true
562 |    },
563 |    "outputs": [],
564 |    "source": []
565 |   }
566 |  ],
567 |  "metadata": {
568 |   "kernelspec": {
569 |    "display_name": "Python 2",
570 |    "language": "python",
571 |    "name": "python2"
572 |   },
573 |   "language_info": {
574 |    "codemirror_mode": {
575 |     "name": "ipython",
576 |     "version": 2
577 |    },
578 |    "file_extension": ".py",
579 |    "mimetype": "text/x-python",
580 |    "name": "python",
581 |    "nbconvert_exporter": "python",
582 |    "pygments_lexer": "ipython2",
583 |    "version": "2.7.8"
584 |   }
585 |  },
586 |  "nbformat": 4,
587 |  "nbformat_minor": 0
588 | }
589 | 


--------------------------------------------------------------------------------
/lecture9/Hashing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Rabin-Karp method\n",
  8 |     "\n",
  9 |     "If you think hard enough, there's nothing that differentiates pieces of text from numbers. You can think of letters as digits and of the base of the numbers as sufficiently big to accommodate all the digits. For example, take the following text:\n",
 10 |     "\n",
 11 |     "$$\n",
 12 |     "babacb\n",
 13 |     "$$\n",
 14 |     "\n",
 15 |     "It can be thought of as a number in base 26 (for all the English letters):\n",
 16 |     "\n",
 17 |     "$$\n",
 18 |     "(1,0,1,0,2,1)_{26}\n",
 19 |     "$$\n",
 20 |     "\n",
 21 |     "We can transform this number to base 10 using the following equation.\n",
 22 |     "\n",
 23 |     "$$\n",
 24 |     "1*26^5 + 0 * 26^4 + 1*26^3 + 0*26^2 + 2*26^1 + 1*26^0 = 11899005\n",
 25 |     "$$\n",
 26 |     "\n",
 27 |     "From the formulation above the following property should be clear.\n",
 28 |     "\n",
 29 |     "$$\n",
 30 |     "abba = abb * 26 + b\n",
 31 |     "$$\n",
 32 |     "\n",
 33 |     "In general we can write $concat(word, letter) = base * word + letter$ (1).\n",
 34 |     "\n",
 35 |     "There's also a small technicality. When we compare numbers, $0001$, $001$ and $1$ are equivalent. This means that we cannot map any letter to 0 if we want to be able to successfully compare the numbers. "
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "Observation (1) allows us to quickly compute hashes for all the prefixes of a given word. Just like in class we are going to use modular arithmetic for our computations."
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 8,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [
 52 |     {
 53 |      "name": "stdout",
 54 |      "output_type": "stream",
 55 |      "text": [
 56 |       "'A' 65\n",
 57 |       "'a' 97\n",
 58 |       "'b' 98\n",
 59 |       "'c' 99\n",
 60 |       "' ' 32\n",
 61 |       "'c' - 'a' + 1 = 3\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "# we need to map letters to numbers. Python function ord does the job\n",
 67 |     "print(repr('A'), ord('A'))\n",
 68 |     "print(repr('a'), ord('a'))\n",
 69 |     "print(repr('b'), ord('b'))\n",
 70 |     "print(repr('c'), ord('c'))\n",
 71 |     "print(repr(' '), ord(' '))\n",
 72 |     "\n",
 73 |     "print('%s - %s + 1 = %d' % (repr('c'), repr('a'), ord('c') - ord('a') + 1))\n"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 17,
 79 |    "metadata": {
 80 |     "collapsed": false
 81 |    },
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "BIG_FAT_PRIME = 2**32 - 1\n",
 85 |     "ENGLISH_BASE = 30 # in theory 27 is sufficient but better safe than sorry!\n",
 86 |     "\n",
 87 |     "def compute_hashes(text, base=ENGLISH_BASE, modulo = BIG_FAT_PRIME):\n",
 88 |     "    # \n",
 89 |     "    h = [None for _ in range(len(text) + 1)]\n",
 90 |     "    h[0] = 0 # hash of empty word is 0\n",
 91 |     "    for i in range(len(text)):\n",
 92 |     "        # we only deal with english letters so we subtract 'a'\n",
 93 |     "        # to normalize range. We add 1 to avoid creating zero digit.\n",
 94 |     "        letter_as_number = (ord(text[i]) - ord('a') + 1)\n",
 95 |     "        h[i + 1] = h[i] * base + letter_as_number\n",
 96 |     "        h[i + 1] %= modulo\n",
 97 |     "        # at the end of the iteration h[i+1] is the hash\n",
 98 |     "        # of prefix of text of length (i+1) which in\n",
 99 |     "        # Python is text[:(i+1)]\n",
100 |     "    return h"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "PROTIP: If you ever happen to implement this in a lower-level programming language like C or C++, beware of integer overflows."
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 18,
113 |    "metadata": {
114 |     "collapsed": false
115 |    },
116 |    "outputs": [
117 |     {
118 |      "data": {
119 |       "text/plain": [
120 |        "[0, 2, 61, 1832, 54961, 1648833, 49464992]"
121 |       ]
122 |      },
123 |      "execution_count": 18,
124 |      "metadata": {},
125 |      "output_type": "execute_result"
126 |     }
127 |    ],
128 |    "source": [
129 |     "compute_hashes(\"babacb\")"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 19,
135 |    "metadata": {
136 |     "collapsed": false
137 |    },
138 |    "outputs": [
139 |     {
140 |      "data": {
141 |       "text/plain": [
142 |        "[0, 2, 61, 1832, 54964, 1648924, 49467724]"
143 |       ]
144 |      },
145 |      "execution_count": 19,
146 |      "metadata": {},
147 |      "output_type": "execute_result"
148 |     }
149 |    ],
150 |    "source": [
151 |     "compute_hashes(\"babddd\")"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 20,
157 |    "metadata": {
158 |     "collapsed": false
159 |    },
160 |    "outputs": [
161 |     {
162 |      "data": {
163 |       "text/plain": [
164 |        "[0,\n",
165 |        " 2,\n",
166 |        " 61,\n",
167 |        " 1832,\n",
168 |        " 54964,\n",
169 |        " 1648924,\n",
170 |        " 49467721,\n",
171 |        " 1484031649,\n",
172 |        " 1571276524,\n",
173 |        " 4188622771,\n",
174 |        " 1104631594,\n",
175 |        " 3074176759,\n",
176 |        " 2030989594,\n",
177 |        " 800145691,\n",
178 |        " 2529534259]"
179 |       ]
180 |      },
181 |      "execution_count": 20,
182 |      "metadata": {},
183 |      "output_type": "execute_result"
184 |     }
185 |    ],
186 |    "source": [
187 |     "# for longer strings modulo matters\n",
188 |     "compute_hashes(\"babddasdasdsad\")"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "markdown",
193 |    "metadata": {},
194 |    "source": [
195 |     "## hashes of substrings\n",
196 |     "\n",
197 |     "Now here's a crucial observation. Let's take the polynomial representation of the string $babacb$ (where $X$ is the base):\n",
198 |     "\n",
199 |     "$$\n",
200 |     "b*X^5 + a * X^4 + b*X^3 + a*X^2 + c*X^1 + b*X^0 \n",
201 |     "$$\n",
202 |     "\n",
203 |     "Say we want to compute the hash of $ac$, which conveniently appears at the 4th position (index 3) of the string we originally hashed. Moreover, we have the hashes of all the prefixes - it seems like we are in good shape:\n",
204 |     "\n",
205 |     "\\begin{align}\n",
206 |     "\\text{we have }\\ \\ \\ & hash(babac) &=\\ & b*X^4 + a * X^3 + b*X^2 &+& a*X^1 + c*X^0 \\\\\n",
207 |     "\\text{we have }\\ \\ \\  & hash(bab)   &=\\ & b*X^2 + a * X^1 + b*X^0&&\\\\\n",
208 |     "\\text{we WANT }\\ \\ \\  & hash(ac)    &=\\ &                         && a*X^1 + c*X^0\\\\\n",
209 |     "\\end{align}\n",
210 |     "\n",
211 |     "\n",
212 |     "From above we can clearly see that:\n",
213 |     "\n",
214 |     "$$\n",
215 |     "hash(ac) = hash(babac) - X^2 * hash(bab)\n",
216 |     "$$\n",
217 |     "\n",
218 |     "We can generalize this to arbitrary substring of our hashed string. Assume we hashed string $s_0, s_1, ..., s_{n-1}$ such that $h_0 = hash(\\emptyset)$, $h_1 = hash(s_0)$, $h_2 = hash(s_0, s_1)$ etc. \n",
219 |     "Then we can compute $hash(s_i, ..., s_{j-1})$ using the following formula:\n",
220 |     "\n",
221 |     "$$\n",
222 |     "hash(s_i, ..., s_{j-1}) = h_j - h_i * X ^{j - i}\n",
223 |     "$$\n",
224 |     "\n",
225 |     "This looks very close to $O(1)$ complexity hash computation if not for $X ^{j - i}$. But since there are at most $n$ different powers of $X$ that we are interested in, we can precompute them in $O(n)$ time."
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": 37,
231 |    "metadata": {
232 |     "collapsed": true
233 |    },
234 |    "outputs": [],
235 |    "source": [
236 |     "def compute_powers(n, base=ENGLISH_BASE, modulo=BIG_FAT_PRIME):\n",
237 |     "    powers = [None for _ in range(n + 1)]\n",
238 |     "    powers[0] = 1\n",
239 |     "    for i in range(n):\n",
240 |     "        powers[i+1] = (powers[i] * base) % modulo\n",
241 |     "    return powers"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": 38,
247 |    "metadata": {
248 |     "collapsed": false
249 |    },
250 |    "outputs": [
251 |     {
252 |      "data": {
253 |       "text/plain": [
254 |        "[1,\n",
255 |        " 30,\n",
256 |        " 900,\n",
257 |        " 27000,\n",
258 |        " 810000,\n",
259 |        " 24300000,\n",
260 |        " 729000000,\n",
261 |        " 395163525,\n",
262 |        " 3264971160,\n",
263 |        " 3459854310,\n",
264 |        " 716414220]"
265 |       ]
266 |      },
267 |      "execution_count": 38,
268 |      "metadata": {},
269 |      "output_type": "execute_result"
270 |     }
271 |    ],
272 |    "source": [
273 |     "compute_powers(10)"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "markdown",
278 |    "metadata": {},
279 |    "source": [
280 |     "Now we can put all those observations together into an efficient data structure that allows us to compute hashes of substrings in $O(1)$."
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "code",
285 |    "execution_count": 77,
286 |    "metadata": {
287 |     "collapsed": true
288 |    },
289 |    "outputs": [],
290 |    "source": [
291 |     "class Hasher(object):\n",
292 |     "    def __init__(self, word):\n",
293 |     "        self.h = compute_hashes(word)\n",
294 |     "        self.powers = compute_powers(len(word))\n",
295 |     "        \n",
296 |     "    def substring_hash(self, i, j):\n",
297 |     "        result = self.h[j] - self.h[i] * self.powers[j-i]\n",
298 |     "        return result % BIG_FAT_PRIME"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "code",
303 |    "execution_count": 78,
304 |    "metadata": {
305 |     "collapsed": false
306 |    },
307 |    "outputs": [],
308 |    "source": [
309 |     "TEXT = \"abcxabcx\"\n",
310 |     "h = Hasher(TEXT)"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "code",
315 |    "execution_count": 79,
316 |    "metadata": {
317 |     "collapsed": false
318 |    },
319 |    "outputs": [
320 |     {
321 |      "name": "stdout",
322 |      "output_type": "stream",
323 |      "text": [
324 |       "[ab]cxabcx 32\n",
325 |       "abcx[ab]cx 32\n",
326 |       "abc[xa]bcx 721\n"
327 |      ]
328 |     }
329 |    ],
330 |    "source": [
331 |     "def highlight(word, i, j):\n",
332 |     "    return word[:i] + \"[\" + word[i:j] + \"]\" + word[j:]\n",
333 |     "\n",
334 |     "print(highlight(TEXT, 0, 2), h.substring_hash(0, 2))\n",
335 |     "print(highlight(TEXT, 4, 6), h.substring_hash(4, 6))\n",
336 |     "print(highlight(TEXT, 3, 5), h.substring_hash(3, 5))"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "markdown",
341 |    "metadata": {},
342 |    "source": [
343 |     "## Hasher complexity analysis.\n",
344 |     "\n",
345 |     "Preprocessing (`__init__`):\n",
346 |     "- `compute_hashes` is $O(n)$\n",
347 |     "- `compute_powers` is $O(n)$\n",
348 |     "Therefore precomputing is $O(n)$.\n",
349 |     "\n",
350 |     "Queries (`substring_hash`) is of complexity $O(1)$ - it is just a simple formula."
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "markdown",
355 |    "metadata": {},
356 |    "source": [
357 |     "Notice that this technique is very powerful - more powerful than we need for pattern matching. It should not be a surprise that we can easily use it to solve pattern matching."
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": 100,
363 |    "metadata": {
364 |     "collapsed": false
365 |    },
366 |    "outputs": [],
367 |    "source": [
368 |     "# hasher for text\n",
369 |     "text_h = Hasher(\"to be or not to be\")"
370 |    ]
371 |   },
372 |   {
373 |    "cell_type": "code",
374 |    "execution_count": 101,
375 |    "metadata": {
376 |     "collapsed": false
377 |    },
378 |    "outputs": [
379 |     {
380 |      "data": {
381 |       "text/plain": [
382 |        "65"
383 |       ]
384 |      },
385 |      "execution_count": 101,
386 |      "metadata": {},
387 |      "output_type": "execute_result"
388 |     }
389 |    ],
390 |    "source": [
391 |     "# hash of the pattern\n",
392 |     "compute_hashes(\"be\")[-1]"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "code",
397 |    "execution_count": 102,
398 |    "metadata": {
399 |     "collapsed": false
400 |    },
401 |    "outputs": [
402 |     {
403 |      "data": {
404 |       "text/plain": [
405 |        "65"
406 |       ]
407 |      },
408 |      "execution_count": 102,
409 |      "metadata": {},
410 |      "output_type": "execute_result"
411 |     }
412 |    ],
413 |    "source": [
414 |     "# hash of the occurrence of \"be\" in original text. \n",
415 |     "text_h.substring_hash(3, 5)"
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "code",
420 |    "execution_count": 105,
421 |    "metadata": {
422 |     "collapsed": false
423 |    },
424 |    "outputs": [],
425 |    "source": [
426 |     "def compute_matches(text, pattern):\n",
427 |     "    # hash of pattern\n",
428 |     "    pattern_hash = compute_hashes(pattern)[-1]\n",
429 |     "    # hasher for text\n",
430 |     "    text_h = Hasher(text)\n",
431 |     "    res = []\n",
432 |     "    for i in range(len(text) - len(pattern) + 1):\n",
433 |     "        # i is potential match start index\n",
434 |     "        # compare hash in text with hash of pattern\n",
435 |     "        if text_h.substring_hash(i, i + len(pattern)) == pattern_hash:\n",
436 |     "            # if matching append to result list.\n",
437 |     "            res.append(i)\n",
438 |     "    return res"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "code",
443 |    "execution_count": 106,
444 |    "metadata": {
445 |     "collapsed": false
446 |    },
447 |    "outputs": [
448 |     {
449 |      "data": {
450 |       "text/plain": [
451 |        "[3, 16]"
452 |       ]
453 |      },
454 |      "execution_count": 106,
455 |      "metadata": {},
456 |      "output_type": "execute_result"
457 |     }
458 |    ],
459 |    "source": [
460 |     "compute_matches(\"to be or not to be\", \"be\")"
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "markdown",
465 |    "metadata": {},
466 |    "source": [
467 |     "## Rabin-Karp complexity analysis\n",
468 |     "\n",
469 |     "Assume that pattern is of length $n$ and text of length $m$\n",
470 |     "\n",
471 |     "- pattern hash: $O(n)$\n",
472 |     "- text preprocessing $O(m)$.\n",
473 |     "- $m - n + 1$ iterations of main loop with $O(1)$ hash computation in each loop\n",
474 |     "\n",
475 |     "total: $O(n+m)$\n",
476 |     "\n",
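477 |     "Catch? Relies on luck: two different substrings can collide on the same hash, and `compute_matches` never verifies a match character by character."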
478 |    ]
479 |   }
480 |  ],
481 |  "metadata": {
482 |   "kernelspec": {
483 |    "display_name": "Python 3",
484 |    "language": "python",
485 |    "name": "python3"
486 |   },
487 |   "language_info": {
488 |    "codemirror_mode": {
489 |     "name": "ipython",
490 |     "version": 3
491 |    },
492 |    "file_extension": ".py",
493 |    "mimetype": "text/x-python",
494 |    "name": "python",
495 |    "nbconvert_exporter": "python",
496 |    "pygments_lexer": "ipython3",
497 |    "version": "3.4.1"
498 |   }
499 |  },
500 |  "nbformat": 4,
501 |  "nbformat_minor": 0
502 | }
503 | 


--------------------------------------------------------------------------------
/lecture1/Stock Exchange.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import cProfile\n",
 12 |     "import random\n",
 13 |     "import time"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "# Stock Exchange Problem"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "Consider the following problem. We have a non-empty array representing evolution of price of a particular stock over time.\n",
 28 |     "```python\n",
 29 |     "A = [20, 3, 19, 1, 15, 6]\n",
 30 |     "```\n",
 31 |     "Given this information we want to find what is the optimal profit we can make using single buy and single sell operation (here we have perfect knowledge of the prices - you can imagine that this quantity is something that quantitative traders would like to know, to compare their decision to the best possible decision given perfect knowledge of the future).\n",
 32 |     "\n",
 33 |     "More formally we want to find two numbers $b$, $s$, such that $$0 \\leq b \\leq s < |A|$$ and $$A_s - A_b$$ is maximum possible. Of course we cannot sell before buying.\n",
 34 |     "\n",
 35 |     "For example for the array given above, the biggest profit we can make is $16$ (make sure you can see that). Below we present three different solutions to this problem."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 1,
 41 |    "metadata": {
 42 |     "collapsed": true
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# here we seed the random number generator, to ensure, that we generate \n",
 47 |     "# the same random instance every time we pass seed equal to a particular \n",
 48 |     "# value. This way the speed comparison is fair.\n",
 49 |     "def make_prices(n, seed):\n",
 50 |     "    \"\"\" Return array of n random prices. \"\"\"\n",
 51 |     "    random.seed(seed)\n",
 52 |     "    return [ random.random() for _ in range(n) ]"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "### Naive solution\n",
 60 |     "This solution is a direct search over all values of $b$ and $s$. The complexity is $O(n^2)$ (intuitively, we have two nested for loops, each of which does $O(n)$ iterations, where $n$ = len(A))."
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 2,
 66 |    "metadata": {
 67 |     "collapsed": true
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "def naive(A):\n",
 72 |     "    \"\"\" return best gain on A, using naive method \n",
 73 |     "        running time, due to doubly-nest loop, is O(n^2)\n",
 74 |     "    \"\"\"\n",
 75 |     "    n = len(A)\n",
 76 |     "    ans = 0\n",
 77 |     "    for i0 in range(n):\n",
 78 |     "        for j0 in range(i0,n):\n",
 79 |     "            ans = max(ans, A[j0]-A[i0])\n",
 80 |     "    return ans"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 3,
 86 |    "metadata": {
 87 |     "collapsed": false
 88 |    },
 89 |    "outputs": [
 90 |     {
 91 |      "data": {
 92 |       "text/plain": [
 93 |        "16"
 94 |       ]
 95 |      },
 96 |      "execution_count": 3,
 97 |      "metadata": {},
 98 |      "output_type": "execute_result"
 99 |     }
100 |    ],
101 |    "source": [
102 |     "naive([20, 3, 19, 1, 15, 6])"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 7,
108 |    "metadata": {
109 |     "collapsed": false
110 |    },
111 |    "outputs": [
112 |     {
113 |      "name": "stdout",
114 |      "output_type": "stream",
115 |      "text": [
116 |       "         50025009 function calls in 14.463 seconds\n",
117 |       "\n",
118 |       "   Ordered by: standard name\n",
119 |       "\n",
120 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
121 |       "        1    0.004    0.004    0.005    0.005 <ipython-input-1-a3d706331740>:1(make_prices)\n",
122 |       "        1    9.050    9.050   14.458   14.458 <ipython-input-2-1b173f4b093c>:1(naive)\n",
123 |       "        1    0.000    0.000   14.463   14.463 <string>:1(<module>)\n",
124 |       "        1    0.000    0.000    0.000    0.000 random.py:100(seed)\n",
125 |       "        1    0.000    0.000    0.000    0.000 {function seed at 0x7f90fbfeb578}\n",
126 |       "        1    0.000    0.000    0.000    0.000 {len}\n",
127 |       " 50005000    5.213    0.000    5.213    0.000 {max}\n",
128 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
129 |       "    10000    0.001    0.000    0.001    0.000 {method 'random' of '_random.Random' objects}\n",
130 |       "    10002    0.195    0.000    0.195    0.000 {range}\n",
131 |       "\n",
132 |       "\n"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "# slowness alert!\n",
138 |     "cProfile.run(\"naive(make_prices(10000, 1))\")"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "### Divide and conquer approach\n",
146 |     "Here we split our array into two simpler problems corresponding to the left half of the array $L$ and the right half $R$. \n",
147 |     "For example if \n",
148 |     "```python\n",
149 |     "A = [20, 3, 19, 1, 15, 6]\n",
150 |     "```\n",
151 |     "we can imagine that\n",
152 |     "```python\n",
153 |     "L = [20, 3, 19]\n",
154 |     "R = [1,  15, 6]\n",
155 |     "```\n",
156 |     "In order to reduce our problem to those simpler problems we need to consider three cases:\n",
157 |     "1. $ b,s \\in L$ - we can solve it by solving original problem for $L$\n",
158 |     "2. $ b,s \\in R$ - we can solve it by solving original problem for $R$\n",
159 |     "3. $ b \\in L$ and $ s \\in R$ - we can solve it by finding minimum in $L$ and maximum in $R$ and returning the difference\n",
160 |     "\n",
161 |     "We need not consider the case $s \\in L$ and $b \\in R$ (why?).\n",
162 |     "\n",
163 |     "This way we reduced our problem to two smaller problems. This is good - we will keep changing bigger problems into smaller problems until we get a problem so small that it is trivial to solve - in this case if our array is of size 1, then the maximum profit we can make is $0$.\n",
164 |     "\n",
165 |     "The complexity of this solution can be calculated by solving the following recurrence:\n",
166 |     "\n",
167 |     "\\begin{align}\n",
168 |     "T(1) =& 1\\\\\n",
169 |     "T(n) =& T(n / 2) + T(n / 2) + O(n)\n",
170 |     "\\end{align}\n",
171 |     "The three summands in the equation above come from cases 1, 2 and 3 listed above. In particular, notice that case 3 requires a single pass through the data and therefore has complexity $O(n)$. The solution to this set of equations is $T(n) = O(n \\lg{n})$."
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 9,
177 |    "metadata": {
178 |     "collapsed": true
179 |    },
180 |    "outputs": [],
181 |    "source": [
182 |     "def dc(A, lo=None, hi=None):\n",
183 |     "    \"\"\" return best gain on A[lo:hi], using divide & conquer \n",
184 |     "        running time is solution to T(n) = 2*T(n/2) + Theta(n) = Theta(n log n)\n",
185 |     "    \"\"\"\n",
186 |     "    if lo is None:\n",
187 |     "        lo = 0\n",
188 |     "    if hi is None:\n",
189 |     "        hi = len(A)\n",
190 |     "    n = hi-lo\n",
191 |     "    # base case\n",
192 |     "    if n == 1:\n",
193 |     "        return 0\n",
194 |     "    # divide and conquer\n",
195 |     "    # divide into lo:mid and mid:hi\n",
196 |     "    mid = (lo+hi)//2            \n",
197 |     "    # recurse on left half\n",
198 |     "    gain_low = dc(A, lo, mid)\n",
199 |     "    # recurse on right half\n",
200 |     "    gain_high = dc(A, mid, hi)\n",
201 |     "    # figure out best gain for buying in left half, selling in right half\n",
202 |     "    buy_price = min([ A[i] for i in range(lo, mid) ])\n",
203 |     "    sell_price = max([ A[i] for i in range(mid, hi)])\n",
204 |     "    gain_cross = sell_price - buy_price\n",
205 |     "    # optimum is max of three cases just solved\n",
206 |     "    return max(gain_low, gain_high, gain_cross)"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 10,
212 |    "metadata": {
213 |     "collapsed": false
214 |    },
215 |    "outputs": [
216 |     {
217 |      "data": {
218 |       "text/plain": [
219 |        "16"
220 |       ]
221 |      },
222 |      "execution_count": 10,
223 |      "metadata": {},
224 |      "output_type": "execute_result"
225 |     }
226 |    ],
227 |    "source": [
228 |     "dc([20, 3, 19, 1, 15, 6])"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 11,
234 |    "metadata": {
235 |     "collapsed": false
236 |    },
237 |    "outputs": [
238 |     {
239 |      "name": "stdout",
240 |      "output_type": "stream",
241 |      "text": [
242 |       "         80001 function calls (60003 primitive calls) in 0.073 seconds\n",
243 |       "\n",
244 |       "   Ordered by: standard name\n",
245 |       "\n",
246 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
247 |       "        1    0.004    0.004    0.005    0.005 <ipython-input-1-a3d706331740>:1(make_prices)\n",
248 |       "  19999/1    0.047    0.000    0.068    0.068 <ipython-input-9-e8b912c3a064>:1(dc)\n",
249 |       "        1    0.000    0.000    0.073    0.073 <string>:1(<module>)\n",
250 |       "        1    0.000    0.000    0.000    0.000 random.py:100(seed)\n",
251 |       "        1    0.000    0.000    0.000    0.000 {function seed at 0x7f90fbfeb578}\n",
252 |       "        1    0.000    0.000    0.000    0.000 {len}\n",
253 |       "    19998    0.008    0.000    0.008    0.000 {max}\n",
254 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
255 |       "    10000    0.001    0.000    0.001    0.000 {method 'random' of '_random.Random' objects}\n",
256 |       "     9999    0.004    0.000    0.004    0.000 {min}\n",
257 |       "    19999    0.008    0.000    0.008    0.000 {range}\n",
258 |       "\n",
259 |       "\n"
260 |      ]
261 |     }
262 |    ],
263 |    "source": [
264 |     "cProfile.run(\"dc(make_prices(10000, 1))\")"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "markdown",
269 |    "metadata": {},
270 |    "source": [
271 |     "### Solution by algorithmic thinking\n",
272 |     "Notice that if we know a $k$ such that $b \\leq k \\leq s$ then we can find $b$ and $s$. Indeed, $b$ is the position of the minimum at or to the left of $k$ and $s$ is the position of the maximum at or to the right of $k$. Since we don't know which $k$ is correct, we need to try all values. Implementing that solution naively leads to $O(n^2)$ complexity. Not happy.\n",
273 |     "\n",
274 |     "To improve on it, notice that we can precompute the answers to all the questions of the form *minimum to the left of $k$* (and store them in the array $B$) and *maximum to the right of $k$* (and store them in the array $S$) in $O(n)$ time. Once precomputed, we can look each of them up in $O(1)$ time, which we will do $n$ times - once for each value of $k$. The total complexity is equal to:\n",
275 |     "\n",
276 |     "$$\n",
277 |     "\\text{work to precompute B} + \\text{work to precompute S} + \\text{work to evaluate all values of k}\n",
278 |     "$$\n",
279 |     "\n",
280 |     "Notice that all three of those have complexity $O(n)$, so the total complexity is $O(n)$."
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "code",
285 |    "execution_count": 12,
286 |    "metadata": {
287 |     "collapsed": true
288 |    },
289 |    "outputs": [],
290 |    "source": [
291 |     "def lin(A):\n",
292 |     "    \"\"\" return best gain, computed by linear-time alg \n",
293 |     "        running time is Theta(n)\n",
294 |     "    \"\"\"\n",
295 |     "    n = len(A)\n",
296 |     "    # B[k] = min{ A[i0]: i0 <= k }   for k = 0, 1, ..., n-1\n",
297 |     "    #      = price to buy at if you have to buy no later than k (and sell no earlier than k)\n",
298 |     "    B = [A[0]] * n\n",
299 |     "    for k in range(1, n):\n",
300 |     "        B[k] = min(B[k-1],A[k])\n",
301 |     "    # S[k] = max{ A[j0]: j0 >= k }   for k = 0, 1, ..., n-1\n",
302 |     "    #      = price to sell at if you have to sell no earlier than k (but bought no later than k)\n",
303 |     "    S = [A[n-1]] * n\n",
304 |     "    for k in range(n-2, -1, -1):\n",
305 |     "        S[k] = max(S[k+1], A[k])\n",
306 |     "    # G[k] = S[k] - B[k] for k = 0, 1, ..., n-1\n",
307 |     "    #      = best gain from buying no later than k, then selling no earlier than k\n",
308 |     "    G = [ S[k]-B[k] for k in range(n) ]\n",
309 |     "    # opt = max { G[k]: 0 <= k < n }\n",
310 |     "    #     = best possible gain for given input A\n",
311 |     "    opt = max(G)\n",
312 |     "    return opt\n"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": 13,
318 |    "metadata": {
319 |     "collapsed": false
320 |    },
321 |    "outputs": [
322 |     {
323 |      "data": {
324 |       "text/plain": [
325 |        "16"
326 |       ]
327 |      },
328 |      "execution_count": 13,
329 |      "metadata": {},
330 |      "output_type": "execute_result"
331 |     }
332 |    ],
333 |    "source": [
334 |     "lin([20, 3, 19, 1, 15, 6])"
335 |    ]
336 |   },
337 |   {
338 |    "cell_type": "code",
339 |    "execution_count": 14,
340 |    "metadata": {
341 |     "collapsed": false
342 |    },
343 |    "outputs": [
344 |     {
345 |      "name": "stdout",
346 |      "output_type": "stream",
347 |      "text": [
348 |       "         30010 function calls in 0.026 seconds\n",
349 |       "\n",
350 |       "   Ordered by: standard name\n",
351 |       "\n",
352 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
353 |       "        1    0.004    0.004    0.006    0.006 <ipython-input-1-a3d706331740>:1(make_prices)\n",
354 |       "        1    0.015    0.015    0.020    0.020 <ipython-input-12-21a4740e4169>:1(lin)\n",
355 |       "        1    0.000    0.000    0.026    0.026 <string>:1(<module>)\n",
356 |       "        1    0.000    0.000    0.000    0.000 random.py:100(seed)\n",
357 |       "        1    0.000    0.000    0.000    0.000 {function seed at 0x7f90fbfeb578}\n",
358 |       "        1    0.000    0.000    0.000    0.000 {len}\n",
359 |       "    10000    0.003    0.000    0.003    0.000 {max}\n",
360 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
361 |       "    10000    0.001    0.000    0.001    0.000 {method 'random' of '_random.Random' objects}\n",
362 |       "     9999    0.003    0.000    0.003    0.000 {min}\n",
363 |       "        4    0.001    0.000    0.001    0.000 {range}\n",
364 |       "\n",
365 |       "\n"
366 |      ]
367 |     }
368 |    ],
369 |    "source": [
370 |     "cProfile.run(\"lin(make_prices(10000, 1))\")"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "markdown",
375 |    "metadata": {
376 |     "collapsed": true
377 |    },
378 |    "source": [
379 |     "# Problems to think about (non-examinable, non-compulsory, strictly for fun...)\n",
380 |     "1. **Maximum sum subsequence problem** - given an array A find a contiguous subsequence of maximum sum. For example for\n",
381 |     "```python\n",
382 |     "A = [10, -2, 10, 5, -4, 14]\n",
383 |     "```\n",
384 |     "the answer is 33 (the sum of the whole array)."
385 |    ]
386 |   },
387 |   {
388 |    "cell_type": "code",
389 |    "execution_count": null,
390 |    "metadata": {
391 |     "collapsed": false
392 |    },
393 |    "outputs": [],
394 |    "source": [
395 |     "# Hint to problem 1\n",
396 |     "cyph = lambda x: chr((ord(x) + 64) % 128)\n",
397 |     "''.join(map(cyph, '\\x08).4z`2%$5#%`4()3`02/\",%-`4/`4(%`34/#+`%8#(!.\\'%`02/\",%-'))"
398 |    ]
399 |   }
400 |  ],
401 |  "metadata": {
402 |   "kernelspec": {
403 |    "display_name": "Python 2",
404 |    "language": "python",
405 |    "name": "python2"
406 |   },
407 |   "language_info": {
408 |    "codemirror_mode": {
409 |     "name": "ipython",
410 |     "version": 2
411 |    },
412 |    "file_extension": ".py",
413 |    "mimetype": "text/x-python",
414 |    "name": "python",
415 |    "nbconvert_exporter": "python",
416 |    "pygments_lexer": "ipython2",
417 |    "version": "2.7.8"
418 |   }
419 |  },
420 |  "nbformat": 4,
421 |  "nbformat_minor": 0
422 | }
423 | 


--------------------------------------------------------------------------------
/challenge1/tests/mon4b.in:
--------------------------------------------------------------------------------
1 | 21243 1242
2 | OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOROOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
3 | 


--------------------------------------------------------------------------------
/lecture7/Radix Sort Performance.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 26,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import random\n",
 12 |     "import cProfile"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "metadata": {},
 18 |    "source": [
 19 |     "# Radix sort\n",
 20 |     "\n",
  21 |     "Let's look at the code from the previous notebook. It is slightly augmented - `radix_sort_by_ith_digit` was incorporated into `radix_sort` and there are a few minor tweaks. "
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 59,
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "def is_sorted(array):\n",
 33 |     "    for i in xrange(1, len(array)):\n",
 34 |     "        if array[i-1] > array[i]:\n",
 35 |     "            return False\n",
 36 |     "    return True\n",
 37 |     "\n",
 38 |     "def radix_sort(array, b):\n",
 39 |     "    assert b > 1\n",
 40 |     "    i = 0\n",
 41 |     "    while True:\n",
 42 |     "        if is_sorted(array):\n",
 43 |     "            break\n",
 44 |     "        \n",
 45 |     "        buckets = [ [] for _ in xrange(b)]\n",
 46 |     "        for num in array:\n",
 47 |     "            # we no longer use an extra function call for computing the digit\n",
 48 |     "            bucket_idx = (num / b**i) % b\n",
 49 |     "            buckets[bucket_idx].append(num)\n",
 50 |     "            \n",
 51 |     "        # we reuse original space in the array,\n",
 52 |     "        # rather than allocating a new one.\n",
 53 |     "        next_index = 0\n",
 54 |     "        for bucket in buckets:\n",
 55 |     "            for num in bucket:\n",
 56 |     "                array[next_index] = num\n",
 57 |     "                next_index += 1\n",
 58 |     "        \n",
 59 |     "        i += 1"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "### Verify that it works on a simple example"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 60,
 72 |    "metadata": {
 73 |     "collapsed": false
 74 |    },
 75 |    "outputs": [
 76 |     {
 77 |      "data": {
 78 |       "text/plain": [
 79 |        "[1, 2, 2, 3, 4, 5, 5, 6]"
 80 |       ]
 81 |      },
 82 |      "execution_count": 60,
 83 |      "metadata": {},
 84 |      "output_type": "execute_result"
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "example = [5,3,2,5,6,1,2,4]\n",
 89 |     "radix_sort(example, 2)\n",
 90 |     "example"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "### We are now moving on to bigger examples\n",
 98 |     "The line\n",
 99 |     "```python\n",
100 |     "random.seed(1)\n",
101 |     "```\n",
 102 |     "ensures that we always generate the same example for a given test size (for fairness)."
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 61,
108 |    "metadata": {
109 |     "collapsed": false
110 |    },
111 |    "outputs": [],
112 |    "source": [
113 |     "def generate_test(test_size):\n",
114 |     "    random.seed(1)\n",
115 |     "    example = [ random.randint(0,2**30) for _ in range(test_size)]\n",
116 |     "    return example"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "Let's try it for radix sort on different bases."
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 62,
129 |    "metadata": {
130 |     "collapsed": false
131 |    },
132 |    "outputs": [
133 |     {
134 |      "name": "stdout",
135 |      "output_type": "stream",
136 |      "text": [
137 |       "         30000065 function calls in 19.327 seconds\n",
138 |       "\n",
139 |       "   Ordered by: standard name\n",
140 |       "\n",
141 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
142 |       "       31    0.153    0.005    0.153    0.005 <ipython-input-59-77f9f94e9409>:1(is_sorted)\n",
143 |       "        1   17.945   17.945   19.315   19.315 <ipython-input-59-77f9f94e9409>:7(radix_sort)\n",
144 |       "        1    0.012    0.012   19.327   19.327 <string>:1(<module>)\n",
145 |       "       31    0.000    0.000    0.000    0.000 {len}\n",
146 |       " 30000000    1.217    0.000    1.217    0.000 {method 'append' of 'list' objects}\n",
147 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
148 |       "\n",
149 |       "\n"
150 |      ]
151 |     }
152 |    ],
153 |    "source": [
154 |     "# BASE 2\n",
155 |     "example = generate_test(1000000)\n",
156 |     "cProfile.run(\"radix_sort(example, 2)\")\n",
157 |     "assert is_sorted(example)"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 64,
163 |    "metadata": {
164 |     "collapsed": false
165 |    },
166 |    "outputs": [
167 |     {
168 |      "name": "stdout",
169 |      "output_type": "stream",
170 |      "text": [
171 |       "         2000009 function calls in 1.696 seconds\n",
172 |       "\n",
173 |       "   Ordered by: standard name\n",
174 |       "\n",
175 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
176 |       "        3    0.150    0.050    0.150    0.050 <ipython-input-59-77f9f94e9409>:1(is_sorted)\n",
177 |       "        1    1.363    1.363    1.673    1.673 <ipython-input-59-77f9f94e9409>:7(radix_sort)\n",
178 |       "        1    0.022    0.022    1.696    1.696 <string>:1(<module>)\n",
179 |       "        3    0.000    0.000    0.000    0.000 {len}\n",
180 |       "  2000000    0.160    0.000    0.160    0.000 {method 'append' of 'list' objects}\n",
181 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
182 |       "\n",
183 |       "\n"
184 |      ]
185 |     }
186 |    ],
187 |    "source": [
188 |     "# BASE 2^16\n",
189 |     "example = generate_test(1000000)\n",
190 |     "cProfile.run(\"radix_sort(example, 2**16)\")\n",
191 |     "assert is_sorted(example)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {},
197 |    "source": [
 198 |     "Intuitively it makes sense. We only need two iterations for $b=2^{16}$, while we need 30 for $b=2$ (one pass per bit of our roughly 30-bit keys). We cannot really have a single iteration, as $b=2^{32}$ is more than the length of the longest array we expect to ever sort.\n",
199 |     "\n",
 200 |     "Now let's try to run the standard sorting algorithm that is implemented by Python (Timsort, a hybrid of merge sort and insertion sort). It is a comparison-based sort and is therefore $O(n\\ lg\\ n)$. Radix sort is $O(n)$. We therefore expect our code to be faster."
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": 65,
206 |    "metadata": {
207 |     "collapsed": false
208 |    },
209 |    "outputs": [
210 |     {
211 |      "name": "stdout",
212 |      "output_type": "stream",
213 |      "text": [
214 |       "         3 function calls in 0.547 seconds\n",
215 |       "\n",
216 |       "   Ordered by: standard name\n",
217 |       "\n",
218 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
219 |       "        1    0.000    0.000    0.547    0.547 <string>:1(<module>)\n",
220 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
221 |       "        1    0.547    0.547    0.547    0.547 {method 'sort' of 'list' objects}\n",
222 |       "\n",
223 |       "\n"
224 |      ]
225 |     }
226 |    ],
227 |    "source": [
228 |     "example = generate_test(1000000)\n",
229 |     "cProfile.run(\"example.sort()\")\n",
230 |     "assert is_sorted(example)"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "markdown",
235 |    "metadata": {},
236 |    "source": [
237 |     "In reality our code is 3 times slower. What a shame! Let's not give up yet..."
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "markdown",
242 |    "metadata": {},
243 |    "source": [
244 |     "### A bit of bit magic.\n",
245 |     "\n",
246 |     "One of basic operations in radix sort is divide and modulo. In particular if our base is $b$, then in every $i$-th iteration of radix sort algorithm one of the most common operations is indexing:\n",
247 |     "\n",
248 |     "```python\n",
249 |     "# determine appropriate bucket.\n",
250 |     "(num / b**i) % b\n",
251 |     "```\n",
252 |     "\n",
 253 |     "This is great, but it consists of expensive modulo and division operations (they can take up multiple processor cycles). \n",
254 |     "\n",
 255 |     "Let's assume that b is a power of 2, i.e. $b=2^k$. Notice that `num / b**i` is equivalent to `num >> (k * i)`. In order to understand why this is the case, first convince yourself that division by 2 is equivalent to shaving off the rightmost bit."
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": 66,
261 |    "metadata": {
262 |     "collapsed": false
263 |    },
264 |    "outputs": [
265 |     {
266 |      "name": "stdout",
267 |      "output_type": "stream",
268 |      "text": [
269 |       "(6, 6)\n",
270 |       "(8, 8)\n"
271 |      ]
272 |     }
273 |    ],
274 |    "source": [
275 |     "print(100 / 16, 100 >> 4)\n",
276 |     "print(128 / 16, 128 >> 4)"
277 |    ]
278 |   },
279 |   {
280 |    "cell_type": "markdown",
281 |    "metadata": {},
282 |    "source": [
 283 |     "Similarly notice that for $b=2^k$ we have `num % b` equivalent to `num & (b-1)`. To understand that, notice that the $k$ lowest bits of `num` correspond to the remainder mod $b$."
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": 67,
289 |    "metadata": {
290 |     "collapsed": false
291 |    },
292 |    "outputs": [
293 |     {
294 |      "name": "stdout",
295 |      "output_type": "stream",
296 |      "text": [
297 |       "(4, 4)\n",
298 |       "(0, 0)\n",
299 |       "(11, 11)\n"
300 |      ]
301 |     }
302 |    ],
303 |    "source": [
304 |     "print(100 % 16, 100 & 15)\n",
305 |     "print(128 % 16, 128 & 15)\n",
306 |     "print(11 % 16, 11 & 15)"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "markdown",
311 |    "metadata": {},
312 |    "source": [
 313 |     "Both `&` and `>>` are very efficient and only take one processor cycle. We can augment the implementation of radix sort from above to use them instead of `%` and `/`."
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "code",
318 |    "execution_count": 68,
319 |    "metadata": {
320 |     "collapsed": true
321 |    },
322 |    "outputs": [],
323 |    "source": [
324 |     "def fast_radix_sort(array, k):\n",
325 |     "    \"\"\"Fast radix sort with base 2**k.\n",
326 |     "    \n",
327 |     "    This implementation uses bitwise operations\"\"\"\n",
328 |     "    assert k > 0\n",
329 |     "    i = 0\n",
330 |     "    \n",
331 |     "    b=2**k\n",
332 |     "    b_m1 = b - 1\n",
333 |     "    \n",
334 |     "    while True:\n",
335 |     "        if is_sorted(array):\n",
336 |     "            break\n",
337 |     "        shift = k * i\n",
338 |     "        buckets = [ [] for _ in xrange(b)]\n",
339 |     "        for num in array:\n",
340 |     "            bucket_idx = (num >> shift) & b_m1\n",
341 |     "            buckets[bucket_idx].append(num)\n",
342 |     "                        \n",
343 |     "        next_index = 0\n",
344 |     "        for bucket in buckets:\n",
345 |     "            for num in bucket:\n",
346 |     "                array[next_index] = num\n",
347 |     "                next_index += 1\n",
348 |     "        \n",
349 |     "        i += 1"
350 |    ]
351 |   },
352 |   {
353 |    "cell_type": "code",
354 |    "execution_count": 69,
355 |    "metadata": {
356 |     "collapsed": false
357 |    },
358 |    "outputs": [
359 |     {
360 |      "data": {
361 |       "text/plain": [
362 |        "[1, 2, 2, 3, 4, 5, 5, 6]"
363 |       ]
364 |      },
365 |      "execution_count": 69,
366 |      "metadata": {},
367 |      "output_type": "execute_result"
368 |     }
369 |    ],
370 |    "source": [
371 |     "example = [5,3,2,5,6,1,2,4]\n",
372 |     "fast_radix_sort(example, 16)\n",
373 |     "example"
374 |    ]
375 |   },
376 |   {
377 |    "cell_type": "code",
378 |    "execution_count": 71,
379 |    "metadata": {
380 |     "collapsed": false
381 |    },
382 |    "outputs": [
383 |     {
384 |      "name": "stdout",
385 |      "output_type": "stream",
386 |      "text": [
387 |       "         2000009 function calls in 1.413 seconds\n",
388 |       "\n",
389 |       "   Ordered by: standard name\n",
390 |       "\n",
391 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
392 |       "        3    0.154    0.051    0.154    0.051 <ipython-input-59-77f9f94e9409>:1(is_sorted)\n",
393 |       "        1    1.081    1.081    1.390    1.390 <ipython-input-68-2dc028b9f7cf>:1(fast_radix_sort)\n",
394 |       "        1    0.023    0.023    1.413    1.413 <string>:1(<module>)\n",
395 |       "        3    0.000    0.000    0.000    0.000 {len}\n",
396 |       "  2000000    0.155    0.000    0.155    0.000 {method 'append' of 'list' objects}\n",
397 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
398 |       "\n",
399 |       "\n"
400 |      ]
401 |     }
402 |    ],
403 |    "source": [
404 |     "example = generate_test(1000000)\n",
405 |     "cProfile.run(\"fast_radix_sort(example, 16)\")\n",
406 |     "assert is_sorted(example)"
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "markdown",
411 |    "metadata": {},
412 |    "source": [
 413 |     "*Good news:* We got a 0.2s speed-up. Yay!\n",
 414 |     "\n",
 415 |     "*Bad news:* We are still nowhere near the performance of `.sort`. Why is that?\n",
416 |     "\n",
 417 |     "The answer is in our choice of programming language. Python is interpreted and is known to have slowdowns up to 100x compared to low level languages like C/C++. That's why many of Python's routines are secretly implemented in C, `.sort` being one of them. In order to make the comparison fair we should also be allowed to write our implementation in C. Thankfully there's a Cython python extension, which makes it easy to interface with Python and compiles python-like code to C or C++. It should be pretty straightforward, but don't worry if you don't understand the details of the implementation below."
418 |    ]
419 |   },
420 |   {
421 |    "cell_type": "code",
422 |    "execution_count": 72,
423 |    "metadata": {
424 |     "collapsed": false
425 |    },
426 |    "outputs": [
427 |     {
428 |      "name": "stdout",
429 |      "output_type": "stream",
430 |      "text": [
431 |       "The Cython extension is already loaded. To reload it, use:\n",
432 |       "  %reload_ext Cython\n"
433 |      ]
434 |     }
435 |    ],
436 |    "source": [
437 |     "%load_ext Cython"
438 |    ]
439 |   },
440 |   {
441 |    "cell_type": "code",
442 |    "execution_count": 73,
443 |    "metadata": {
444 |     "collapsed": false
445 |    },
446 |    "outputs": [],
447 |    "source": [
448 |     "%%cython --cplus\n",
449 |     "\n",
450 |     "from libcpp.vector cimport vector\n",
451 |     "cimport cython\n",
452 |     "\n",
453 |     "\n",
454 |     "cdef c_is_sorted(list array):\n",
455 |     "    \"\"\"Equivalent to implementation of is_sorted from above.\n",
456 |     "    \n",
457 |     "    However this one is compiled to pure C++ (thanks to cdef).\n",
458 |     "    We cannot call it directly for Python.\n",
459 |     "    \n",
460 |     "    The reasons it is slightly different from above is the fact that\n",
461 |     "    \n",
462 |     "        for current_num in array\n",
463 |     "        \n",
464 |     "    is super-efficient in Cython.\"\"\"\n",
465 |     "    cdef unsigned int lastnum = 0\n",
466 |     "    cdef bint         first_iter = True\n",
467 |     "    \n",
468 |     "    for current_num in array:\n",
469 |     "        if not first_iter:\n",
470 |     "            if lastnum > current_num:\n",
471 |     "                return False\n",
472 |     "        else:\n",
473 |     "            first_iter = False\n",
474 |     "        lastnum = current_num\n",
475 |     "    return True\n",
476 |     "\n",
477 |     "def c_radix_sort(list array, int k):\n",
478 |     "    assert k > 0\n",
479 |     "    # Just like in C Cython requires us \n",
480 |     "    # to forward declare the variables\n",
481 |     "    cdef int i          = 0\n",
482 |     "    cdef int b          = 2 ** k\n",
483 |     "    cdef int b_m1       = b - 1\n",
484 |     "    cdef int shift      = 0\n",
485 |     "    cdef int next_index = 0\n",
486 |     "    cdef int num        = 0\n",
487 |     "    # vector[vector[int]] is a list of lists of integers.\n",
488 |     "    # (actually it is very efficient dynamically resizeable\n",
489 |     "    # array)\n",
490 |     "    cdef vector[vector[int]] buckets\n",
491 |     "    \n",
492 |     "    # initialize list with b empty arrays\n",
493 |     "    for _ in xrange(b):\n",
494 |     "        buckets.push_back(vector[int]())\n",
495 |     "\n",
496 |     "        \n",
497 |     "    # The code below barely changed compared to origninal\n",
498 |     "    # the only difference is the fast that we access buckets\n",
499 |     "    # slightly differently to be compliant with vector API.\n",
500 |     "    while True:\n",
501 |     "        if c_is_sorted(array):\n",
502 |     "            break\n",
503 |     "        shift = i * k\n",
504 |     "\n",
505 |     "        for bucket_idx in xrange(b):\n",
506 |     "            buckets[bucket_idx].clear()\n",
507 |     "        \n",
508 |     "        for num in array:\n",
509 |     "            bucket_idx = (num >> shift) & b_m1\n",
510 |     "            buckets[bucket_idx].push_back(num)\n",
511 |     "        \n",
512 |     "        next_index = 0\n",
513 |     "        for bucket_idx in xrange(b):\n",
514 |     "            for in_bucket_idx in xrange(buckets[bucket_idx].size()):\n",
515 |     "                array[next_index] = buckets[bucket_idx][in_bucket_idx]\n",
516 |     "                next_index += 1\n",
517 |     "        \n",
518 |     "        i += 1\n"
519 |    ]
520 |   },
521 |   {
522 |    "cell_type": "code",
523 |    "execution_count": 74,
524 |    "metadata": {
525 |     "collapsed": false
526 |    },
527 |    "outputs": [
528 |     {
529 |      "data": {
530 |       "text/plain": [
531 |        "[1, 2, 2, 3, 4, 5, 5, 6]"
532 |       ]
533 |      },
534 |      "execution_count": 74,
535 |      "metadata": {},
536 |      "output_type": "execute_result"
537 |     }
538 |    ],
539 |    "source": [
540 |     "example = [5,3,2,5,6,1,2,4]\n",
541 |     "c_radix_sort(example, 16)\n",
542 |     "example"
543 |    ]
544 |   },
545 |   {
546 |    "cell_type": "code",
547 |    "execution_count": 75,
548 |    "metadata": {
549 |     "collapsed": false
550 |    },
551 |    "outputs": [
552 |     {
553 |      "name": "stdout",
554 |      "output_type": "stream",
555 |      "text": [
556 |       "         3 function calls in 0.128 seconds\n",
557 |       "\n",
558 |       "   Ordered by: standard name\n",
559 |       "\n",
560 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
561 |       "        1    0.000    0.000    0.128    0.128 <string>:1(<module>)\n",
562 |       "        1    0.128    0.128    0.128    0.128 {_cython_magic_a617e99601e7e788cc896c9cdd2003a9.c_radix_sort}\n",
563 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
564 |       "\n",
565 |       "\n"
566 |      ]
567 |     }
568 |    ],
569 |    "source": [
570 |     "example = generate_test(1000000)\n",
571 |     "cProfile.run(\"c_radix_sort(example, 16)\")\n",
572 |     "assert is_sorted(example)"
573 |    ]
574 |   },
575 |   {
576 |    "cell_type": "markdown",
577 |    "metadata": {},
578 |    "source": [
579 |     "## Victory!\n",
580 |     "\n",
 581 |     "Our implementation of radix sort is 4x faster than the default Python sort. \n",
582 |     "\n",
583 |     "This is expected as it has lower complexity and a very low constant of proportionality. \n",
584 |     "\n",
 585 |     "Bear in mind that Python sort is more general though - it would be nontrivial to use radix sort to sort long strings for example."
586 |    ]
587 |   }
588 |  ],
589 |  "metadata": {
590 |   "kernelspec": {
591 |    "display_name": "Python 2",
592 |    "language": "python",
593 |    "name": "python2"
594 |   },
595 |   "language_info": {
596 |    "codemirror_mode": {
597 |     "name": "ipython",
598 |     "version": 2
599 |    },
600 |    "file_extension": ".py",
601 |    "mimetype": "text/x-python",
602 |    "name": "python",
603 |    "nbconvert_exporter": "python",
604 |    "pygments_lexer": "ipython2",
605 |    "version": "2.7.8"
606 |   }
607 |  },
608 |  "nbformat": 4,
609 |  "nbformat_minor": 0
610 | }
611 | 


--------------------------------------------------------------------------------
/lecture1/Fast exponentiation and fibonacci sequence.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import cProfile\n",
 13 |     "from operator import mul"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "# Fast Exponentiation"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
  27 |     "We can quickly compute $a^b \\ mod\\ m$ using the following trick, best illustrated by example.\n",
 28 |     "\n",
 29 |     "$$\n",
 30 |     "5^{13} = 5 * 5^{12} = 5 * (5^6)^2 = 5 * ((5^3)^2)^2 = 5 * ((5*5^2)^2)^2 = 5 * ((5*5*5)^2)^2\n",
 31 |     "$$\n",
 32 |     "\n",
  33 |     "In that example, even though we would naively need 12 multiplications to calculate the result, we managed to get away with 5.\n",
 34 |     "In general we can write out \n",
 35 |     "\n",
 36 |     "$$\n",
 37 |     "fexp(a,b,m) =\n",
 38 |     "\\begin{cases}\n",
 39 |     "a & \\text{if}\\ b=1\\\\\n",
 40 |     "fexp(a,b/2,m)^2 &\\text{if}\\ b\\ \\text{even}\\\\\n",
 41 |     "a \\cdot fexp(a,b-1,m) &\\text{otherwise}\n",
 42 |     "\\end{cases}\n",
 43 |     "$$\n"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 2,
 49 |    "metadata": {
 50 |     "collapsed": false
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "def fexp_recursive(a, b, m, mul_op=mul):\n",
 55 |     "    # We can easiely handle b = 0, here, but we choose not to\n",
 56 |     "    # this will be helpful later when we deal with matrices...\n",
 57 |     "    assert b >= 1\n",
 58 |     "    if b == 1:\n",
 59 |     "        return a\n",
 60 |     "    elif b % 2 == 0:\n",
 61 |     "        conquered = fexp_recursive(a, b / 2, m, mul_op=mul_op)\n",
 62 |     "        return  mul_op(conquered, conquered) % m\n",
 63 |     "    else:\n",
 64 |     "        b_one_less = fexp_recursive(a, b - 1, m, mul_op=mul_op)        \n",
 65 |     "        return mul_op(a, b_one_less) % m"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 3,
 71 |    "metadata": {
 72 |     "collapsed": false
 73 |    },
 74 |    "outputs": [
 75 |     {
 76 |      "data": {
 77 |       "text/plain": [
 78 |        "64"
 79 |       ]
 80 |      },
 81 |      "execution_count": 3,
 82 |      "metadata": {},
 83 |      "output_type": "execute_result"
 84 |     }
 85 |    ],
 86 |    "source": [
 87 |     "fexp_recursive(2, 6, 1000)"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 4,
 93 |    "metadata": {
 94 |     "collapsed": false
 95 |    },
 96 |    "outputs": [
 97 |     {
 98 |      "data": {
 99 |       "text/plain": [
100 |        "4"
101 |       ]
102 |      },
103 |      "execution_count": 4,
104 |      "metadata": {},
105 |      "output_type": "execute_result"
106 |     }
107 |    ],
108 |    "source": [
109 |     "fexp_recursive(2, 6, 10)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 5,
115 |    "metadata": {
116 |     "collapsed": false
117 |    },
118 |    "outputs": [
119 |     {
120 |      "name": "stdout",
121 |      "output_type": "stream",
122 |      "text": [
123 |       "         89 function calls (46 primitive calls) in 0.000 seconds\n",
124 |       "\n",
125 |       "   Ordered by: standard name\n",
126 |       "\n",
127 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
128 |       "     44/1    0.000    0.000    0.000    0.000 <ipython-input-2-78a9a64277e1>:1(fexp_recursive)\n",
129 |       "        1    0.000    0.000    0.000    0.000 <string>:1(<module>)\n",
130 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
131 |       "       43    0.000    0.000    0.000    0.000 {operator.mul}\n",
132 |       "\n",
133 |       "\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "cProfile.run(\"fexp_recursive(2, 10000000000, 10)\")"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "### Iterative approach\n",
 146 |     "A very similar algorithm can be written out without recursion by looking at the binary representation of b and noticing that if the $i$-th bit is one, then we need to multiply the result by $$a^{2^i}$$\n",
147 |     "Don't worry if you don't fully understand the code below. It is included here, to show you that there are multiple ways of approaching implementation of this kind of solution. Also iterative algorithms are sometimes preferred - we will get back to that point below."
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 6,
153 |    "metadata": {
154 |     "collapsed": true
155 |    },
156 |    "outputs": [],
157 |    "source": [
158 |     "def fexp_iterative(a, b, m, mul_op=mul):\n",
159 |     "    assert b >= 1\n",
160 |     "    result = a\n",
161 |     "    multiplier = a\n",
162 |     "    b -= 1\n",
163 |     "    while b > 0:\n",
164 |     "        if b % 2 == 1:\n",
165 |     "            result = mul_op(result, multiplier) % m\n",
166 |     "        multiplier = mul_op(multiplier, multiplier) % m\n",
167 |     "        b /= 2\n",
168 |     "    return result"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 7,
174 |    "metadata": {
175 |     "collapsed": false
176 |    },
177 |    "outputs": [
178 |     {
179 |      "data": {
180 |       "text/plain": [
181 |        "64"
182 |       ]
183 |      },
184 |      "execution_count": 7,
185 |      "metadata": {},
186 |      "output_type": "execute_result"
187 |     }
188 |    ],
189 |    "source": [
190 |     "fexp_iterative(2, 6, 1000)"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 8,
196 |    "metadata": {
197 |     "collapsed": false
198 |    },
199 |    "outputs": [
200 |     {
201 |      "data": {
202 |       "text/plain": [
203 |        "4"
204 |       ]
205 |      },
206 |      "execution_count": 8,
207 |      "metadata": {},
208 |      "output_type": "execute_result"
209 |     }
210 |    ],
211 |    "source": [
212 |     "fexp_iterative(2, 6, 10)"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": 9,
218 |    "metadata": {
219 |     "collapsed": false
220 |    },
221 |    "outputs": [
222 |     {
223 |      "name": "stdout",
224 |      "output_type": "stream",
225 |      "text": [
226 |       "         57 function calls in 0.000 seconds\n",
227 |       "\n",
228 |       "   Ordered by: standard name\n",
229 |       "\n",
230 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
231 |       "        1    0.000    0.000    0.000    0.000 <ipython-input-6-dc8e4fdca639>:1(fexp_iterative)\n",
232 |       "        1    0.000    0.000    0.000    0.000 <string>:1(<module>)\n",
233 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
234 |       "       54    0.000    0.000    0.000    0.000 {operator.mul}\n",
235 |       "\n",
236 |       "\n"
237 |      ]
238 |     }
239 |    ],
240 |    "source": [
241 |     "cProfile.run(\"fexp_iterative(2, 10000000000, 10)\")"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "markdown",
246 |    "metadata": {},
247 |    "source": [
248 |     "# Fibonacci sequence"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "markdown",
253 |    "metadata": {},
254 |    "source": [
255 |     "The Fibonacci sequence is the series of numbers: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, ... The next number is found by adding up the two numbers before it. For example, 3 is found by adding the two numbers before it (1+2). Here we will explore 3 different algorithms for computing the $n^{th}$ Fibonacci number and analyze their time complexity. We denote the $n^{th}$ Fibonacci number as $F_{n}$. Code for the following 3 algorithms is in recitation1.py which is available on the Stellar site under recitation materials. \n"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "metadata": {},
261 |    "source": [
262 |     "### Naive Recursion\n",
263 |     "\n",
264 |     "By definition, $F_{n} = F_{n - 1} + F_{n - 2}$. As this is the \"naive\" algorithm, let's not try to be too clever and instead simply write an algorithm using only this definition!\n",
265 |     "\n",
266 |     "Now to analyze the runtime. Formally this algorithm can be analyzed by solving the recurrence, $T(n) = T(n - 1) + T(n - 2) + \\Theta(1)$. This is a tough recursion to solve! Let us separately find an upper and lower bound instead of a $\\Theta$ relation. \n",
267 |     "\n",
268 |     "It is clear that the recurrence $T(n) = 2T(n - 1) + \\Theta(1)$ is strictly greater than our original, so let us use it to find an upper bound. Each recursive call results in two child recursive calls until the base case is reached. Therefore, there will be $\\Theta(2^{i})$ recursive calls made at the $i^{th}$ level of recursion. Since the subproblem size only decreases by one on each call, there will be $\\Theta(n)$ levels of recursion before the base case is reached. Therefore this recurrence solves to $\\Theta(2^{n})$ and we can conclude that our algorithm is $O(2^{n})$.\n",
269 |     "\n",
270 |     "The recurrence $T(n) = 2T(n - 2) + \\Theta(1)$ is strictly less than our original. Using similar logic as above we can see that this recurrence solves to $\\Theta(2^{\\frac{n}{2}})$ and we conclude that our algorithm is $\\Omega(2^{\\frac{n}{2}})$.\n",
271 |     "\n",
272 |     "Challenge Problem: Find a tight asymptotic bound on this algorithm's runtime. Hint: Draw a tree diagramming the recursive calls and look for the pattern!\n",
273 |     "\n"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 10,
279 |    "metadata": {
280 |     "collapsed": true
281 |    },
282 |    "outputs": [],
283 |    "source": [
284 |     "def fibonacci_recursive_slow(n, m):\n",
285 |     "    assert n >= 0\n",
286 |     "    if n == 0:\n",
287 |     "        return 0\n",
288 |     "    elif n == 1:\n",
289 |     "        return 1\n",
290 |     "    else:\n",
291 |     "        return (fibonacci_recursive_slow(n - 1, m) + fibonacci_recursive_slow(n - 2, m)) % m "
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": 11,
297 |    "metadata": {
298 |     "collapsed": false
299 |    },
300 |    "outputs": [
301 |     {
302 |      "data": {
303 |       "text/plain": [
304 |        "55"
305 |       ]
306 |      },
307 |      "execution_count": 11,
308 |      "metadata": {},
309 |      "output_type": "execute_result"
310 |     }
311 |    ],
312 |    "source": [
313 |     "fibonacci_recursive_slow(10, 1000)"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "code",
318 |    "execution_count": 12,
319 |    "metadata": {
320 |     "collapsed": false
321 |    },
322 |    "outputs": [
323 |     {
324 |      "name": "stdout",
325 |      "output_type": "stream",
326 |      "text": [
327 |       "         2692539 function calls (3 primitive calls) in 0.888 seconds\n",
328 |       "\n",
329 |       "   Ordered by: standard name\n",
330 |       "\n",
331 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
332 |       "2692537/1    0.888    0.000    0.888    0.888 <ipython-input-10-c32dd02825c7>:1(fibonacci_recursive_slow)\n",
333 |       "        1    0.000    0.000    0.888    0.888 <string>:1(<module>)\n",
334 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
335 |       "\n",
336 |       "\n"
337 |      ]
338 |     }
339 |    ],
340 |    "source": [
341 |     "cProfile.run(\"fibonacci_recursive_slow(30, 1000)\")"
342 |    ]
343 |   },
344 |   {
345 |    "cell_type": "markdown",
346 |    "metadata": {},
347 |    "source": [
348 |     "### Memoized Recursion\n",
349 |     "It's often the case that we can improve the efficiency of algorithms by exploiting natural \"structures\" present in the problem. Notice in the naive algorithm that we often compute the same thing multiple times! This occurs because we have overlapping subproblems. For example, both $F_{n - 1}$ and $F_{n - 2}$ depend on the solution to $F_{n - 3}$. We can take advantage of this structure by memoizing (storing) the solutions to subproblems as we go. Therefore instead of recalculating them we can simply look them up! Look in recitation1.py for Python code.\n",
350 |     "\n",
351 |     "This improved algorithm has a time complexity of $\\Theta(n)$. This can be seen from the fact that we in total solve for $\\Theta(n)$ $F_{i}$s, each of which takes only constant time to compute. \n"
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "code",
356 |    "execution_count": 13,
357 |    "metadata": {
358 |     "collapsed": true
359 |    },
360 |    "outputs": [],
361 |    "source": [
362 |     "cache = {}\n",
363 |     "def fibonacci_recursive_fast(n, m):\n",
364 |     "    if not (n,m) in cache:\n",
365 |     "        assert n >= 0\n",
366 |     "        if n == 0:\n",
367 |     "            result = 0\n",
368 |     "        elif n == 1:\n",
369 |     "            result = 1\n",
370 |     "        else:\n",
371 |     "            result = (fibonacci_recursive_fast(n - 1, m) + fibonacci_recursive_fast(n - 2, m)) % m\n",
372 |     "        cache[(n,m)] = result\n",
373 |     "    return cache[(n,m)]"
374 |    ]
375 |   },
376 |   {
377 |    "cell_type": "code",
378 |    "execution_count": 14,
379 |    "metadata": {
380 |     "collapsed": false
381 |    },
382 |    "outputs": [
383 |     {
384 |      "data": {
385 |       "text/plain": [
386 |        "55"
387 |       ]
388 |      },
389 |      "execution_count": 14,
390 |      "metadata": {},
391 |      "output_type": "execute_result"
392 |     }
393 |    ],
394 |    "source": [
395 |     "fibonacci_recursive_fast(10, 1000)"
396 |    ]
397 |   },
398 |   {
399 |    "cell_type": "code",
400 |    "execution_count": 15,
401 |    "metadata": {
402 |     "collapsed": false
403 |    },
404 |    "outputs": [
405 |     {
406 |      "name": "stdout",
407 |      "output_type": "stream",
408 |      "text": [
409 |       "         1783 function calls (3 primitive calls) in 0.004 seconds\n",
410 |       "\n",
411 |       "   Ordered by: standard name\n",
412 |       "\n",
413 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
414 |       "   1781/1    0.004    0.000    0.004    0.004 <ipython-input-13-45ec8644bee2>:2(fibonacci_recursive_fast)\n",
415 |       "        1    0.000    0.000    0.004    0.004 <string>:1(<module>)\n",
416 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
417 |       "\n",
418 |       "\n"
419 |      ]
420 |     }
421 |    ],
422 |    "source": [
423 |     "cProfile.run(\"fibonacci_recursive_fast(900, 1000)\")"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "markdown",
428 |    "metadata": {},
429 |    "source": [
430 |     "### Iterative versus recursive solutions\n",
431 |     "The code below should give a runtime error on a standard Python interpreter, because it exceeds the default recursion depth limit. This kind of limitation is why we often opt for iterative versions of an algorithm. Don't worry though: it turns out that for every recursive solution there exists an iterative equivalent. Indeed, we can emulate the recursion stack with a stack data structure. Such a solution is often tedious to implement, and the constant factor of the runtime becomes large. That's why we often look for a *natural order of calculation*, i.e. an order in which we compute the subproblems such that by the time we need a particular result it has already been computed. For example, in the case of Fibonacci the natural order of computation is to compute $F_1$, then $F_2$, then $F_3$, etc. Notice how the resulting solution is even simpler than the recursive one!"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": 16,
437 |    "metadata": {
438 |     "collapsed": true
439 |    },
440 |    "outputs": [],
441 |    "source": [
442 |     "# fibonacci_recursive_fast(9000, 1000)"
443 |    ]
444 |   },
445 |   {
446 |    "cell_type": "code",
447 |    "execution_count": 17,
448 |    "metadata": {
449 |     "collapsed": true
450 |    },
451 |    "outputs": [],
452 |    "source": [
453 |     "def fibonnaci_iterative(n, m):\n",
454 |     "    assert n >= 0\n",
455 |     "    if n == 0:\n",
456 |     "        return 0\n",
457 |     "    f_current, f_previous = 1, 0\n",
458 |     "    for _ in range(n - 1):\n",
459 |     "        f_current, f_previous = (f_current + f_previous) % m, f_current\n",
460 |     "    return f_current"
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "code",
465 |    "execution_count": 18,
466 |    "metadata": {
467 |     "collapsed": false
468 |    },
469 |    "outputs": [
470 |     {
471 |      "data": {
472 |       "text/plain": [
473 |        "55"
474 |       ]
475 |      },
476 |      "execution_count": 18,
477 |      "metadata": {},
478 |      "output_type": "execute_result"
479 |     }
480 |    ],
481 |    "source": [
482 |     "fibonnaci_iterative(10, 1000)"
483 |    ]
484 |   },
485 |   {
486 |    "cell_type": "code",
487 |    "execution_count": 19,
488 |    "metadata": {
489 |     "collapsed": false
490 |    },
491 |    "outputs": [
492 |     {
493 |      "name": "stdout",
494 |      "output_type": "stream",
495 |      "text": [
496 |       "         4 function calls in 1.025 seconds\n",
497 |       "\n",
498 |       "   Ordered by: standard name\n",
499 |       "\n",
500 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
501 |       "        1    0.901    0.901    1.025    1.025 <ipython-input-17-732ce64d7038>:1(fibonnaci_iterative)\n",
502 |       "        1    0.000    0.000    1.025    1.025 <string>:1(<module>)\n",
503 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
504 |       "        1    0.124    0.124    0.124    0.124 {range}\n",
505 |       "\n",
506 |       "\n"
507 |      ]
508 |     }
509 |    ],
510 |    "source": [
511 |     "cProfile.run(\"fibonnaci_iterative(10000000, 1000)\")"
512 |    ]
513 |   },
514 |   {
515 |    "cell_type": "markdown",
516 |    "metadata": {},
517 |    "source": [
518 |     "### Matrix exponentiation\n",
519 |     "\n",
520 |     "Take a moment to think back to the recursive squaring algorithm from lecture. In a similar fashion, we can compute the $n^{th}$ Fibonacci number in logarithmic time by repeatedly squaring the matrix \n",
521 |     "\n",
522 |     "\n",
523 |     "\\begin{align}\n",
524 |     "\\begin{bmatrix}\n",
525 |     "    1 & 1 \\\\\n",
526 |     "    1 & 0\n",
527 |     "\\end{bmatrix}\n",
528 |     "\\end{align}\n",
529 |     "\n",
530 |     "In fact\n",
531 |     "\n",
532 |     "\n",
533 |     "\\begin{align}\n",
534 |     "\\begin{bmatrix}\n",
535 |     "    1 & 1 \\\\\n",
536 |     "    1 & 0\n",
537 |     "\\end{bmatrix} ^{n} \n",
538 |     "= \n",
539 |     "\\begin{bmatrix}\n",
540 |     "    F_{n + 1} & F_{n} \\\\\n",
541 |     "    F_{n} & F_{n - 1}\n",
542 |     "\\end{bmatrix}\n",
543 |     "\\end{align}\n",
544 |     "\n",
545 |     "To give a rough proof of why this is the case, let us use induction on $n$. Our claim is trivially true in the base case $n = 1$. Now assuming that our claim holds for this matrix to the $n^{th}$ power, we must show that our claim is also true for this matrix to the $(n + 1)^{th}$ power. \n",
546 |     "\n",
547 |     "\n",
548 |     "\\begin{align}\n",
549 |     "\\begin{bmatrix}\n",
550 |     "    1 & 1 \\\\\n",
551 |     "    1 & 0\n",
552 |     "\\end{bmatrix}\n",
553 |     "*\n",
554 |     " \\begin{bmatrix}\n",
555 |     "    1 & 1 \\\\\n",
556 |     "    1 & 0 \n",
557 |     "\\end{bmatrix} ^{n} \n",
558 |     "= \n",
559 |     "\\begin{bmatrix}\n",
560 |     "    1 & 1 \\\\\n",
561 |     "    1 & 0 \\\\\n",
562 |     "\\end{bmatrix}\n",
563 |     "*\n",
564 |     "\\begin{bmatrix}\n",
565 |     "    F_{n + 1} & F_{n} \\\\\n",
566 |     "    F_{n} & F_{n - 1}\n",
567 |     "\\end{bmatrix}\n",
568 |     " = \n",
569 |     " \\begin{bmatrix}\n",
570 |     "    F_{n + 1} + F_{n} & F_{n} + F_{n - 1} \\\\\n",
571 |     "    F_{n + 1} & F_{n}\n",
572 |     "\\end{bmatrix}\n",
573 |     "=\n",
574 |     "\\begin{bmatrix}\n",
575 |     "    F_{n + 2} & F_{n+1} \\\\\n",
576 |     "    F_{n + 1} & F_{n }\n",
577 |     "\\end{bmatrix}\n",
578 |     "\\end{align}\n",
579 |     "\n",
580 |     "Success! \n",
581 |     "\n",
582 |     "The runtime analysis for this algorithm is identical to that for modular exponentiation using repeated squaring. In particular, we do not include the cost of the matrix multiplication in our analysis, because the matrix has constant size."
583 |    ]
584 |   },
585 |   {
586 |    "cell_type": "code",
587 |    "execution_count": 23,
588 |    "metadata": {
589 |     "collapsed": false
590 |    },
591 |    "outputs": [],
592 |    "source": [
593 |     "F = np.array([[1, 1],\n",
594 |     "              [1, 0]])\n",
595 |     "\n",
596 |     "def fibonnaci_matrix(n, m):\n",
597 |     "    Fn = fexp_recursive(F, n, m, mul_op=np.dot)\n",
598 |     "    return Fn[0][1]"
599 |    ]
600 |   },
601 |   {
602 |    "cell_type": "code",
603 |    "execution_count": 24,
604 |    "metadata": {
605 |     "collapsed": false
606 |    },
607 |    "outputs": [
608 |     {
609 |      "data": {
610 |       "text/plain": [
611 |        "55"
612 |       ]
613 |      },
614 |      "execution_count": 24,
615 |      "metadata": {},
616 |      "output_type": "execute_result"
617 |     }
618 |    ],
619 |    "source": [
620 |     "fibonnaci_matrix(10, 1000)"
621 |    ]
622 |   },
623 |   {
624 |    "cell_type": "code",
625 |    "execution_count": 25,
626 |    "metadata": {
627 |     "collapsed": false
628 |    },
629 |    "outputs": [
630 |     {
631 |      "name": "stdout",
632 |      "output_type": "stream",
633 |      "text": [
634 |       "         90 function calls (47 primitive calls) in 0.001 seconds\n",
635 |       "\n",
636 |       "   Ordered by: standard name\n",
637 |       "\n",
638 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
639 |       "     44/1    0.000    0.000    0.001    0.001 <ipython-input-2-78a9a64277e1>:1(fexp_recursive)\n",
640 |       "        1    0.000    0.000    0.001    0.001 <ipython-input-23-eff9346cd7e3>:4(fibonnaci_matrix)\n",
641 |       "        1    0.000    0.000    0.001    0.001 <string>:1(<module>)\n",
642 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
643 |       "       43    0.000    0.000    0.000    0.000 {numpy.core.multiarray.dot}\n",
644 |       "\n",
645 |       "\n"
646 |      ]
647 |     }
648 |    ],
649 |    "source": [
650 |     "cProfile.run(\"fibonnaci_matrix(10000000000, 1000)\")"
651 |    ]
652 |   },
653 |   {
654 |    "cell_type": "markdown",
655 |    "metadata": {},
656 |    "source": [
657 |     "# Problems to think about (non-examinable, non-compulsory, strictly for fun...)\n",
658 |     "\n",
659 |     "1. Give an example of another operation besides multiplication and matrix multiply that can be efficiently composed using fast exponentiation.\n",
660 |     "\n",
661 |     "2. Compute the n-th item of the tribonacci sequence using the three methods presented above:\n",
662 |     "\n",
663 |     "\\begin{align}\n",
664 |     "s_n =\n",
665 |     "\\begin{cases}\n",
666 |     "1 & \\text{if}\\ n \\in \\{ 1,2,3 \\} \\\\\n",
667 |     "2s_{n-1} + 2s_{n-2} + s_{n-3} & \\text{otherwise}\n",
668 |     "\\end{cases}\n",
669 |     "\\end{align}\n"
670 |    ]
671 |   },
672 |   {
673 |    "cell_type": "code",
674 |    "execution_count": null,
675 |    "metadata": {
676 |     "collapsed": true
677 |    },
678 |    "outputs": [],
679 |    "source": []
680 |   }
681 |  ],
682 |  "metadata": {
683 |   "kernelspec": {
684 |    "display_name": "Python 2",
685 |    "language": "python",
686 |    "name": "python2"
687 |   },
688 |   "language_info": {
689 |    "codemirror_mode": {
690 |     "name": "ipython",
691 |     "version": 2
692 |    },
693 |    "file_extension": ".py",
694 |    "mimetype": "text/x-python",
695 |    "name": "python",
696 |    "nbconvert_exporter": "python",
697 |    "pygments_lexer": "ipython2",
698 |    "version": "2.7.8"
699 |   }
700 |  },
701 |  "nbformat": 4,
702 |  "nbformat_minor": 0
703 | }
704 | 


--------------------------------------------------------------------------------
/lecture2/binary_search.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 75,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "from scipy.spatial.distance import cosine as cosine_similarity\n",
 13 |     "import random\n",
 14 |     "import cProfile"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "# Binary search simulation\n",
 22 |     "\n",
 23 |     "To understand the kind of problems that computers need to deal with on a daily basis, I prepared a little simulation.\n",
 24 |     "\n",
 25 |     "You are given a hand of 20 sorted cards and your task is to find a particular card. Have fun!"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 76,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "def generated_ordered_cards():\n",
 37 |     "    cards = []\n",
 38 |     "    cards.extend(unichr(x) for x in range(127185, 127185 + 14))\n",
 39 |     "    cards.extend(unichr(x) for x in range(127169, 127169 + 14))\n",
 40 |     "    cards.extend(unichr(x) for x in range(127153, 127153 + 14))    \n",
 41 |     "    cards.extend(unichr(x) for x in range(127137, 127137 + 14))\n",
 42 |     "    return cards"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 77,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "DOMAIN = generated_ordered_cards()\n",
 54 |     "NUM_SAMPLES = 20\n",
 55 |     "\n",
 56 |     "class Problem(object):\n",
 57 |     "    def __init__(self):\n",
 58 |     "        self.elements = set()\n",
 59 |     "        while len(self.elements) < NUM_SAMPLES:\n",
 60 |     "            self.elements.add(random.choice(DOMAIN))\n",
 61 |     "        self.elements = sorted(list(self.elements), key=lambda x: DOMAIN.index(x))\n",
 62 |     "        self.hide_all()\n",
 63 |     "        self.query = random.choice(self.elements)\n",
 64 |     "    \n",
 65 |     "    def ask(self, position):\n",
 66 |     "        assert 0 <= position < NUM_SAMPLES\n",
 67 |     "        self.visible[position] = True\n",
 68 |     "        return self\n",
 69 |     "    \n",
 70 |     "    def hide_all(self):\n",
 71 |     "        self.visible = [False for _ in range(NUM_SAMPLES)]\n",
 72 |     "        \n",
 73 |     "    def _repr_html_(self):\n",
 74 |     "        els_html = []\n",
 75 |     "        for el_idx in range(len(self.elements)):\n",
 76 |     "            if self.visible[el_idx]:\n",
 77 |     "                els_html.append(\"<td style='text-align:center'><font size='5'><b>%s</b></font></td>\" % (self.elements[el_idx]))\n",
 78 |     "            else:\n",
 79 |     "                els_html.append(\"<td style='text-align:center;background:lightskyblue'<small><font color='grey'>%d</font></small></td>\" % (el_idx,))\n",
 80 |     "        header_html = u\"<center><h1>Find %s! (♣ < ♦ < ♥ < ♠)</h1></center><br />\" % (self.query,)\n",
 81 |     "        table_html  = \"<table width='100%%' height='50px' style='table-layout: fixed'><tr>%s</tr></table>\" % (\"\".join(els_html))\n",
 82 |     "        return header_html + table_html"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 81,
 88 |    "metadata": {
 89 |     "collapsed": false
 90 |    },
 91 |    "outputs": [
 92 |     {
 93 |      "data": {
 94 |       "text/html": [
 95 |        "<center><h1>Find 🂷! (♣ < ♦ < ♥ < ♠)</h1></center><br /><table width='100%' height='50px' style='table-layout: fixed'><tr><td style='text-align:center;background:lightskyblue'<small><font color='grey'>0</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>1</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>2</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>3</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>4</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>5</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>6</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>7</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>8</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>9</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>10</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>11</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>12</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>13</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>14</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>15</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>16</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>17</font></small></td><td 
style='text-align:center;background:lightskyblue'<small><font color='grey'>18</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>19</font></small></td></tr></table>"
 96 |       ],
 97 |       "text/plain": [
 98 |        "<__main__.Problem at 0x7ff69c3e8090>"
 99 |       ]
100 |      },
101 |      "execution_count": 81,
102 |      "metadata": {},
103 |      "output_type": "execute_result"
104 |     }
105 |    ],
106 |    "source": [
107 |     "p = Problem()\n",
108 |     "p"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 86,
114 |    "metadata": {
115 |     "collapsed": false
116 |    },
117 |    "outputs": [
118 |     {
119 |      "data": {
120 |       "text/html": [
121 |        "<center><h1>Find 🂷! (♣ < ♦ < ♥ < ♠)</h1></center><br /><table width='100%' height='50px' style='table-layout: fixed'><tr><td style='text-align:center;background:lightskyblue'<small><font color='grey'>0</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>1</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>2</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>3</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>4</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>5</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>6</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>7</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>8</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>9</font></small></td><td style='text-align:center'><font size='5'><b>🃈</b></font></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>11</font></small></td><td style='text-align:center'><font size='5'><b>🂱</b></font></td><td style='text-align:center'><font size='5'><b>🂶</b></font></td><td style='text-align:center'><font size='5'><b>🂷</b></font></td><td style='text-align:center'><font size='5'><b>🂦</b></font></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>16</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>17</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>18</font></small></td><td style='text-align:center;background:lightskyblue'<small><font color='grey'>19</font></small></td></tr></table>"
122 |       ],
123 |       "text/plain": [
124 |        "<__main__.Problem at 0x7ff69c3e8090>"
125 |       ]
126 |      },
127 |      "execution_count": 86,
128 |      "metadata": {},
129 |      "output_type": "execute_result"
130 |     }
131 |    ],
132 |    "source": [
133 |     "p.ask(14)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {},
139 |    "source": []
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "# Binary search implementation - GLOVE vectors\n",
146 |     "\n",
147 |     "Glove vectors try to assign vectors of numbers to words in surprising ways. One interesting property they have is\n",
148 |     "the ability to solve many analogies. For example:\n",
149 |     "\n",
150 |     "$$\n",
151 |     "V_{\\text{berlin}} - V_{\\text{germany}} \\approx V_{\\text{paris}} - V_{\\text{france}}\n",
152 |     "$$\n",
153 |     "\n",
154 |     "Today we will try to write an algorithm that can quickly find vectors from the Glove database."
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 92,
160 |    "metadata": {
161 |     "collapsed": false
162 |    },
163 |    "outputs": [],
164 |    "source": [
165 |     "class Glove(object):\n",
166 |     "    def __init__(self, path):\n",
167 |     "        self.word_vector = []\n",
168 |     "        \n",
169 |     "        with open(path) as f:\n",
170 |     "            for line in f:\n",
171 |     "                if len(line) < 1:\n",
172 |     "                    break\n",
173 |     "                line = line.split(' ')\n",
174 |     "                word, vector = line[0], np.array([float(x) for x in line[1:]])\n",
175 |     "                self.word_vector.append((word, vector))\n",
176 |     "        self.word_vector.sort()\n",
177 |     "        \n",
178 |     "    def __call__(self, key):\n",
179 |     "        return self.find_vector(key)\n",
180 |     "        \n",
181 |     "    def find_vector(self, key):\n",
182 |     "        for word,vector in self.word_vector:\n",
183 |     "            if word == key:\n",
184 |     "                return vector\n",
185 |     "        raise KeyError(key)\n",
186 |     "        \n",
187 |     "    def find_closest_word(self, key_vector,blacklist=[]):\n",
188 |     "        best_similarity = float('inf')\n",
189 |     "        best_word = None\n",
190 |     "        for word, vector in self.word_vector:\n",
191 |     "            if word in blacklist:\n",
192 |     "                continue\n",
193 |     "            similarity = cosine_similarity(vector, key_vector)\n",
194 |     "            if best_similarity > similarity:\n",
195 |     "                best_similarity = similarity\n",
196 |     "                best_word = word\n",
197 |     "        return best_word"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 93,
203 |    "metadata": {
204 |     "collapsed": false
205 |    },
206 |    "outputs": [],
207 |    "source": [
208 |     "# Download from http://nlp.stanford.edu/projects/glove/\n",
209 |     "glove = Glove(\"/home/sidor/projects/Dali/data/glove/glove.6B.300d.txt\")\n"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 52,
215 |    "metadata": {
216 |     "collapsed": false
217 |    },
218 |    "outputs": [],
219 |    "source": [
220 |     "# this call is quite slow...\n",
221 |     "def analogy(thisword, tothis, islikethis):\n",
222 |     "    key_vector = glove(tothis) - glove(thisword) + glove(islikethis)\n",
223 |     "    best_word = glove.find_closest_word(key_vector, blacklist=[thisword, tothis,islikethis])\n",
224 |     "    print(\"%s is to %s like %s to <%s>\" % (thisword, tothis, islikethis, best_word,))"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 88,
230 |    "metadata": {
231 |     "collapsed": false
232 |    },
233 |    "outputs": [
234 |     {
235 |      "name": "stdout",
236 |      "output_type": "stream",
237 |      "text": [
238 |       "germany is to berlin like france to <paris>\n"
239 |      ]
240 |     }
241 |    ],
242 |    "source": [
243 |     "analogy(\"germany\", \"berlin\", \"france\")"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 46,
249 |    "metadata": {
250 |     "collapsed": false
251 |    },
252 |    "outputs": [
253 |     {
254 |      "name": "stdout",
255 |      "output_type": "stream",
256 |      "text": [
257 |       "movie is to movies like school to <films>\n"
258 |      ]
259 |     }
260 |    ],
261 |    "source": [
262 |     "analogy(\"movie\", \"movies\", \"school\")"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": 57,
268 |    "metadata": {
269 |     "collapsed": false
270 |    },
271 |    "outputs": [
272 |     {
273 |      "name": "stdout",
274 |      "output_type": "stream",
275 |      "text": [
276 |       "movie is to actor like school to <teacher>\n"
277 |      ]
278 |     }
279 |    ],
280 |    "source": [
281 |     "analogy(\"movie\", \"actor\", \"school\")"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": 55,
287 |    "metadata": {
288 |     "collapsed": false
289 |    },
290 |    "outputs": [
291 |     {
292 |      "name": "stdout",
293 |      "output_type": "stream",
294 |      "text": [
295 |       "smaller is to small like bigger to <big>\n"
296 |      ]
297 |     }
298 |    ],
299 |    "source": [
300 |     "analogy(\"smaller\", \"small\", \"bigger\")"
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": 56,
306 |    "metadata": {
307 |     "collapsed": false
308 |    },
309 |    "outputs": [
310 |     {
311 |      "name": "stdout",
312 |      "output_type": "stream",
313 |      "text": [
314 |       "smaller is to small like big to <huge>\n"
315 |      ]
316 |     }
317 |    ],
318 |    "source": [
319 |     "analogy(\"smaller\", \"small\", \"big\")"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": 60,
325 |    "metadata": {
326 |     "collapsed": false
327 |    },
328 |    "outputs": [
329 |     {
330 |      "name": "stdout",
331 |      "output_type": "stream",
332 |      "text": [
333 |       "one is to two like two to <three>\n"
334 |      ]
335 |     }
336 |    ],
337 |    "source": [
338 |     "analogy(\"one\", \"two\", \"two\")"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": 59,
344 |    "metadata": {
345 |     "collapsed": false
346 |    },
347 |    "outputs": [
348 |     {
349 |      "name": "stdout",
350 |      "output_type": "stream",
351 |      "text": [
352 |       "king is to queen like man to <woman>\n"
353 |      ]
354 |     }
355 |    ],
356 |    "source": [
357 |     "analogy(\"king\", \"queen\", \"man\")"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "markdown",
362 |    "metadata": {},
363 |    "source": [
364 |     "### Finding a vector given a word\n",
365 |     "Notice that the implementation of `find_vector` above is quite naive:\n",
366 |     "    \n",
367 |     "```python\n",
368 |     "    def find_vector(self, key):\n",
369 |     "        for word,vector in self.word_vector:\n",
370 |     "            if word == key:\n",
371 |     "                return vector\n",
372 |     "        raise KeyError(key)\n",
373 |     "```\n",
374 |     "\n",
375 |     "Every query to look up a word takes about 100ms. This is not very good if we want to run millions of such queries per second!"
376 |    ]
377 |   },
378 |   {
379 |    "cell_type": "code",
380 |    "execution_count": 94,
381 |    "metadata": {
382 |     "collapsed": false
383 |    },
384 |    "outputs": [
385 |     {
386 |      "name": "stdout",
387 |      "output_type": "stream",
388 |      "text": [
389 |       "         4 function calls in 0.135 seconds\n",
390 |       "\n",
391 |       "   Ordered by: standard name\n",
392 |       "\n",
393 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
394 |       "        1    0.000    0.000    0.135    0.135 <ipython-input-92-dbe6e5a0c3c4>:14(__call__)\n",
395 |       "        1    0.135    0.135    0.135    0.135 <ipython-input-92-dbe6e5a0c3c4>:17(find_vector)\n",
396 |       "        1    0.000    0.000    0.135    0.135 <string>:1(<module>)\n",
397 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
398 |       "\n",
399 |       "\n"
400 |      ]
401 |     }
402 |    ],
403 |    "source": [
404 |     "cProfile.run('glove(\"zebra\")')"
405 |    ]
406 |   },
407 |   {
408 |    "cell_type": "markdown",
409 |    "metadata": {},
410 |    "source": [
411 |     "### Implementation using binary search\n",
412 |     "\n",
413 |     "Here we use the binary search algorithm. The list of words `glove.word_vector` is sorted alphabetically. We wish to find an index `i` such that `glove.word_vector[i][0] == key` for a given `key` word.\n",
414 |     "\n",
415 |     "Let $l$ be the lowest index in our array (zero) and $h$ be the highest index in our array (in this case 400000 - 1). To find the index at which a particular word is stored, we look at the middle index $m = \\lfloor (h + l) / 2 \\rfloor$. If the key at index $m$ (denoted $k_m$) equals the desired key $k^*$, we are done. If $k_m$ is less than $k^*$, we know that $k^*$ can only be at some index in the range $[m+1, h]$; otherwise it can only be in the range $[l, m-1]$. This idea can be applied recursively to the chosen subrange. Notice that with each step the range size is halved, so the complexity of the solution is $O(\\log{(h - l)})$"
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "code",
420 |    "execution_count": 95,
421 |    "metadata": {
422 |     "collapsed": false
423 |    },
424 |    "outputs": [],
425 |    "source": [
426 |     "def find_vector(self, key, lo=None, hi=None, debug=False):\n",
427 |     "    # Make sure by default we search over entire table\n",
428 |     "    if lo is None:\n",
429 |     "        lo = 0\n",
430 |     "    if hi is None:\n",
431 |     "        hi = len(self.word_vector) - 1\n",
432 |     "    \n",
433 |     "    if lo > hi:\n",
434 |     "        raise KeyError(key)\n",
435 |     "    \n",
436 |     "    mid = (hi + lo) / 2\n",
437 |     "    word, vector = self.word_vector[mid]\n",
438 |     "    if debug:\n",
439 |     "        print(\"Looking for %s in range(%d, %d). Middle is %s\" % (key, lo, hi, word))\n",
440 |     "\n",
441 |     "    if word == key:\n",
442 |     "        return vector\n",
443 |     "    elif key < word:\n",
444 |     "        return self.find_vector(key, lo, mid - 1, debug=debug)\n",
445 |     "    else: # key > word\n",
446 |     "        return self.find_vector(key, mid + 1, hi, debug=debug)\n",
447 |     "    \n",
448 |     "Glove.find_vector = find_vector"
449 |    ]
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "execution_count": 96,
454 |    "metadata": {
455 |     "collapsed": false
456 |    },
457 |    "outputs": [
458 |     {
459 |      "name": "stdout",
460 |      "output_type": "stream",
461 |      "text": [
462 |       "Looking for zebra in range(0, 399999). Middle is jurnal\n",
463 |       "Looking for zebra in range(200000, 399999). Middle is ramdass\n",
464 |       "Looking for zebra in range(300000, 399999). Middle is syme\n",
465 |       "Looking for zebra in range(350000, 399999). Middle is vadims\n",
466 |       "Looking for zebra in range(375000, 399999). Middle is wilbarger\n",
467 |       "Looking for zebra in range(387500, 399999). Middle is yevtushenko\n",
468 |       "Looking for zebra in range(393750, 399999). Middle is zeughaus\n",
469 |       "Looking for zebra in range(393750, 396873). Middle is zaa\n",
470 |       "Looking for zebra in range(395312, 396873). Middle is zaremba\n",
471 |       "Looking for zebra in range(396093, 396873). Middle is zehava\n",
472 |       "Looking for zebra in range(396093, 396482). Middle is zayu\n",
473 |       "Looking for zebra in range(396288, 396482). Middle is zeami\n",
474 |       "Looking for zebra in range(396386, 396482). Middle is zeder\n",
475 |       "Looking for zebra in range(396386, 396433). Middle is zebu\n",
476 |       "Looking for zebra in range(396386, 396408). Middle is zebic\n",
477 |       "Looking for zebra in range(396398, 396408). Middle is zebras\n",
478 |       "Looking for zebra in range(396398, 396402). Middle is zebra\n"
479 |      ]
480 |     }
481 |    ],
482 |    "source": [
483 |     "vec = glove.find_vector(\"zebra\", debug=True)"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": 72,
489 |    "metadata": {
490 |     "collapsed": false
491 |    },
492 |    "outputs": [
493 |     {
494 |      "name": "stdout",
495 |      "output_type": "stream",
496 |      "text": [
497 |       "Looking for ronrivest in range(0, 399999). Middle is jurnal\n",
498 |       "Looking for ronrivest in range(200000, 399999). Middle is ramdass\n",
499 |       "Looking for ronrivest in range(300000, 399999). Middle is syme\n",
500 |       "Looking for ronrivest in range(300000, 349998). Middle is sensationalism\n",
501 |       "Looking for ronrivest in range(300000, 324998). Middle is rs6\n",
502 |       "Looking for ronrivest in range(300000, 312498). Middle is rescue\n",
503 |       "Looking for ronrivest in range(306250, 312498). Middle is riveras\n",
504 |       "Looking for ronrivest in range(309375, 312498). Middle is romero\n",
505 |       "Looking for ronrivest in range(310937, 312498). Middle is rostekhnadzor\n",
506 |       "Looking for ronrivest in range(310937, 311716). Middle is rosaleda\n",
507 |       "Looking for ronrivest in range(310937, 311325). Middle is ronstadt\n",
508 |       "Looking for ronrivest in range(310937, 311130). Middle is ronayne\n",
509 |       "Looking for ronrivest in range(311034, 311130). Middle is rongbuk\n",
510 |       "Looking for ronrivest in range(311083, 311130). Middle is ronin\n",
511 |       "Looking for ronrivest in range(311107, 311130). Middle is ronni\n",
512 |       "Looking for ronrivest in range(311119, 311130). Middle is rono\n",
513 |       "Looking for ronrivest in range(311125, 311130). Middle is ronsard\n",
514 |       "Looking for ronrivest in range(311125, 311126). Middle is ronon\n",
515 |       "Looking for ronrivest in range(311126, 311126). Middle is ronquillo\n",
516 |       "Not found!\n"
517 |      ]
518 |     }
519 |    ],
520 |    "source": [
521 |     "try:\n",
522 |     "    vec = glove.find_vector(\"ronrivest\", debug=True)\n",
523 |     "except KeyError:\n",
524 |     "    print(\"Not found!\")"
525 |    ]
526 |   },
527 |   {
528 |    "cell_type": "code",
529 |    "execution_count": 97,
530 |    "metadata": {
531 |     "collapsed": false
532 |    },
533 |    "outputs": [
534 |     {
535 |      "name": "stdout",
536 |      "output_type": "stream",
537 |      "text": [
538 |       "         21 function calls (5 primitive calls) in 0.000 seconds\n",
539 |       "\n",
540 |       "   Ordered by: standard name\n",
541 |       "\n",
542 |       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
543 |       "        1    0.000    0.000    0.000    0.000 <ipython-input-92-dbe6e5a0c3c4>:14(__call__)\n",
544 |       "     17/1    0.000    0.000    0.000    0.000 <ipython-input-95-eec181511d7b>:1(find_vector)\n",
545 |       "        1    0.000    0.000    0.000    0.000 <string>:1(<module>)\n",
546 |       "        1    0.000    0.000    0.000    0.000 {len}\n",
547 |       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
548 |       "\n",
549 |       "\n"
550 |      ]
551 |     }
552 |    ],
553 |    "source": [
554 |     "cProfile.run('glove(\"zebra\")')"
555 |    ]
556 |   },
557 |   {
558 |    "cell_type": "markdown",
559 |    "metadata": {
560 |     "collapsed": true
561 |    },
562 |    "source": [
563 |     "### Extra problems (not graded)\n",
564 |     "1. Here's another way of thinking about binary search - we are looking for the highest $x$ such that property $p(x)$ is true. For example, if we are looking for the index of a key in an array, $p(x)$ is $k_x \\leq k^*$. Can you give examples of other properties that we can binary search over? What makes a property suitable for use in binary search?\n",
565 |     "\n",
566 |     "2. Binary search can **not** be used to minimize a quadratic function. Can you find a similar algorithm that can?"
567 |    ]
568 |   },
569 |   {
570 |    "cell_type": "code",
571 |    "execution_count": null,
572 |    "metadata": {
573 |     "collapsed": false
574 |    },
575 |    "outputs": [],
576 |    "source": [
577 |     "# Hint to problem 2\n",
578 |     "cyph = lambda x: chr((ord(x) + 64) % 128)\n",
579 |     "''.join(map(cyph, \"\\t.34%!$`/&`30,)44).'`2!.'%`).`47/`0)%#%3l`#/.3)$%2`30,)44).'`)4`).`4(2%%`0)%#%3n\"))"
580 |    ]
581 |   }
582 |  ],
583 |  "metadata": {
584 |   "kernelspec": {
585 |    "display_name": "Python 2",
586 |    "language": "python",
587 |    "name": "python2"
588 |   },
589 |   "language_info": {
590 |    "codemirror_mode": {
591 |     "name": "ipython",
592 |     "version": 2
593 |    },
594 |    "file_extension": ".py",
595 |    "mimetype": "text/x-python",
596 |    "name": "python",
597 |    "nbconvert_exporter": "python",
598 |    "pygments_lexer": "ipython2",
599 |    "version": "2.7.8"
600 |   }
601 |  },
602 |  "nbformat": 4,
603 |  "nbformat_minor": 0
604 | }
605 | 


--------------------------------------------------------------------------------
/lecture7/Understanding Radix Sort.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Understanding radix sort\n",
  8 |     "\n",
  9 |     "The first building block we will need is the `is_sorted` function. "
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {
 16 |     "collapsed": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "def is_sorted(array):\n",
 21 |     "    \"\"\"Takes a sequence and returns true if an only if sequence is sorted.\"\"\"\n",
 22 |     "    # check all the n-1 pairs of adjacent elements for \n",
 23 |     "    # order violation\n",
 24 |     "    for i in xrange(1, len(array)):\n",
 25 |     "        if array[i-1] > array[i]:\n",
 26 |     "            return False\n",
 27 |     "    # if no violations, then by transitivity of < the sequence is sorted.\n",
 28 |     "    return True"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [
 38 |     {
 39 |      "name": "stdout",
 40 |      "output_type": "stream",
 41 |      "text": [
 42 |       "([1, 4, 6, 7, 8], True)\n",
 43 |       "([1, 4, 7, 6, 8], False)\n"
 44 |      ]
 45 |     }
 46 |    ],
 47 |    "source": [
 48 |     "# Verify implementation on few test cases\n",
 49 |     "example1 = [1,4,6,7,8]\n",
 50 |     "example2 = [1,4,7,6,8]\n",
 51 |     "print(example1, is_sorted(example1))\n",
 52 |     "print(example2, is_sorted(example2))"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "## Stable sorting\n",
 60 |     "\n",
 61 |     "Python sort is stable. It means that if we have two elements that have the same value of *sorting key* they will appear in the output in the same order that they appeared in the input.\n",
 62 |     "\n",
 63 |     "Let's see an example: we have a list of pairs $(a,b)$ and we want to sort them in the nondecreasing order by $a$ and by nondecreasing $b$ if $a$'s are the same"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 3,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [
 73 |     {
 74 |      "data": {
 75 |       "text/plain": [
 76 |        "[(3, 1), (3, 2), (1, 1), (1, 2), (2, 2), (2, 1)]"
 77 |       ]
 78 |      },
 79 |      "execution_count": 3,
 80 |      "metadata": {},
 81 |      "output_type": "execute_result"
 82 |     }
 83 |    ],
 84 |    "source": [
 85 |     "example = [ (3,1),(3,2),(1,1),(1,2), (2,2),(2,1)]\n",
 86 |     "example"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "We can achieve that by sorting first by $b$ and then **stable-sorting** by $a$."
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 4,
 99 |    "metadata": {
100 |     "collapsed": false
101 |    },
102 |    "outputs": [
103 |     {
104 |      "data": {
105 |       "text/plain": [
106 |        "[(3, 1), (1, 1), (2, 1), (3, 2), (1, 2), (2, 2)]"
107 |       ]
108 |      },
109 |      "execution_count": 4,
110 |      "metadata": {},
111 |      "output_type": "execute_result"
112 |     }
113 |    ],
114 |    "source": [
115 |     "sorted_idx2 = sorted(example, key=lambda x: x[1])\n",
116 |     "sorted_idx2"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 5,
122 |    "metadata": {
123 |     "collapsed": false
124 |    },
125 |    "outputs": [
126 |     {
127 |      "data": {
128 |       "text/plain": [
129 |        "[(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)]"
130 |       ]
131 |      },
132 |      "execution_count": 5,
133 |      "metadata": {},
134 |      "output_type": "execute_result"
135 |     }
136 |    ],
137 |    "source": [
138 |     "sorted_idx12 = sorted(sorted_idx2, key=lambda x: x[0])\n",
139 |     "sorted_idx12"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "metadata": {},
145 |    "source": [
146 |     "### Unstable sort example\n",
147 |     "\n",
148 |     "A sort does not have to be stable. For example, heap sort (implemented below) isn't."
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 6,
154 |    "metadata": {
155 |     "collapsed": false
156 |    },
157 |    "outputs": [],
158 |    "source": [
159 |     "class MaxHeap(object):\n",
160 |     "    def __init__(self, array, key):\n",
161 |     "        self.storage = array\n",
162 |     "        self.heap_size = len(array)\n",
163 |     "        self.key = key\n",
164 |     "        self.heapify()\n",
165 |     "    \n",
166 |     "    def fix_down(self, index):\n",
167 |     "        while index < self.heap_size:\n",
168 |     "            # pick maximum child\n",
169 |     "            max_child_idx = None\n",
170 |     "            if  2 * index + 1 < self.heap_size:\n",
171 |     "                max_child_idx = 2 * index + 1\n",
172 |     "\n",
173 |     "            if 2 * index + 2 < self.heap_size and \\\n",
174 |     "                    self.key(self.storage[2 * index + 1]) < self.key(self.storage[2 * index + 2]):\n",
175 |     "                max_child_idx = 2 * index + 2\n",
176 |     "\n",
177 |     "            if max_child_idx is None or \\\n",
178 |     "                    self.key(self.storage[index]) > self.key(self.storage[max_child_idx]):\n",
179 |     "                # heap property satisfied\n",
180 |     "                break\n",
181 |     "            \n",
182 |     "            self.storage[index], self.storage[max_child_idx] = self.storage[max_child_idx], self.storage[index]\n",
183 |     "            index = max_child_idx\n",
184 |     "            \n",
185 |     "    def fix_up(self, index):\n",
186 |     "        assert index < self.heap_size\n",
187 |     "        while index > 0:\n",
188 |     "            parent_idx = (index - 1) // 2\n",
189 |     "            if self.key(self.storage[index]) >= self.key(self.storage[parent_idx]):\n",
190 |     "                self.storage[index],  self.storage[parent_idx] =  self.storage[parent_idx], self.storage[index]\n",
191 |     "                index = parent_idx\n",
192 |     "            else:\n",
193 |     "                break\n",
194 |     "    def extract_max(self):\n",
195 |     "        self.storage[0], self.storage[self.heap_size - 1] = self.storage[self.heap_size -1], self.storage[0]\n",
196 |     "        self.heap_size -= 1\n",
197 |     "        self.fix_down(0)\n",
198 |     "        return self.storage[self.heap_size]\n",
199 |     "        \n",
200 |     "    def heapify(self):\n",
201 |     "        for i in range(self.heap_size - 1, -1, -1):\n",
202 |     "            self.fix_down(i)\n",
203 |     "            \n",
204 |     "def heap_sort(array, key=lambda x:x):\n",
205 |     "    h = MaxHeap(array[:], key)\n",
206 |     "    while h.heap_size > 0:\n",
207 |     "        h.extract_max()\n",
208 |     "    return h.storage"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 7,
214 |    "metadata": {
215 |     "collapsed": false
216 |    },
217 |    "outputs": [
218 |     {
219 |      "data": {
220 |       "text/plain": [
221 |        "[(2, 1), (1, 1), (3, 1), (2, 2), (3, 2), (1, 2)]"
222 |       ]
223 |      },
224 |      "execution_count": 7,
225 |      "metadata": {},
226 |      "output_type": "execute_result"
227 |     }
228 |    ],
229 |    "source": [
230 |     "sorted_idx2 = heap_sort(example, key=lambda x: x[1])\n",
231 |     "sorted_idx2"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 8,
237 |    "metadata": {
238 |     "collapsed": false
239 |    },
240 |    "outputs": [
241 |     {
242 |      "name": "stdout",
243 |      "output_type": "stream",
244 |      "text": [
245 |       "[(1, 2), (1, 1), (2, 1), (2, 2), (3, 1), (3, 2)]\n",
246 |       "Notice that secondary sorting criterion is violated.\n"
247 |      ]
248 |     }
249 |    ],
250 |    "source": [
251 |     "sorted_idx12 = heap_sort(sorted_idx2, key=lambda x: x[0])\n",
252 |     "print(sorted_idx12)\n",
253 |     "print(\"Notice that secondary sorting criterion is violated.\")"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "markdown",
258 |    "metadata": {},
259 |    "source": [
260 |     "## Counting Sort\n",
261 |     "\n",
262 |     "In order to keep complexity at $O(n)$, we will need to devise a procedure that sorts without using comparisons.\n",
263 |     "\n",
264 |     "Assume we only have elements $0, 1, ..., (k-1)$ in the array. We know that all zeros come before all ones etc. We can therefore put all the numbers in $k$ different buckets and later read them off."
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "code",
269 |    "execution_count": 9,
270 |    "metadata": {
271 |     "collapsed": false
272 |    },
273 |    "outputs": [],
274 |    "source": [
275 |     "def count_sort(array, k, key=lambda x: x):\n",
276 |     "    \"\"\"Stable sorts array by using key to determine ordering of elements.\n",
277 |     "    \n",
278 |     "    Assumes all elements are in range(0, k)\"\"\"\n",
279 |     "    # initialize array \n",
280 |     "    buckets = [[] for _ in range(k)]\n",
281 |     "    # for every key store all the elements\n",
282 |     "    # with that key\n",
283 |     "    for element in array:\n",
284 |     "        buckets[key(element)].append(element)\n",
285 |     "    output = []\n",
286 |     "    # red numbers from buckets in order\n",
287 |     "    for bucket in buckets:\n",
288 |     "        for element in bucket:\n",
289 |     "            output.append(element)\n",
290 |     "    return output"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": 10,
296 |    "metadata": {
297 |     "collapsed": false
298 |    },
299 |    "outputs": [
300 |     {
301 |      "data": {
302 |       "text/plain": [
303 |        "[1, 2, 2, 3, 4, 5, 5]"
304 |       ]
305 |      },
306 |      "execution_count": 10,
307 |      "metadata": {},
308 |      "output_type": "execute_result"
309 |     }
310 |    ],
311 |    "source": [
312 |     "count_sort([4,3,2,5,5,1,2], 10)"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "### Count sort complexity analysis\n",
320 |     "\n",
321 |     "We have the following steps:\n",
322 |     "- allocate space for $b$ buckets (the parameter `k` in the code): $O(b)$\n",
323 |     "- loop through all the elements in the input array and put them in buckets: $O(n)$\n",
324 |     "- read the elements back out of the buckets: $O(n)$\n",
325 |     "\n",
326 |     "Therefore the total complexity is $O(n+b)$\n"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "## Radix sort idea\n",
334 |     "\n",
335 |     "Imagine that you want to compare two long numbers, for example 85823421348134214 and 85823421348452456. The algorithm you would use is to compare the first digit and, if it is the same, compare the next digit, etc. We can say that the first digit is the primary comparison criterion, the second digit is the secondary criterion, etc. This is almost correct, but we actually need to make sure that we add extra zeros at the beginning of the number that is shorter (because shorter numbers come before longer numbers). \n",
336 |     "\n",
337 |     "Radix sort uses this idea directly for sorting. It first sorts the numbers by the last digit. Then it *stable-sorts* them by the second to last digit (making the second to last digit the primary sorting criterion and the last digit the secondary sorting criterion) and so on. At the end of that process we end up with the numbers sorted in exactly the order we discussed above.\n",
338 |     "\n",
339 |     "To implement that idea let's first look at how we would obtain the digits. "
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": 11,
345 |    "metadata": {
346 |     "collapsed": true
347 |    },
348 |    "outputs": [],
349 |    "source": [
350 |     "def ith_digit(number, i):\n",
351 |     "    \"\"\"Returns the i-th digit from the end. \n",
352 |     "    \n",
353 |     "    i=0 resuts the very last digit.\"\"\"\n",
354 |     "    for _ in range(i):\n",
355 |     "        number /= 10\n",
356 |     "    return number % 10"
357 |    ]
358 |   },
359 |   {
360 |    "cell_type": "code",
361 |    "execution_count": 12,
362 |    "metadata": {
363 |     "collapsed": false
364 |    },
365 |    "outputs": [
366 |     {
367 |      "name": "stdout",
368 |      "output_type": "stream",
369 |      "text": [
370 |       "3\n",
371 |       "2\n",
372 |       "1\n",
373 |       "0\n",
374 |       "0\n"
375 |      ]
376 |     }
377 |    ],
378 |    "source": [
379 |     "print(ith_digit(123, 0))\n",
380 |     "print(ith_digit(123, 1))\n",
381 |     "print(ith_digit(123, 2))\n",
382 |     "print(ith_digit(123, 3))\n",
383 |     "print(ith_digit(123, 4))"
384 |    ]
385 |   },
386 |   {
387 |    "cell_type": "markdown",
388 |    "metadata": {},
389 |    "source": [
390 |     "Sweet! We have a function that returns i-th digit and it even yields additional zeros in the front - just what we needed.\n",
391 |     "\n",
392 |     "\n",
393 |     "## Radix Sort using digit $i$ from the end\n",
394 |     "\n",
395 |     "The idea is to use count sort with the digit being the key. \n",
396 |     "\n",
397 |     "For example if we sort `[123, 42, 73]` by the last digit, bucket number two will have one number `[42]`, bucket number three will have two numbers `[123, 73]`, and the remaining eight buckets will be empty. It is important that bucket number three has `[123, 73]` not `[73, 123]` - this way if we read out the numbers in the order they appear in the buckets we will get a stable sort."
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "code",
402 |    "execution_count": 13,
403 |    "metadata": {
404 |     "collapsed": true
405 |    },
406 |    "outputs": [],
407 |    "source": [
408 |     "def radix_sort_by_ith_digit(array, i):\n",
409 |     "    return count_sort(array, \n",
410 |     "                      10,     # we have 10 different digits.\n",
411 |     "                      key=lambda number: ith_digit(number, i)) # use i-th digit as a key."
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "code",
416 |    "execution_count": 14,
417 |    "metadata": {
418 |     "collapsed": false
419 |    },
420 |    "outputs": [
421 |     {
422 |      "data": {
423 |       "text/plain": [
424 |        "[42, 123, 73]"
425 |       ]
426 |      },
427 |      "execution_count": 14,
428 |      "metadata": {},
429 |      "output_type": "execute_result"
430 |     }
431 |    ],
432 |    "source": [
433 |     "# sort by the last digit\n",
434 |     "pass1 = radix_sort_by_ith_digit([123,42,73], 0)\n",
435 |     "pass1"
436 |    ]
437 |   },
438 |   {
439 |    "cell_type": "code",
440 |    "execution_count": 15,
441 |    "metadata": {
442 |     "collapsed": false
443 |    },
444 |    "outputs": [
445 |     {
446 |      "data": {
447 |       "text/plain": [
448 |        "[123, 42, 73]"
449 |       ]
450 |      },
451 |      "execution_count": 15,
452 |      "metadata": {},
453 |      "output_type": "execute_result"
454 |     }
455 |    ],
456 |    "source": [
457 |     "# sort result of previous pass by the second to last digit\n",
458 |     "pass2 = radix_sort_by_ith_digit(pass1, 1)\n",
459 |     "pass2"
460 |    ]
461 |   },
462 |   {
463 |    "cell_type": "code",
464 |    "execution_count": 16,
465 |    "metadata": {
466 |     "collapsed": false
467 |    },
468 |    "outputs": [
469 |     {
470 |      "data": {
471 |       "text/plain": [
472 |        "[42, 73, 123]"
473 |       ]
474 |      },
475 |      "execution_count": 16,
476 |      "metadata": {},
477 |      "output_type": "execute_result"
478 |     }
479 |    ],
480 |    "source": [
481 |     "# sort result of previous pass by the third to last digit\n",
482 |     "# none of the numbers are longer than third digit, so we are done.\n",
483 |     "pass3 = radix_sort_by_ith_digit(pass2, 2)\n",
484 |     "pass3"
485 |    ]
486 |   },
487 |   {
488 |    "cell_type": "markdown",
489 |    "metadata": {},
490 |    "source": [
491 |     "What happened above is exactly radix sort! Sort iteratively by digits further and further from the end until the sequence ends up sorted."
492 |    ]
493 |   },
494 |   {
495 |    "cell_type": "code",
496 |    "execution_count": 17,
497 |    "metadata": {
498 |     "collapsed": true
499 |    },
500 |    "outputs": [],
501 |    "source": [
502 |     "def radix_sort(array):\n",
503 |     "    \"\"\"Returns array sorted by i-th digit from the end.\n",
504 |     "    \n",
505 |     "    The sorting procedure is stable.\"\"\"\n",
506 |     "    i = 0\n",
507 |     "    while True:\n",
508 |     "        if is_sorted(array):\n",
509 |     "            # we stop once the array is sorted\n",
510 |     "            # the latest this can happen is when \n",
511 |     "            # we run the number of passes eqaul to\n",
512 |     "            # the length of the longest number\n",
513 |     "            break\n",
514 |     "        # stable sort by i-th digit.\n",
515 |     "        array = radix_sort_by_ith_digit(array, i)\n",
516 |     "        i += 1\n",
517 |     "    return array"
518 |    ]
519 |   },
520 |   {
521 |    "cell_type": "code",
522 |    "execution_count": 18,
523 |    "metadata": {
524 |     "collapsed": false
525 |    },
526 |    "outputs": [
527 |     {
528 |      "data": {
529 |       "text/plain": [
530 |        "[42, 73, 123]"
531 |       ]
532 |      },
533 |      "execution_count": 18,
534 |      "metadata": {},
535 |      "output_type": "execute_result"
536 |     }
537 |    ],
538 |    "source": [
539 |     "radix_sort([123,42,73])"
540 |    ]
541 |   },
542 |   {
543 |    "cell_type": "code",
544 |    "execution_count": 19,
545 |    "metadata": {
546 |     "collapsed": false
547 |    },
548 |    "outputs": [
549 |     {
550 |      "data": {
551 |       "text/plain": [
552 |        "[42, 73, 123, 512, 524, 5214, 123123, 142124]"
553 |       ]
554 |      },
555 |      "execution_count": 19,
556 |      "metadata": {},
557 |      "output_type": "execute_result"
558 |     }
559 |    ],
560 |    "source": [
561 |     "# harder example\n",
562 |     "radix_sort([123,42,73, 123123, 142124, 524, 512, 5214])"
563 |    ]
564 |   },
565 |   {
566 |    "cell_type": "markdown",
567 |    "metadata": {},
568 |    "source": [
569 |     "### Radix sort with different numeric base. \n",
570 |     "\n",
571 |     "Let's try to improve our algorithm slightly. Notice that the fact that we use digits in base $10$ is kind of arbitrary. How hard would it be to use any $b \\geq 2$? In theory all we should be required to do is to change the digit extraction procedure and the number of buckets. \n",
572 |     "\n",
573 |     "Let's start with the digits."
574 |    ]
575 |   },
576 |   {
577 |    "cell_type": "code",
578 |    "execution_count": 20,
579 |    "metadata": {
580 |     "collapsed": true
581 |    },
582 |    "outputs": [],
583 |    "source": [
584 |     "def ith_digit(number, b, i):\n",
585 |     "    \"\"\"Returns the i-th digit from the end (base b).\n",
586 |     "    \n",
587 |     "    i=0 resuts the very last digit.\"\"\"\n",
588 |     "    for _ in range(i):\n",
589 |     "        number /= b      # changed 10 to b\n",
590 |     "    return number % b    # changed 10 to b"
591 |    ]
592 |   },
593 |   {
594 |    "cell_type": "code",
595 |    "execution_count": 21,
596 |    "metadata": {
597 |     "collapsed": false
598 |    },
599 |    "outputs": [
600 |     {
601 |      "name": "stdout",
602 |      "output_type": "stream",
603 |      "text": [
604 |       "7 mod 2\n",
605 |       "1\n",
606 |       "1\n",
607 |       "1\n",
608 |       "0\n",
609 |       "0\n"
610 |      ]
611 |     }
612 |    ],
613 |    "source": [
614 |     "print(\"7 mod 2\")\n",
615 |     "print(ith_digit(7, 2, 0))\n",
616 |     "print(ith_digit(7, 2, 1))\n",
617 |     "print(ith_digit(7, 2, 2))\n",
618 |     "print(ith_digit(7, 2, 3))\n",
619 |     "print(ith_digit(7, 2, 4))"
620 |    ]
621 |   },
622 |   {
623 |    "cell_type": "code",
624 |    "execution_count": 22,
625 |    "metadata": {
626 |     "collapsed": false
627 |    },
628 |    "outputs": [
629 |     {
630 |      "name": "stdout",
631 |      "output_type": "stream",
632 |      "text": [
633 |       "7 mod 3\n",
634 |       "1\n",
635 |       "2\n",
636 |       "0\n",
637 |       "0\n",
638 |       "0\n"
639 |      ]
640 |     }
641 |    ],
642 |    "source": [
643 |     "print(\"7 mod 3\")\n",
644 |     "print(ith_digit(7, 3, 0))\n",
645 |     "print(ith_digit(7, 3, 1))\n",
646 |     "print(ith_digit(7, 3, 2))\n",
647 |     "print(ith_digit(7, 3, 3))\n",
648 |     "print(ith_digit(7, 3, 4))"
649 |    ]
650 |   },
651 |   {
652 |    "cell_type": "markdown",
653 |    "metadata": {},
654 |    "source": [
655 |     "Now we are ready to augment radix_sort."
656 |    ]
657 |   },
658 |   {
659 |    "cell_type": "code",
660 |    "execution_count": 23,
661 |    "metadata": {
662 |     "collapsed": true
663 |    },
664 |    "outputs": [],
665 |    "source": [
666 |     "def radix_sort_by_ith_digit(array, b, i):\n",
667 |     "    \"\"\"Returns array sorted by i-th digit from the end (base b).\n",
668 |     "    \n",
669 |     "    The sorting procedure is stable.\"\"\"\n",
670 |     "    return count_sort(array, b, key=lambda number: ith_digit(number, b, i))"
671 |    ]
672 |   },
673 |   {
674 |    "cell_type": "code",
675 |    "execution_count": 24,
676 |    "metadata": {
677 |     "collapsed": true
678 |    },
679 |    "outputs": [],
680 |    "source": [
681 |     "def radix_sort(array, b):\n",
682 |     "    \"\"\"Returns the array fully sorted, using radix sort in base b.\n",
683 |     "    \n",
684 |     "    The sorting procedure is stable.\"\"\"\n",
685 |     "    i = 0\n",
686 |     "    while True:\n",
687 |     "        if is_sorted(array):\n",
688 |     "            # we stop once the array is sorted\n",
689 |     "            # the latest this can happen is when \n",
690 |     "            # we run the number of passes equal to\n",
691 |     "            # the length of the longest number\n",
692 |     "            break\n",
693 |     "        print(\"Iteration %d\" % (i,))\n",
694 |     "        # stable sort by i-th digit.\n",
695 |     "        array = radix_sort_by_ith_digit(array, b, i)\n",
696 |     "        i += 1\n",
697 |     "    return array"
698 |    ]
699 |   },
700 |   {
701 |    "cell_type": "markdown",
702 |    "metadata": {},
703 |    "source": [
704 |     "Let's try sorting in base $b=2$"
705 |    ]
706 |   },
707 |   {
708 |    "cell_type": "code",
709 |    "execution_count": 25,
710 |    "metadata": {
711 |     "collapsed": false
712 |    },
713 |    "outputs": [
714 |     {
715 |      "name": "stdout",
716 |      "output_type": "stream",
717 |      "text": [
718 |       "Iteration 0\n",
719 |       "Iteration 1\n",
720 |       "Iteration 2\n",
721 |       "Iteration 3\n",
722 |       "Iteration 4\n",
723 |       "Iteration 5\n",
724 |       "Iteration 6\n"
725 |      ]
726 |     },
727 |     {
728 |      "data": {
729 |       "text/plain": [
730 |        "[42, 73, 123]"
731 |       ]
732 |      },
733 |      "execution_count": 25,
734 |      "metadata": {},
735 |      "output_type": "execute_result"
736 |     }
737 |    ],
738 |    "source": [
739 |     "radix_sort([123,42,73], 2)"
740 |    ]
741 |   },
742 |   {
743 |    "cell_type": "markdown",
744 |    "metadata": {},
745 |    "source": [
746 |     "Whoa! 7 iterations? That is a lot to sort just 3 numbers. What if we increase the base? Maybe $b=1000$?"
747 |    ]
748 |   },
749 |   {
750 |    "cell_type": "code",
751 |    "execution_count": 26,
752 |    "metadata": {
753 |     "collapsed": false
754 |    },
755 |    "outputs": [
756 |     {
757 |      "name": "stdout",
758 |      "output_type": "stream",
759 |      "text": [
760 |       "Iteration 0\n"
761 |      ]
762 |     },
763 |     {
764 |      "data": {
765 |       "text/plain": [
766 |        "[42, 73, 123]"
767 |       ]
768 |      },
769 |      "execution_count": 26,
770 |      "metadata": {},
771 |      "output_type": "execute_result"
772 |     }
773 |    ],
774 |    "source": [
775 |     "radix_sort([123,42,73], 1000)"
776 |    ]
777 |   },
778 |   {
779 |    "cell_type": "markdown",
780 |    "metadata": {},
781 |    "source": [
782 |     "Much better - we only have one iteration. Notice however that we have many more buckets than numbers - even though in theory we decrease the number of iterations, now every iteration is dominated by looping through every bucket. In this example $1000$ buckets visited in one iteration are much worse than two buckets visited in $7$ iterations (total of $14$ accesses). Actually array accesses contribute another 3 operations per iteration ($7 * 3 = 21$) adding up to a total of $35$ operations, but this is still much less than $1000$."
783 |    ]
784 |   },
785 |   {
786 |    "cell_type": "code",
787 |    "execution_count": 27,
788 |    "metadata": {
789 |     "collapsed": false
790 |    },
791 |    "outputs": [
792 |     {
793 |      "name": "stdout",
794 |      "output_type": "stream",
795 |      "text": [
796 |       "Iteration 0\n",
797 |       "Iteration 1\n",
798 |       "Iteration 2\n"
799 |      ]
800 |     },
801 |     {
802 |      "data": {
803 |       "text/plain": [
804 |        "[42, 73, 123]"
805 |       ]
806 |      },
807 |      "execution_count": 27,
808 |      "metadata": {},
809 |      "output_type": "execute_result"
810 |     }
811 |    ],
812 |    "source": [
813 |     "# much healthier choice\n",
814 |     "radix_sort([123,42,73], 5)"
815 |    ]
816 |   },
817 |   {
818 |    "cell_type": "markdown",
819 |    "metadata": {},
820 |    "source": [
821 |     "## Radix sort complexity analysis\n",
822 |     "\n",
823 |     "Let $b$ be the base and $n$ the size of the array. Moreover, let's assume that all the numbers in the array are less than or equal to $a$.\n",
824 |     "\n",
825 |     "\n",
826 |     "Single iteration of count sort is $O(n + b)$.\n",
827 |     "\n",
828 |     "How many iterations are there? At most as many as the number of digits in the longest number: $O(\\log_b a)$\n",
829 |     "\n",
830 |     "Therefore the total complexity of the algorithm is $O((n+b) \\log_b a)$.\n",
831 |     "\n",
832 |     "In theory we often assume that both $b$ and $a$ are constants - they are after all independent of $n$ - they won't influence the run time as $n$ grows. That's why some theorists say that Radix Sort is $O(n)$."
833 |    ]
834 |   },
835 |   {
836 |    "cell_type": "markdown",
837 |    "metadata": {},
838 |    "source": [
839 |     "## Exercises\n",
840 |     "\n",
841 |     "1. We said that the best possible algorithm that does sorting has complexity $O(n\\ lg\\ n)$. How is it possible that radix sort takes only $O(n)$ time? \n",
842 |     "\n",
843 |     "2. Can you come up with a sorting problem where it would be hard to use Radix Sort?"
844 |    ]
845 |   },
846 |   {
847 |    "cell_type": "markdown",
848 |    "metadata": {},
849 |    "source": [
850 |     "# Be sure to check out the Performance of Radix Sort notebook!"
851 |    ]
852 |   },
853 |   {
854 |    "cell_type": "markdown",
855 |    "metadata": {},
856 |    "source": [
857 |     "# Aside: implementation of count sort from the lectures\n",
858 |     "\n",
859 |     "This implementation has the same time and space complexity, but is faster in practice."
860 |    ]
861 |   },
862 |   {
863 |    "cell_type": "code",
864 |    "execution_count": 28,
865 |    "metadata": {
866 |     "collapsed": true
867 |    },
868 |    "outputs": [],
869 |    "source": [
870 |     "def count_sort_from_the_lecture(array, k, key=lambda x: x):\n",
871 |     "    # initialize array \n",
872 |     "    count = [0 for _ in range(k)]\n",
873 |     "    # for every key count the number of times\n",
874 |     "    # it occurs\n",
875 |     "    for element in array:\n",
876 |     "        count[key(element)] += 1\n",
877 |     "    # compute cumulative count of occurrences\n",
878 |     "    for i in range(1, k):\n",
879 |     "        count[i] += count[i-1]\n",
880 |     "    # create output array\n",
881 |     "    output = [None for _ in range(len(array))]\n",
882 |     "    # fill in output array computing slots using\n",
883 |     "    # counts array\n",
884 |     "    for i in range(len(array) - 1, -1, -1):\n",
885 |     "        output[count[key(array[i])] - 1] = array[i]\n",
886 |     "        count[key(array[i])] -= 1\n",
887 |     "    return output"
888 |    ]
889 |   },
890 |   {
891 |    "cell_type": "code",
892 |    "execution_count": 29,
893 |    "metadata": {
894 |     "collapsed": false
895 |    },
896 |    "outputs": [
897 |     {
898 |      "data": {
899 |       "text/plain": [
900 |        "[1, 2, 2, 3, 4, 5, 5]"
901 |       ]
902 |      },
903 |      "execution_count": 29,
904 |      "metadata": {},
905 |      "output_type": "execute_result"
906 |     }
907 |    ],
908 |    "source": [
909 |     "count_sort_from_the_lecture([4,3,2,5,5,1,2], 10)"
910 |    ]
911 |   }
912 |  ],
913 |  "metadata": {
914 |   "kernelspec": {
915 |    "display_name": "Python 2",
916 |    "language": "python",
917 |    "name": "python2"
918 |   },
919 |   "language_info": {
920 |    "codemirror_mode": {
921 |     "name": "ipython",
922 |     "version": 2
923 |    },
924 |    "file_extension": ".py",
925 |    "mimetype": "text/x-python",
926 |    "name": "python",
927 |    "nbconvert_exporter": "python",
928 |    "pygments_lexer": "ipython2",
929 |    "version": "2.7.8"
930 |   }
931 |  },
932 |  "nbformat": 4,
933 |  "nbformat_minor": 0
934 | }
935 | 


--------------------------------------------------------------------------------