├── .gitignore ├── 1_Divide_and_Conquer ├── 1.py ├── 10.py ├── 11.py ├── 2.py ├── 3.py ├── 4.py ├── 5.py ├── 6.py ├── 8.py ├── 8_another_version.py ├── 9.py ├── Assignment1_DandC.pdf ├── Q8.txt ├── __init__.py ├── img │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ └── 6.png ├── readme.md └── readme.pdf ├── 2_DP ├── 1.py ├── 2.py ├── 3.py ├── 4.py ├── 5.py ├── 6.py ├── 7.py ├── Assignment2.pdf ├── readme.md └── readme.pdf ├── 3_Greedy ├── 1.py ├── 2.py ├── 3.py ├── 4.py ├── 5.py ├── 6.py ├── Aesop_Fables.txt ├── Aesop_Fables.txt_compressed ├── Aesop_Fables.txt_compressed_decode ├── Assignment3_Greedy.pdf ├── graph.txt ├── graph.txt_compressed ├── graph.txt_compressed_decode ├── graph2.txt ├── img │ ├── 1.jpg │ └── 3.png ├── readme.md └── readme.pdf ├── 4_LP ├── 7.py ├── Assignment4_LP.pdf ├── img │ └── 6.png ├── readme.md ├── readme.pdf └── simplex_full_version.py ├── 5_Network_flow ├── 1.py ├── 1_dinic.cpp ├── 2.py ├── 2_Ford-Fulkerson.py ├── Assignment5_NF.pdf ├── img │ ├── 1.png │ ├── 2.png │ ├── 4.png │ └── 5.png ├── problem1.data ├── problem2.data ├── readme.md └── readme.pdf ├── 6_NP ├── Assignment6_NP.pdf ├── img │ ├── 2.png │ ├── 2_false.png │ ├── 2_true.png │ ├── 4.png │ ├── 4_false.png │ ├── 4_true.png │ ├── 5.png │ ├── 5_false.png │ └── 5_true.png ├── readme.md └── readme.pdf ├── LICENSE ├── img ├── alipay.jpg └── wechat_pay.png ├── others └── 卜东波老师算法分析与设计作业答案2015版.pdf └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | #IDEA 50 | .idea -------------------------------------------------------------------------------- /1_Divide_and_Conquer/1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/23 3 | # @Author : hrwhisper 4 | 5 | 6 | def binary_search(A, la, ra, B, lb, rb, k): 7 | m, n = ra - la, rb - lb 8 | if n == 0: return A[la + k - 1] 9 | if k == 1: return min(A[la], B[lb]) 10 | 11 | b_m = k >> 1 12 | a_m = k - b_m 13 | if A[la + a_m - 1] < B[lb + b_m - 1]: 14 | return binary_search(A, la + a_m, ra, B, lb, lb + b_m, k - a_m) 15 | else: # A[la + a_m - 1] > B[lb + b_m - 1] 16 | return binary_search(A, la, la + a_m, B, lb + b_m, rb, k - b_m) 17 | 18 | 19 | def find_median(A, B): 20 | return binary_search(A, 0, len(A), B, 0, len(A), ((len(A) << 1) + 1) >> 1) 21 | 22 | 23 | def test(test_cnt=100000, array_num=10): 24 | import random 25 | L = 1 26 | R = array_num ** 2 27 | for i in range(test_cnt): 28 | A = [] 29 | B = [] 30 | vis = set() 31 | for _ in range(random.randint(1, array_num)): 32 | t = random.randint(L, R) 33 | while t in vis: 34 | t = random.randint(L, R) 35 | vis.add(t) 36 | A.append(t) 37 | for _ in range(len(A)): 38 | t = random.randint(L, R) 39 | while t in vis: 40 | t = random.randint(L, R) 41 | vis.add(t) 42 | B.append(t) 43 | 44 | A.sort() 45 | B.sort() 46 | C = A + B 47 | n = len(A) << 1 48 | C.sort() 49 | median = C[(n - 1) >> 1] 50 | median2 = find_median(A[:], B[:]) 51 | if median2 != median: 52 | print(A) 53 | print(B) 54 | print(median, median2) 55 | return 56 | print('ok') 57 | 58 | 59 | if __name__ == '__main__': 60 | test() 61 | # s = Solution() 62 | # a = [1,2] 63 | # b = [3,4] 64 | # print(find_median(a, b), s.findMedianSortedArrays(a, b)) 65 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/10.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/27 3 | # @Author : hrwhisper 4 | from math import ceil, log 5 | 6 | 7 | def matrix_brute_mul(A, B): 8 | n = len(A) 9 | C = [[0 for _ in range(n)] for _ in range(n)] 10 | for i in range(n): 11 | for j in range(n): 12 | for k in range(n): 13 | C[i][j] += A[i][k] * B[k][j] 14 | return C 15 | 16 | 17 | def matrix_add_or_sub(A, B, add=True): 18 | n = len(A) 19 | return [[A[i][j] + B[i][j] if add else A[i][j] - B[i][j] for j in range(n)] for i in range(n)] 20 | 21 | 22 | def _strassen_mul(A, B): 23 | n = len(A) 24 | if n == 1: return [[A[0][0] * B[0][0]]] 25 | if n == 2: matrix_brute_mul(A, B) 26 | half_n = n >> 1 27 | A11, A12, A21, A22 = [], [], [], [] 28 | B11, B12, B21, B22 = [], [], [], [] 29 | for i in range(half_n): 30 | A11.append(A[i][:half_n][:]) 31 | A12.append(A[i][half_n:][:]) 32 | B11.append(B[i][:half_n][:]) 33 | B12.append(B[i][half_n:][:]) 34 | A21.append(A[i + half_n][:half_n][:]) 35 | A22.append(A[i + half_n][half_n:][:]) 36 | B21.append(B[i + half_n][:half_n][:]) 37 | B22.append(B[i + half_n][half_n:][:]) 38 | 39 | P1 = _strassen_mul(A11, matrix_add_or_sub(B12, B22, False)) 40 | P2 = _strassen_mul(matrix_add_or_sub(A11, A12), B22) 41 | P3 = _strassen_mul(matrix_add_or_sub(A21, A22), B11) 42 | P4 = _strassen_mul(A22, matrix_add_or_sub(B21, B11, False)) 43 | P5 = _strassen_mul(matrix_add_or_sub(A11, A22), matrix_add_or_sub(B11, B22)) 44 | P6 = _strassen_mul(matrix_add_or_sub(A12, A22, False), matrix_add_or_sub(B21, B22)) 45 | P7 = _strassen_mul(matrix_add_or_sub(A11, A21, False), matrix_add_or_sub(B11, B12)) 46 | 47 | C11 = matrix_add_or_sub(matrix_add_or_sub(matrix_add_or_sub(P4, P5), P6), P2, False) 48 | C12 = matrix_add_or_sub(P1, P2) 49 | C21 = matrix_add_or_sub(P3, P4) 50 | C22 = matrix_add_or_sub(matrix_add_or_sub(matrix_add_or_sub(P1, P5), P3, False), P7, False) 51 | 52 | C = [[] for _ in range(n)] 53 | for i in range(half_n): 54 | C[i].extend(C11[i]) 55 | C[i].extend(C12[i]) 56 | C[i + half_n].extend(C21[i]) 57 | C[i + half_n].extend(C22[i]) 58 | return C 59 | 60 | 61 | def strassen_matrix_mul(A, B): 62 | before_n = len(A) 63 | 64 | n = 2 ** ceil(log(before_n, 2)) 65 | for i in range(before_n): 66 | A[i].extend([0] * (n - before_n)) 67 | B[i].extend([0] * (n - before_n)) 68 | for i in range(before_n, n): 69 | A.append([0] * n) 70 | B.append([0] * n) 71 | 72 | C = _strassen_mul(A, B)[:before_n] 73 | return [row[:before_n] for row in C] 74 | 75 | 76 | def test(test_cnt=1000, max_n=100, L=1, R=1000): 77 | print('start test cnt={} , max_n={}'.format(test_cnt, max_n)) 78 | import random 79 | import numpy as np 80 | for _ in range(test_cnt): 81 | n = random.randint(1, max_n) 82 | A = [] 83 | B = [] 84 | for i in range(n): 85 | A.append([random.randint(L, R) for _ in range(n)]) 86 | B.append([random.randint(L, R) for _ in range(n)]) 87 | C2 = (np.matrix(A) * np.matrix(B)).tolist() 88 | C = strassen_matrix_mul(A[:], B[:]) 89 | if C != C2: 90 | print('Wrong answer') 91 | print(A) 92 | print(B) 93 | print(C) 94 | print(C2) 95 | return 96 | print('ok') 97 | 98 | 99 | if __name__ == '__main__': 100 | # from datetime import datetime 101 | # import random 102 | # 103 | # n = 1024 104 | # A, B = [], [] 105 | # for i in range(n): 106 | # A.append([random.randint(0, n ** 2) for _ in range(n)]) 107 | # B.append([random.randint(0, n ** 2) for _ in range(n)]) 108 | # 109 | # start = datetime.now() 110 | # matrix_brute_mul(A, B) 111 | # print('complete grade-school method in {}s'.format((datetime.now() - start).total_seconds())) 112 | # 113 | # start = datetime.now() 114 | # strassen_matrix_mul(A[:], B[:]) 115 | # print('complete Strassen method in {}s'.format((datetime.now() - start).total_seconds())) 116 | # 117 | # import numpy as np 118 | # start = datetime.now() 119 | # np.matrix(A) * np.matrix(B).tolist() 120 | # print('complete numpy matrix mul in {}s'.format((datetime.now() - start).total_seconds())) 121 | test() 122 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/11.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/24 3 | # @Author : hrwhisper 4 | 5 | 6 | def quick_mul2(x, y): 7 | x, y = str(x), str(y) 8 | if len(x) < len(y): x, y = y, x 9 | n = len(x) 10 | if n == 1: return int(x) * int(y) 11 | if n & 1: 12 | x = '0' + x 13 | n += 1 14 | y = '0' * (n - len(y)) + y 15 | 16 | half_n = n >> 1 17 | xh = int(x[:half_n]) 18 | xl = int(x[half_n:]) 19 | yh = int(y[:half_n]) 20 | yl = int(y[half_n:]) 21 | 22 | p = quick_mul2((xh + xl), (yh + yl)) 23 | h = quick_mul2(xh, yh) 24 | l = quick_mul2(xl, yl) 25 | return h * (10 ** n) + (p - h - l) * (10 ** half_n) + l 26 | 27 | 28 | def quick_mul(x, y): 29 | s_x, s_y = str(x), str(y) 30 | if len(s_x) == 1 or len(s_y) == 1: return x * y 31 | n = max(len(s_x), len(s_y)) 32 | half_n = n >> 1 33 | pow_half_n = 10 ** half_n 34 | xh = x // pow_half_n 35 | xl = x % pow_half_n 36 | yh = y // pow_half_n 37 | yl = y % pow_half_n 38 | p = quick_mul(xh + xl, yh + yl) 39 | h = quick_mul(xh, yh) 40 | l = quick_mul(xl, yl) 41 | if n & 1: n -= 1 42 | return h * (10 ** n) + (p - h - l) * (10 ** half_n) + l 43 | 44 | 45 | def test(test_cnt=10001, L=0, R=11122231): 46 | import random 47 | for i in range(test_cnt): 48 | x, y = random.randint(L, R), random.randint(L, R) 49 | t = x * y 50 | res1 = quick_mul(x, y) 51 | res2 = quick_mul2(x, y) 52 | if t != res1 or t != res2: 53 | print(x, y, t, res1, res2) 54 | return 55 | print('ok') 56 | 57 | 58 | if __name__ == '__main__': 59 | test() 60 | # print(quick_mul(0, 67), 120 * 67) 61 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/22 3 | # @Author : hrwhisper 4 | 5 | def find_kth_element(L, R, A, k): 6 | if L == R: return A[L] 7 | i = partition(L, R, A) 8 | left_element = i - L + 1 9 | if left_element == k: return A[i] 10 | if left_element < k: 11 | return find_kth_element(i + 1, R, A, k - left_element) 12 | else: 13 | return find_kth_element(L, i - 1, A, k) 14 | 15 | 16 | def partition(L, R, A): 17 | i = L + 1 18 | j = R 19 | base = A[L] 20 | while True: 21 | while i < j and A[i] > base: i += 1 22 | while j > L and A[j] < base: j -= 1 23 | if i >= j: break 24 | A[i], A[j] = A[j], A[i] # swap 25 | 26 | A[L], A[j] = A[j], A[L] # swap 27 | return j 28 | 29 | 30 | def test(test_cnt=1000000, array_num=500, L=1, R=125111): 31 | import random 32 | for i in range(test_cnt): 33 | a = [random.randint(L, R) for _ in range(array_num)] 34 | _a = sorted(a[:], reverse=True) 35 | k = random.randint(1, array_num) 36 | if find_kth_element(0, len(a) - 1, a, k) != _a[k - 1]: 37 | print(a) 38 | print(k) 39 | print(find_kth_element(0, len(a) - 1, a, k), _a[k - 1]) 40 | return 41 | print('ok') 42 | 43 | 44 | if __name__ == '__main__': 45 | test() 46 | # a = [700, 597, 91, 541, 242, 451, 538, 351, 585, 700, 728, 711, 752, 777, 1194, 1240, 804, 948, 1201, 843] 47 | # _a = sorted(a[:], reverse=True) 48 | # k = 4 49 | # print(find_kth_element(0, len(a) - 1, a, k), _a[k - 1]) 50 | # a = [3, 2, 1, 5, 6, 4] 51 | # _a = sorted(a[:], reverse=True) 52 | # k = 2 53 | # print(find_kth_element(0, len(a) - 1, a, k), _a[k - 1]) 54 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/23 3 | # @Author : hrwhisper 4 | 5 | class TreeNode(object): 6 | def __init__(self, x): 7 | self.val = x 8 | self.left = None 9 | self.right = None 10 | 11 | 12 | def createTree(node): 13 | if not node or node[0] == '#': return None 14 | root, q = TreeNode(node[0]), [] 15 | q.append(root) 16 | cur, n = q.pop(0), len(node) 17 | 18 | for i in range(1, n): 19 | if node[i] == '#': 20 | if not i & 1: 21 | cur = q.pop(0) 22 | continue 23 | t = TreeNode(node[i]) 24 | q.append(t) 25 | if i & 1: # left son 26 | cur.left = t 27 | else: 28 | cur.right = t 29 | cur = q.pop(0) 30 | return root 31 | 32 | 33 | def printTree(root): 34 | q, ans = [], [] 35 | q.append(root) 36 | while q: 37 | cur = q.pop(0) 38 | if cur: 39 | q.append(cur.left) 40 | q.append(cur.right) 41 | ans.append(cur.val) 42 | else: 43 | ans.append('#') 44 | print(ans) 45 | 46 | 47 | def search_local_minimum(root): 48 | while root: 49 | if not root.left: return root 50 | if root.val > root.left.val: 51 | root = root.left 52 | elif root.val > root.right.val: 53 | root = root.right 54 | else: 55 | return root 56 | 57 | 58 | def test(test_cnt=10000, array_num=2**5 - 1, L=1, R=1251): 59 | import random 60 | for i in range(test_cnt): 61 | vis = set() 62 | root = [] 63 | for _ in range(array_num): 64 | t = random.randint(L, R) 65 | while t in vis: 66 | t = random.randint(L, R) 67 | vis.add(t) 68 | root.append(t) 69 | root = createTree(root) 70 | root = search_local_minimum(root) 71 | if root.left and (root.val > root.left.val or root.val > root.right.val): 72 | printTree(root) 73 | 74 | 75 | if __name__ == '__main__': 76 | test() 77 | # root = createTree([1]) 78 | # print(search_local_minimum(root).val) 79 | # printTree(root) 80 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/23 3 | # @Author : hrwhisper 4 | 5 | 6 | def search_local_minimum(sx, sy, ex, ey, g): 7 | if sx == ex and sy == ey: 8 | return sx, sy 9 | elif ex - sx == 1 and ey - sy == 1: 10 | temp = [g[sx][sy], g[sx + 1][sy], g[sx][sy + 1], g[sx + 1][sy + 1]] 11 | return [(sx, sy), (sx + 1, sy), (sx, sy + 1), (sx + 1, sy + 1)][temp.index(min(temp))] 12 | 13 | mx, my = (sx + ex) >> 1, (sy + ey) >> 1 14 | min_x, min_y = mx, my 15 | for i in range(sx, ex + 1): 16 | if g[min_x][min_y] > g[i][sy]: min_x, min_y = i, sy 17 | if g[min_x][min_y] > g[i][ey]: min_x, min_y = i, ey 18 | if g[min_x][min_y] > g[i][my]: min_x, min_y = i, my 19 | 20 | for i in range(sy, ey + 1): 21 | if g[min_x][min_y] > g[sx][i]: min_x, min_y = sx, i 22 | if g[min_x][min_y] > g[ex][i]: min_x, min_y = ex, i 23 | if g[min_x][min_y] > g[mx][i]: min_x, min_y = mx, i 24 | 25 | if min_x < mx and min_y < my: # 左上角 26 | case = 0 27 | elif min_x > mx and min_y < my: # 左下角 28 | case = 1 29 | elif min_x < mx and min_y > my: # 右上角 30 | case = 2 31 | elif min_x > mx and min_y > my: # 右下角 32 | case = 3 33 | else: 34 | if min_x > sx and g[min_x - 1][min_y] < g[min_x][min_y]: 35 | case = 0 if min_y < my else 2 # 上半部分 区分左上右上 36 | elif min_x < ex and g[min_x + 1][min_y] < g[min_x][min_y]: 37 | case = 1 if min_y < my else 3 # 下半部分 区分左下右下 38 | elif min_y > sy and g[min_x][min_y - 1] < g[min_x][min_y]: 39 | case = 0 if min_x < mx else 1 # 左半部分 区分左上左下 40 | elif min_y < ey and g[min_x][min_y + 1] < g[min_x][min_y]: 41 | case = 2 if min_x < mx else 3 # 右半部分 区分右上右下 42 | else: 43 | return min_x, min_y 44 | 45 | if case == 0: 46 | return search_local_minimum(sx, sy, mx, my, g) 47 | elif case == 1: 48 | return search_local_minimum(mx, sy, ex, my, g) 49 | elif case == 2: 50 | return search_local_minimum(sx, my, mx, ey, g) 51 | else: 52 | return search_local_minimum(mx, my, ex, ey, g) 53 | 54 | 55 | def solve_search_local_minimum(g): 56 | x, y = search_local_minimum(0, 0, len(g) - 1, len(g) - 1, g) 57 | return g[x][y] 58 | 59 | # solve_search_local_minimum(g) 60 | # def search_local_minimum(sx, sy, ex, ey, g): 61 | # if sx == ex and sy == ey: 62 | # return sx, sy 63 | # elif ex - sx == 1 and ey - sy == 1: 64 | # temp = [g[sx][sy], g[sx + 1][sy], g[sx][sy + 1], g[sx + 1][sy + 1]] 65 | # return [(sx, sy), (sx + 1, sy), (sx, sy + 1), (sx + 1, sy + 1)][temp.index(min(temp))] 66 | # 67 | # mx, my = (sx + ex) >> 1, (sy + ey) >> 1 68 | # min_x, min_y = mx, my 69 | # for i in range(sx, ex + 1): 70 | # if g[min_x][min_y] > g[i][sy]: 71 | # min_x, min_y = i, sy 72 | # if g[min_x][min_y] > g[i][ey]: 73 | # min_x, min_y = i, ey 74 | # if g[min_x][min_y] > g[i][my]: 75 | # min_x, min_y = i, my 76 | # 77 | # for i in range(sy, ey + 1): 78 | # if g[min_x][min_y] > g[sx][i]: 79 | # min_x, min_y = sx, i 80 | # if g[min_x][min_y] > g[ex][i]: 81 | # min_x, min_y = ex, i 82 | # if g[min_x][min_y] > g[mx][i]: 83 | # min_x, min_y = mx, i 84 | # 85 | # if min_x == mx and min_y == my: 86 | # return mx, my 87 | # 88 | # case = 0 # 0 左上 1 左下 2 右上 3 右下 89 | # if min_x < mx and min_y < my: # 左上角 90 | # case = 0 91 | # elif min_x > mx and min_y < my: # 左下角 92 | # case = 1 93 | # elif min_x < mx and min_y > my: # 右上角 94 | # case = 2 95 | # elif min_x > mx and min_y > my: # 右下角 96 | # case = 3 97 | # elif min_x < mx and min_y == my: # 上半部分 98 | # if g[min_x][min_y - 1] > g[min_x][min_y] and g[min_x][min_y + 1] > g[min_x][min_y]: 99 | # return min_x, min_y 100 | # elif g[min_x][min_y - 1] < g[min_x][min_y]: 101 | # case = 0 102 | # else: 103 | # case = 2 104 | # elif min_x > mx and min_y == my: # 下半部分 105 | # if g[min_x][min_y - 1] > g[min_x][min_y] and g[min_x][min_y + 1] > g[min_x][min_y]: 106 | # return min_x, min_y 107 | # elif g[min_x][min_y - 1] < g[min_x][min_y]: 108 | # case = 1 109 | # else: 110 | # case = 3 111 | # elif min_x == mx and min_y < my: # 左半部分 112 | # if g[min_x - 1][min_y] > g[min_x][min_y] and g[min_x + 1][min_y] > g[min_x][min_y]: 113 | # return min_x, min_y 114 | # elif g[min_x - 1][min_y] < g[min_x][min_y]: 115 | # case = 0 116 | # else: 117 | # case = 1 118 | # elif min_x == mx and min_y > my: # 右半部分 119 | # if g[min_x - 1][min_y] > g[min_x][min_y] and g[min_x + 1][min_y] > g[min_x][min_y]: 120 | # return min_x, min_y 121 | # elif g[min_x - 1][min_y] < g[min_x][min_y]: 122 | # case = 2 123 | # else: 124 | # case = 3 125 | # 126 | # if case == 0: 127 | # return search_local_minimum(sx, sy, mx, my, g) 128 | # elif case == 1: 129 | # return search_local_minimum(mx, sy, ex, my, g) 130 | # elif case == 2: 131 | # return search_local_minimum(sx, my, mx, ey, g) 132 | # else: 133 | # return search_local_minimum(mx, my, ex, ey, g) 134 | 135 | 136 | def test(test_cnt=10000, max_n=320): 137 | print('start test cnt={} , max_n={}'.format(test_cnt, max_n)) 138 | 139 | def ok(i, j, n, g): 140 | if i > 0 and g[i][j] > g[i - 1][j]: return False 141 | if j > 0 and g[i][j] > g[i][j - 1]: return False 142 | if i + 1 < n and g[i][j] > g[i + 1][j]: return False 143 | if j + 1 < n and g[i][j] > g[i][j + 1]: return False 144 | return True 145 | 146 | L = 1 147 | R = max_n ** 4 148 | import random 149 | for i in range(test_cnt): 150 | vis = set() 151 | n = random.randint(1, max_n) 152 | g = [] 153 | for i in range(n): 154 | row = [] 155 | for j in range(n): 156 | t = random.randint(L, R) 157 | while t in vis: 158 | t = random.randint(L, R) 159 | row.append(t) 160 | vis.add(t) 161 | g.append(row) 162 | # print(g) 163 | i, j = search_local_minimum(0, 0, n - 1, n - 1, g) 164 | try: 165 | if not ok(i, j, n, g): 166 | print('Wrong Answer') 167 | print(i, j) 168 | print(g) 169 | return 170 | except IndexError as e: 171 | print('Except') 172 | print(g) 173 | print(i, j) 174 | return 175 | print('ok') 176 | 177 | 178 | if __name__ == '__main__': 179 | # g = [ 180 | # [60, 58, 56, 54, 52], 181 | # [59, 57, 55, 53, 51], 182 | # [42, 44, 46, 48, 50], 183 | # [41, 43, 45, 47, 49], 184 | # [39, 38, 37, 36, 35]] 185 | # # g = [[79, 21, 17], [33, 73, 67], [74, 57, 23]] 186 | # print(search_local_minimum(0, 0, len(g) - 1, len(g) - 1, g)) 187 | # g = [[14, 514, 556], [114, 0, 307], [501, 332, 528]] 188 | # print(search_local_minimum(0, 0, len(g) - 1, len(g) - 1, g)) 189 | g = [ 190 | [60, 58, 56, 54, 52], 191 | [59, 54, 55, 51, 51], 192 | [42, 44, 77, 48, 50], 193 | [41, 43, 45, 47, 49], 194 | [39, 38, 37, 36, 35]] 195 | test() 196 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/5.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/23 3 | # @Author : hrwhisper 4 | 5 | 6 | def count_small_than_mid(rocks, mid, n): 7 | start = cnt = 0 8 | for i in range(1, n): 9 | if rocks[i] - rocks[start] <= mid: 10 | cnt += 1 11 | else: 12 | start = i 13 | return cnt 14 | 15 | 16 | def binary_search(left, right, rocks, M, N): 17 | while left < right: 18 | mid = (left + right) >> 1 19 | if count_small_than_mid(rocks, mid, N) <= M: 20 | left = mid + 1 21 | else: 22 | right = mid 23 | return left 24 | 25 | 26 | def solve_largest_minimum_spacing(L, M, N, rocks): 27 | rocks = [0] + rocks + [L] 28 | N += 2 29 | rocks.sort() 30 | left = min(rocks[i] - rocks[i - 1] for i in range(1, N)) 31 | return binary_search(left, L + 1, rocks, M, N) 32 | 33 | 34 | if __name__ == '__main__': 35 | L, N, M = 25, 5, 2 36 | rocks = [2, 14, 11, 21, 17] 37 | print(solve_largest_minimum_spacing(L, M, N, rocks)) 38 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/6.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/23 3 | # @Author : hrwhisper 4 | 5 | 6 | def merge_sort(L, R, a): 7 | if L >= R - 1: return 0 8 | mid = (L + R) >> 1 9 | cnt_left = merge_sort(L, mid, a) 10 | cnt_right = merge_sort(mid, R, a) 11 | return cnt_left + cnt_right + merge(L, mid, R, a) 12 | 13 | 14 | def _count_by_merge(i, n, j, m, a): 15 | cnt = 0 16 | while i < n and j < m: 17 | if a[i] <= 3 * a[j]: 18 | i += 1 19 | else: 20 | cnt += n - i 21 | j += 1 22 | return cnt 23 | 24 | 25 | def merge(L, le, R, a): 26 | rs = le 27 | ls = L 28 | x = [] 29 | 30 | cnt = _count_by_merge(ls, le, rs, R, a) 31 | 32 | while ls < le and rs < R: 33 | if a[ls] <= a[rs]: 34 | x.append(a[ls]) 35 | ls += 1 36 | else: 37 | x.append(a[rs]) 38 | rs += 1 39 | 40 | for ls in range(ls, le): 41 | x.append(a[ls]) 42 | for rs in range(rs, R): 43 | x.append(a[rs]) 44 | for i in range(L, R): 45 | a[i] = x[i - L] 46 | return cnt 47 | 48 | 49 | def brute_inversions(a): 50 | n = len(a) 51 | _cnt = 0 52 | for i in range(n): 53 | for j in range(i + 1, n): 54 | if a[i] > 3 * a[j]: 55 | _cnt += 1 56 | return _cnt 57 | 58 | 59 | def test(test_cnt=1010, array_num=500, L=1, R=125551): 60 | import random 61 | for i in range(test_cnt): 62 | A = [random.randint(L, R) for _ in range(random.randint(1, array_num))] 63 | B = A[:] 64 | t = brute_inversions(B) 65 | cnt = merge_sort(0, len(A), A) 66 | B.sort() 67 | if cnt != t: 68 | print(cnt, t) 69 | return 70 | print('ok') 71 | 72 | 73 | if __name__ == '__main__': 74 | test() 75 | A = [38, 27, 43, 3, 9, 82, 10] 76 | B = A[:] 77 | print(brute_inversions(B)) 78 | print(merge_sort(0, len(A), A)) 79 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/8.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/23 3 | # @Author : hrwhisper 4 | 5 | 6 | def merge_sort(L, R, a): 7 | if L >= R - 1: return 0 8 | mid = (L + R) >> 1 9 | cnt = merge_sort(L, mid, a) 10 | cnt += merge_sort(mid, R, a) 11 | return cnt + merge(L, mid, R, a) 12 | 13 | 14 | def merge(L, le, R, a): 15 | cnt = 0 16 | rs = le 17 | ls = L 18 | x = [] 19 | while ls < le and rs < R: 20 | if a[ls] <= a[rs]: 21 | x.append(a[ls]) 22 | ls += 1 23 | else: 24 | x.append(a[rs]) 25 | rs += 1 26 | cnt += le - ls 27 | 28 | for i in range(ls, le): 29 | x.append(a[i]) 30 | for i in range(rs, R): 31 | x.append(a[i]) 32 | for i in range(L, R): 33 | a[i] = x[i - L] 34 | return cnt 35 | 36 | 37 | def quick_sort(L, R, a): 38 | if L >= R: return 0 39 | i, cnt = partition(L, R, a) 40 | cnt += quick_sort(L, i - 1, a) 41 | cnt += quick_sort(i + 1, R, a) 42 | return cnt 43 | 44 | 45 | def partition(L, R, a): 46 | cnt = 0 47 | t = [] 48 | base = a[L] 49 | i = L + 1 50 | while i <= R: 51 | if a[i] < base: 52 | cnt += i - L - len(t) 53 | t.append(a[i]) 54 | i += 1 55 | j = len(t) + L 56 | t.append(base) # or delete this line, and let i = L 57 | 58 | for i in range(L + 1, R + 1): 59 | if base <= a[i]: 60 | t.append(a[i]) 61 | 62 | for i in range(L, R + 1): 63 | a[i] = t[i - L] 64 | 65 | return j, cnt 66 | 67 | 68 | def brute_inversions(a): 69 | n = len(a) 70 | _cnt = 0 71 | for i in range(n): 72 | for j in range(i + 1, n): 73 | if a[i] > a[j]: 74 | _cnt += 1 75 | return _cnt 76 | 77 | 78 | def test(test_cnt=1000, array_num=1000, L=1, R=100000): 79 | import random 80 | for i in range(test_cnt): 81 | A = [random.randint(L, R) for _ in range(random.randint(1, array_num))] 82 | # t = brute_inversions(A[:]) 83 | cnt_merge_sort = merge_sort(0, len(A), A[:]) 84 | cnt_quick_sort = quick_sort(0, len(A) - 1, A[:]) 85 | if cnt_quick_sort != cnt_merge_sort: # cnt_merge_sort != t or: 86 | print(A) 87 | print(cnt_merge_sort, cnt_quick_sort) 88 | return 89 | print('ok') 90 | 91 | 92 | if __name__ == '__main__': 93 | from datetime import datetime 94 | 95 | with open('./Q8.txt') as f: 96 | a = list(map(int, f.read().split())) 97 | start = datetime.now() 98 | print(merge_sort(0, len(a), a[:])) 99 | print('complete merge sort in {}s'.format((datetime.now() - start).total_seconds())) 100 | start = datetime.now() 101 | print(quick_sort(0, len(a) - 1, a)) 102 | print('complete quick sort in {}s'.format((datetime.now() - start).total_seconds())) 103 | 104 | test() 105 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/8_another_version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/23 3 | # @Author : hrwhisper 4 | 5 | 6 | def merge_sort(a): 7 | if len(a) == 1: return a, 0 8 | mid = len(a) >> 1 9 | left, left_cnt = merge_sort(a[:mid]) 10 | right, right_cnt = merge_sort(a[mid:]) 11 | a, cnt = merge(left, right) 12 | return a, left_cnt + right_cnt + cnt 13 | 14 | 15 | def merge(left, right): 16 | cnt = 0 17 | x = [] 18 | left = left[::-1] 19 | right = right[::-1] 20 | while left or right: 21 | if left and right and left[-1] <= right[-1]: 22 | x.append(left.pop()) 23 | elif right: 24 | x.append(right.pop()) 25 | cnt += len(left) 26 | else: 27 | x.append(left.pop()) 28 | return x, cnt 29 | 30 | 31 | def find_inversions_by_merge_sort(a): 32 | return merge_sort(a)[1] 33 | 34 | 35 | def quick_sort(a): 36 | if not a: return 0 37 | if len(a) == 1: return 0 38 | i, cnt = partition(a) 39 | left_cnt = quick_sort(a[:i]) 40 | right_cnt = quick_sort(a[i + 1:]) 41 | return left_cnt + right_cnt + cnt 42 | 43 | 44 | def partition(a): 45 | if len(a) == 0: return 0, 0 46 | cnt = 0 47 | t = [] 48 | base = a[0] 49 | for i in range(len(a)): 50 | if a[i] < base: 51 | cnt += i - len(t) 52 | t.append(a[i]) 53 | j = len(t) 54 | for i in range(len(a)): 55 | if base <= a[i]: 56 | t.append(a[i]) 57 | 58 | for i in range(len(a)): 59 | a[i] = t[i] 60 | return j, cnt 61 | 62 | 63 | def brute_inversions(a): 64 | n = len(a) 65 | _cnt = 0 66 | for i in range(n): 67 | for j in range(i + 1, n): 68 | if a[i] > a[j]: 69 | _cnt += 1 70 | return _cnt 71 | 72 | 73 | def test(test_cnt=1000, array_num=1000, L=1, R=100000): 74 | import random 75 | for i in range(test_cnt): 76 | A = [random.randint(L, R) for _ in range(random.randint(1, array_num))] 77 | # t = brute_inversions(A[:]) 78 | cnt_merge_sort = find_inversions_by_merge_sort(A[:]) 79 | cnt_quick_sort = quick_sort(A[:]) 80 | if cnt_quick_sort != cnt_merge_sort: # cnt_merge_sort != t or: 81 | print(A) 82 | print(cnt_merge_sort, cnt_quick_sort) 83 | return 84 | print('ok') 85 | 86 | 87 | if __name__ == '__main__': 88 | from datetime import datetime 89 | 90 | with open('./Q8.txt') as f: 91 | a = list(map(int, f.read().split())) 92 | start = datetime.now() 93 | print(find_inversions_by_merge_sort(a[:])) 94 | print('complete merge sort in {}s'.format((datetime.now() - start).total_seconds())) 95 | start = datetime.now() 96 | print(quick_sort(a[:])) 97 | print('complete quick sort in {}s'.format((datetime.now() - start).total_seconds())) 98 | 99 | test() 100 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/9.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/24 3 | # @Author : hrwhisper 4 | from functools import cmp_to_key 5 | import math 6 | 7 | 8 | def euclidean_dis_pow(a, b): 9 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) 10 | 11 | 12 | def solve_closest_pair_n_logn2(points): 13 | def closest_pair(L, R, points): 14 | if L == R: return 0x7fffffff, points[L], points[R] # return int max 15 | if R - L == 1: return euclidean_dis_pow(points[L], points[R]), points[L], points[R] 16 | mid = (L + R) >> 1 17 | d, p1, p2 = closest_pair(L, mid, points) 18 | d2, p3, p4 = closest_pair(mid + 1, R, points) 19 | if d > d2: 20 | d, p1, p2 = d2, p3, p4 21 | 22 | min_x = points[mid][0] - d 23 | max_x = points[mid][0] + d 24 | 25 | suspect = [points[i] for i in range(L, R + 1) if min_x <= points[i][0] <= max_x] 26 | 27 | suspect.sort(key=lambda x: x[1]) 28 | n = len(suspect) 29 | for i in range(n): 30 | for j in range(i + 1, n): 31 | if suspect[j][1] - suspect[i][1] > d: break 32 | t = euclidean_dis_pow(suspect[i], suspect[j]) 33 | if t < d: 34 | d = t 35 | p1, p2 = suspect[i], suspect[j] 36 | return d, p1, p2 37 | 38 | points.sort(key=cmp_to_key(lambda x, y: x[0] - y[0] if x[0] != y[0] else x[1] - y[1])) 39 | return closest_pair(0, len(points) - 1, points) 40 | 41 | 42 | def solve_closest_pair_n_logn(points): 43 | def merge(ls, le, re, a): 44 | start = ls 45 | rs = le + 1 46 | b = [] 47 | while ls <= le and rs <= re: 48 | if a[ls][1] < a[rs][1]: 49 | b.append(a[ls]) 50 | ls += 1 51 | else: 52 | b.append(a[rs]) 53 | rs += 1 54 | 55 | for i in range(ls, le + 1): 56 | b.append(a[i]) 57 | 58 | for i in range(rs, re + 1): 59 | b.append(a[i]) 60 | 61 | for i in range(start, re + 1): 62 | a[i] = b[i - start] 63 | 64 | def closest_pair(L, R, points, y_sorted): 65 | if L == R: return 0x7fffffff, points[L], points[R] # return int max 66 | if R - L == 1: 67 | if y_sorted[L][1] > y_sorted[R][1]: 68 | y_sorted[L], y_sorted[R] = y_sorted[R], y_sorted[L] 69 | return euclidean_dis_pow(points[L], points[R]), points[L], points[R] 70 | mid = (L + R) >> 1 71 | d, p1, p2 = closest_pair(L, mid, points, y_sorted) 72 | d2, p3, p4 = closest_pair(mid + 1, R, points, y_sorted) 73 | merge(L, mid, R, y_sorted) 74 | if d > d2: 75 | d, p1, p2 = d2, p3, p4 76 | 77 | min_x = points[mid][0] - d 78 | max_x = points[mid][0] + d 79 | 80 | suspect = [y_sorted[i] for i in range(L, R + 1) if min_x <= y_sorted[i][0] <= max_x] 81 | n = len(suspect) 82 | for i in range(n): 83 | for j in range(i + 1, n): 84 | if suspect[j][1] - suspect[i][1] > d: break 85 | t = euclidean_dis_pow(suspect[i], suspect[j]) 86 | if t < d: 87 | d = t 88 | p1, p2 = suspect[i], suspect[j] 89 | return d, p1, p2 90 | 91 | points.sort(key=cmp_to_key(lambda x, y: x[0] - y[0] if x[0] != y[0] else x[1] - y[1])) 92 | y_sorted = points[:] 93 | return closest_pair(0, len(points) - 1, points, y_sorted) 94 | 95 | 96 | def brute_closest_pair(points): 97 | d = 0x7ffffffff 98 | n = len(points) 99 | for i in range(n): 100 | for j in range(i + 1, n): 101 | d = min(d, euclidean_dis_pow(points[i], points[j])) 102 | return d 103 | 104 | 105 | def test(test_cnt=3000, array_num=1500, L=1, R=15100): 106 | import random 107 | for i in range(test_cnt): 108 | points = [(random.randint(L, R), random.randint(L, R)) for _ in range(random.randint(2, array_num))] 109 | d1 = brute_closest_pair(points[:]) 110 | d2, p1, p2 = solve_closest_pair_n_logn2(points[:]) 111 | d3, p3, p4 = solve_closest_pair_n_logn(points[:]) 112 | 113 | if d1 != d2 or d1 != d3: 114 | print(d1, d2, d3) 115 | return 116 | print('ok') 117 | 118 | 119 | if __name__ == '__main__': 120 | test() 121 | # a = [[1, 2], [1, 1], [0, 1]] 122 | # print(brute_closest_pair(a)) 123 | # 124 | # a = [(127860, 86521), (30732, 71007), (4991, 11841), (52612, 123297)] 125 | # a = [(3280, 6524), (974, 2708), (9442, 13129), (6876, 5971), (14190, 8614), (14278, 13317), (7126, 7101)] 126 | # print(solve_closest_pair_n_logn(a[:])) 127 | # print(solve_closest_pair_n_logn2(a[:])) 128 | # print(brute_closest_pair(a)) 129 | -------------------------------------------------------------------------------- /1_Divide_and_Conquer/Assignment1_DandC.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/Assignment1_DandC.pdf -------------------------------------------------------------------------------- /1_Divide_and_Conquer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/__init__.py -------------------------------------------------------------------------------- /1_Divide_and_Conquer/img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/img/1.png -------------------------------------------------------------------------------- /1_Divide_and_Conquer/img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/img/2.png -------------------------------------------------------------------------------- /1_Divide_and_Conquer/img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/img/3.png -------------------------------------------------------------------------------- /1_Divide_and_Conquer/img/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/img/4.png -------------------------------------------------------------------------------- /1_Divide_and_Conquer/img/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/img/5.png -------------------------------------------------------------------------------- /1_Divide_and_Conquer/img/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/img/6.png -------------------------------------------------------------------------------- /1_Divide_and_Conquer/readme.md: -------------------------------------------------------------------------------- 1 | ## 计算机算法设计与分析-作业1(Divide and Conquer) 2 | 3 | 几点说明: 4 | 5 | - Author: hrwhisper 6 | - 用Python 3.5编写所有的代码 7 | - 没有写伪代码,虽然具体实现要比伪代码花费更多的时间,但是能通过生成随机数据进行测试来帮助检验算法的正确性。 8 | - 每个代码最后一行为调用的示例 9 | - 所有的代码以及测试代码均可在我的github上找到 10 | - 地址:https://github.com/hrwhisper/algorithm_course/tree/master/1_Divide_and_Conquer 11 | - 作业提交截止后上传,现在只是空目录。 12 | 13 | 14 | 15 | ## Problem 1. 16 | 17 | > You are interested in analyzing some hard-to-obtain data from two separate databases. Each database contains n numerical values, so there are 2n values total and you may assume that no two values are the same. You’d like to determine the median of this set of 2n values, which we will define here to be the nth smallest value. 18 | > However, the only way you can access these values is through queries to the databases. In a single query, you can specify a value k to one of the two databases, and the chosen database will return the kth smallest value that it contains. Since queries are expensive, you would like to compute the median using as few queries as possible. 19 | > Give an algorithm that finds the median value using at most O(logn) queries. 20 | 21 | ### 思路与代码 22 | 23 | 这里将两个数据库的命名为A和B。 24 | 25 | 由于A和B的长度均为n,所以总共的长度为2n,本题定义的中位数位第n小的元素,而不是我们常见的求第(2n + 1) / 2的元素和第(2n + 1) / 2 + 1的元素的均值。 26 | 27 | 由于A和B能查询分别查询其第K个元素是什么(可以看成已经排好顺序的**数组**),因此,我们可以用二分的思想来解决此问题。 28 | 29 | 定义查询A和B中的第k小元素时,A的范围为[la,ra), B的搜索范围为[lb,rb) ,初始有la=lb=0,ra=rb=n。 30 | 31 | 对于每次查询,只需要在B中查找第lb + b_m(b_m= k / 2) 个元素,而在A中查询第la + a_m (a_m = k - b_m ) 个元素。我们假定数据库支持下标查询,且从0开始,于是有: 32 | 33 | - 若A[la + a_m - 1] < B[lb + b_m - 1] (数组下标从0开始,所以-1),说明第k个元素的范围应该在A中的[la + am,ra)和B中的[lb,lb+bm)之间查找第k-a_m个元素。 34 | - 若A[la + a_m - 1] > B[lb + b_m - 1] ,说明第k个元素的范围应该在A中的[la ,la+a_m)和B中的[lb +b_m,rb)之间查找第k-b_m个元素。 35 | 36 | 根据上述思想,Python代码如下: 37 | 38 | ```python 39 | def binary_search(A, la, ra, B, lb, rb, k): 40 | m, n = ra - la, rb - lb 41 | if n == 0: return A[la + k - 1] 42 | if k == 1: return min(A[la], B[lb]) 43 | 44 | b_m = k >> 1 45 | a_m = k - b_m 46 | if A[la + a_m - 1] < B[lb + b_m - 1]: 47 | return binary_search(A, la + a_m, ra, B, lb, lb + b_m, k - a_m) 48 | else: # A[la + a_m - 1] > B[lb + b_m - 1] 49 | return binary_search(A, la, la + a_m, B, lb + b_m, rb, k - b_m) 50 | 51 | 52 | def find_median(A, B): 53 | return binary_search(A, 0, len(A), B, 0, len(A), ((len(A) << 1) + 1) >> 1) 54 | 55 | # find_median(A, B) # A and B should be in sorted 56 | ``` 57 | 58 | ### 子问题关系图 59 | 60 |  61 | 62 | ### 正确性证明 63 | 64 | 我们只需要证明寻找中间的元素过程无误即可。 65 | 66 | 我们的划分依据是(元素各不相同,无需考虑相等情况): 67 | 68 | 1. A[la + a_m - 1] < B[lb + b_m - 1] 则缩小A的范围为[la + a_m,ra),B的范围为[lb,lb+b_m),在新范围查找k - am个元素 69 | 2. A[la + a_m - 1] > B[lb + b_m - 1] 则缩小B的范围为[lb +b_m,rb),A的范围为[la ,la+a_m),在新范围查找k - bm个元素 70 | 71 | 对于每一次,我们在B中查找第lb + k/2个元素b,而在A中查找第la + k - k/2个元素a: 72 | 73 | - 若 a < b,说明第k小的元素一定不在A的[la,la+a_m)中,也不在B的[lb+b_m,rb)中, 因此可以缩小范围。 74 | - 若a > b,说明第k小的元素一定不在A的[la + am, ra)中,也不在B的[lb,lb +b_m)中, 因此可以缩小范围。 75 | 76 | 由于上述过程并不丢失解,因此该算法正确。 77 | 78 | ### 时间复杂度分析 79 | 80 | 由于每次将问题的规模减少1/2,因此有 81 | $$ 82 | T(n) = T(n/2) + c 83 | $$ 84 | 因此复杂度为O(logn) 85 | 86 | ## Problem 2. 87 | 88 | > Find the kth largest element in an unsorted array. Note that it is the kth largest element in the sorted order, not the kth distinct element. 89 | > For example, Given [3, 2 ,1, 5, 6, 4] and k = 2, return 5. 90 | 91 | ### 思路与代码 92 | 93 | 本题目标是在无序的数组中寻找第k大的元素。我们可以借助快排划分函数的思想,对于每一次的划分[L,R],我们选择A[L]作为枢纽元,若比A[L] 大,放在左边, 比A[L] 小,放在右边。 94 | 95 | 这样,可以该划分函数可以返回一个值i,这个值表示了[L,R]之间,A[i]此时的位置。 96 | 97 | - 显然,A[i]左边元素有 i - L + 1 个 ,我们记为 left_element 98 | - 若left_element == k 显然 A[i]就是要找的元素 99 | - left_element < k: 显然应该往右查找,变为查找[i+1,R] 中的第 k - left_element 个元素 100 | - left_element > k:显然结果在左边,查找[L,i - 1]中的第k个元素即可。 101 | 102 | 根据上述思想,Python代码如下: 103 | 104 | ```python 105 | def find_kth_element(L, R, A, k): 106 | if L == R: return A[L] 107 | i = partition(L, R, A) 108 | left_element = i - L + 1 109 | if left_element == k: return A[i] 110 | if left_element < k: 111 | return find_kth_element(i + 1, R, A, k - left_element) 112 | else: 113 | return find_kth_element(L, i - 1, A, k) 114 | 115 | def partition(L, R, A): 116 | i = L + 1 117 | j = R 118 | base = A[L] 119 | while True: 120 | while i < j and A[i] > base: i += 1 121 | while j > L and A[j] < base: j -= 1 122 | if i >= j: break 123 | A[i], A[j] = A[j], A[i] # swap 124 | 125 | A[L], A[j] = A[j], A[L] # swap 126 | return j 127 | 128 | # print(find_kth_element(0, len(a) - 1, a, k)) 129 | ``` 130 | 131 | ### 子问题关系图 132 | 133 |  134 | 135 | ### 正确性证明 136 | 137 | 上述的过程中,我们利用快速排序的划分函数对数组进行划分,并根据划分后返回的下标i来进行区间缩小的依据。 138 | 139 | A[i]左边元素有 left_element = i - L + 1个,且左边的元素均比A[i]来得小,而右边的元素比A[i]来得大。 140 | 141 | - 若left_element == k 显然 A[i]就是要找的元素 142 | - left_element < k: 左边的元素不够K个,说明第K个元素在右边。变为查找[i+1,R] 中的第 k - left_element 个元素 143 | - left_element > k:左边元素比K个大,说明往左边查找。查找[L,i - 1]中的第k个元素即可。 144 | 145 | 由于上述过程并不丢失解,因此该算法正确。 146 | 147 | 148 | 149 | ### 时间复杂度分析 150 | 151 | #### 最坏情况复杂度 152 | 153 | 该算法的复杂度取决于划分函数划分的好坏,与快速排序类似,最坏情况下,每次选择的都是数组中最大或者最小的元素,那么有 154 | $$ 155 | T(n) <= T(n-1) + cn 156 | $$ 157 | 复杂度为O(n^2^) 158 | 159 | #### 最好情况复杂度 160 | 161 | 在最好情况下,每次选择为中间的元素,那么有 162 | $$ 163 | T(n) <= T(n/2) + cn 164 | $$ 165 | 复杂度为O(n) 166 | 167 | #### 平均情况复杂度 168 | 169 | 在平均情况下,我们选择的一般不是最好也不是最差。和快速排序的平均复杂度证明类似,本算法也可以证明平均复杂度为O(n) 170 | 171 | ## Problem 3. 172 | > Consider an n-node complete binary tree T, where n = 2^d^ − 1 for some d. Each node v of T is labeled with a real number x~v~. You may assume that the real numbers labeling the nodes are all distinct. A node v of T is a local minimum if the label x~v~ is less than the label x~w~ for all nodes w that are joined to v by an edge. 173 | > You are given such a complete binary tree T, but the labeling is only specified in the following implicit way: for each node v, you can determine the value x~v~ by probing the node v. Show how to find a local minimum of T using only O(logn) probes to the nodes of T. 174 | 175 | ### 思路与代码 176 | 177 | 从根节点出发, 178 | 179 | - 若它的两个子节点均大于它,那么返回根节点的值。 180 | - 否则必然有一个子节点的值小于它,走该子节点即可。 181 | 182 | 首先定义树节点如下: 183 | 184 | ```python 185 | class TreeNode(object): 186 | def __init__(self, x): 187 | self.val = x 188 | self.left = None 189 | self.right = None 190 | ``` 191 | 192 | 下面是根据上述思想实现的代码 193 | 194 | ```python 195 | def search_local_minimum(root): 196 | while root: 197 | if not root.left: return root.val 198 | if root.val > root.left.val: 199 | root = root.left 200 | elif root.val > root.right.val: 201 | root = root.right 202 | else: 203 | return root.val 204 | 205 | # search_local_minimum(root) 206 | ``` 207 | 208 | ### 子问题关系图 209 | 210 | 下面的图中,打X的说明不需要进行遍历。 211 | 212 |  213 | 214 | ### 正确性证明 215 | 216 | 在上述的过程中,我们往下遍历的条件为:子节点的值 < 当前节点的值。 217 | 218 | 因此,对于遍历到的任一节点来说,必然有其父节点(如果有的话)的值比该节点来的大,因此只需要继续查看其子节点即可。 219 | 220 | 在中途中,若出现两个子节点的值均大于该节点,那么已经找到局部最小值。 221 | 222 | 最坏的情况是遍历到叶子节点。由于其只与父节点有连边,并且该叶子节点值 < 父节点的值,因此为局部最小值。 223 | 224 | ### 复杂度分析 225 | 226 | 在正确性证明中已经提到,最坏的情况是遍历到子节点,因此其最大的次数为树的高度,即为O(logn) 227 | 228 | 229 | 230 | ## Problem 4. 231 | 232 | > Suppose now that you’re given an n×n grid graph G. (An n×n grid graph is just the adjacency graph of an n×n chessboard. To be completely precise, it is a graph whose node set is the set of all ordered pairs of natural numbers (i,j), where i <= n and j <=n ; the nodes (i,j) and (k,l) are joined by an edge if and only if |i − k| + |j − l| = 1.) 233 | > We use some of the terminology of PROBLEM 4. Again, each node v is labeled by a real number x~v~ ; you may assume that all these labels are **distinct**. Show how to find a local minimum of G using only O(n) probes to the nodes of G. (Note that G has n^2^ nodes.) 234 | 235 | ### 思路与代码 236 | 237 | 用分治法解此题,主要是找到必然包含局部最小值的位置,然后缩小查找范围。 238 | 239 | 为了方便说明,将图标记为蓝色、绿色,分别表示一个区域的边界、两个区域(或者四个区域)的交界。 240 | 241 |  242 | 243 | 我们每次从上图中蓝色、绿色所有的点进行比较,找到其最小值,记录其下标为min_x,min_y。 244 | 245 | (min_x,min_y)可能有如下的颜色: 246 | 247 | - 蓝色,则说明局部最小值即为该区域,则下次搜索该区域即可。 248 | - 绿色,则需要比较其上下左右四个点(如果有的话)中的点,有如下情况: 249 | - 该点四周的点比较大,说明该点为局部最小值 250 | - 该点四周某个点比较小,则下次搜索区域为该点所在的区域,若存在多个点,也只需要搜索任一一个小的点所在区域即可。 251 | 252 | 253 | 254 | ```python 255 | def search_local_minimum(sx, sy, ex, ey, g): 256 | if sx == ex and sy == ey: 257 | return sx, sy 258 | elif ex - sx == 1 and ey - sy == 1: 259 | temp = [g[sx][sy], g[sx + 1][sy], g[sx][sy + 1], g[sx + 1][sy + 1]] 260 | return [(sx, sy), (sx + 1, sy), (sx, sy + 1), (sx + 1, sy + 1)][temp.index(min(temp))] 261 | 262 | mx, my = (sx + ex) >> 1, (sy + ey) >> 1 263 | min_x, min_y = mx, my 264 | for i in range(sx, ex + 1): 265 | if g[min_x][min_y] > g[i][sy]: min_x, min_y = i, sy 266 | if g[min_x][min_y] > g[i][ey]: min_x, min_y = i, ey 267 | if g[min_x][min_y] > g[i][my]: min_x, min_y = i, my 268 | 269 | for i in range(sy, ey + 1): 270 | if g[min_x][min_y] > g[sx][i]: min_x, min_y = sx, i 271 | if g[min_x][min_y] > g[ex][i]: min_x, min_y = ex, i 272 | if g[min_x][min_y] > g[mx][i]: min_x, min_y = mx, i 273 | 274 | if min_x < mx and min_y < my: # 左上角 275 | case = 0 276 | elif min_x > mx and min_y < my: # 左下角 277 | case = 1 278 | elif min_x < mx and min_y > my: # 右上角 279 | case = 2 280 | elif min_x > mx and min_y > my: # 右下角 281 | case = 3 282 | else: 283 | if min_x > sx and g[min_x - 1][min_y] < g[min_x][min_y]: 284 | case = 0 if min_y < my else 2 # 上半部分 区分左上右上 285 | elif min_x < ex and g[min_x + 1][min_y] < g[min_x][min_y]: 286 | case = 1 if min_y < my else 3 # 下半部分 区分左下右下 287 | elif min_y > sy and g[min_x][min_y - 1] < g[min_x][min_y]: 288 | case = 0 if min_x < mx else 1 # 左半部分 区分左上左下 289 | elif min_y < ey and g[min_x][min_y + 1] < g[min_x][min_y]: 290 | case = 2 if min_x < mx else 3 # 右半部分 区分右上右下 291 | else: 292 | return min_x, min_y 293 | 294 | if case == 0: 295 | return search_local_minimum(sx, sy, mx, my, g) 296 | elif case == 1: 297 | return search_local_minimum(mx, sy, ex, my, g) 298 | elif case == 2: 299 | return search_local_minimum(sx, my, mx, ey, g) 300 | else: 301 | return search_local_minimum(mx, my, ex, ey, g) 302 | 303 | def solve_search_local_minimum(g): 304 | x, y = search_local_minimum(0, 0, len(g) - 1, len(g) - 1, g) 305 | return g[x][y] 306 | 307 | # solve_search_local_minimum(g) 308 | ``` 309 | 310 | ### 子问题关系图 311 | 312 | 见思路与代码中的图,每次将其范围缩小为左上、右上、左下、右下中的一个。 313 | 314 | ### 正确性证明 315 | 316 | 在上述的算法中,我们首先比较了所有蓝色和绿色的点,并标记了最小值的坐标min_x,min_y。 317 | 318 | 由于各个点的值都不相同,因此,该点必然小于其它蓝色、绿色的点。 319 | 320 | 回顾一下我们的过程,若该点的颜色如下: 321 | 322 | - 蓝色,下次搜索该点所在的区域。 323 | - 绿色,则需要比较其上下左右四个点(如果有的话)中的点,有如下情况: 324 | - 该点四周的点比较大,说明该点为局部最小值 325 | - 该点四周某个点比较小,则下次搜索区域为该点所在的区域,若存在多个点,也只需要搜索任一一个小的点所在区域。 326 | 327 | 证明该算法的正确性关键在于证明对于蓝色和绿色点处理的正确性。 328 | 329 | #### 1. 蓝色的点正确性 330 | 331 | 若该点(下面记为点a)为蓝色的点,那么,我们的做法是下一次直接搜索该点所在的区域area。 332 | 333 | 若从该点a出发,对area进行遍历,遍历的方法是每次取比当前点的值val **小**的元素,那么遍历一定会终止。 334 | 335 | 因为val不断的缩小,而我们知道,area的最外围的一定大于val。 因此遍历一定终止,而终止的时候val为局部最小值。 336 | 337 | 因此对于蓝色的点,该点所在的area中必然有局部最小值,因此对于蓝色点处理正确。 338 | 339 | #### 2. 绿色点的正确性 340 | 341 | 绿色点处理的正确性证明如下: 342 | 343 | 1. 若四周的点比该点大,说明为局部最小值,返回该点即可。这个显然正确。 344 | 2. 若该点四周的某个点b比该点小,则下次为这个点所在的区域area。原理和蓝色的点类似,若从b出发,对area进行遍历,遍历的方法是每次取比当前点的值val **小**的元素,那么遍历一定会终止。因为area最外围边界的点均会大于遍历时不断缩小的val。 终止时val为局部最小值。因此这样能保证有局部最小值。 345 | 346 | 综合1和2对于蓝色、绿色点的处理正确性证明,该算法正确。 347 | 348 | ### 复杂度分析 349 | 350 | 上述的算法中,每次将划分为1个子问题,每个子问题是原来的1/4,而每次比较的为6n,有如下递归式: 351 | $$ 352 | T(n) = T(n/4) +cn 353 | $$ 354 | 因此复杂度为O(n) 355 | 356 | ## Problem 5. 357 | 358 | > every year the cows hold an event featuring a peculiar version of hopscotch that involves carefully jumping from rock to rock in a river. The excitement takes place on a long, straight river with a rock at the start and another rock at the end, L units away from the start (1 ≤ L ≤ 1,000,000,000). Along the river between the starting and ending rocks, N (0 ≤ N ≤ 50,000) more rocks appear, each at an integral distance Di from the start (0 < Di < L). 359 | > To play the game, each cow in turn starts at the starting rock and tries to reach the finish at the ending rock, jumping only from rock to rock. Of course, less agile cows never make it to the final rock, ending up instead in the river. 360 | > Farmer John is proud of his cows and watches this event each year. But as time goes by, he tires of watching the timid cows of the other farmers limp across the short distances between rocks placed too closely together. He plans to remove several rocks in order to increase the shortest distance a cow will have to jump to reach the end. He knows he cannot remove the starting and ending rocks, but he calculates that he has enough resources to remove up to M rocks (0 ≤ M ≤ N). 361 | > Farmer John wants to know exactly how much he can increase the shortest distance before he starts removing the rocks. Help Farmer John determine the greatest possible shortest distance a cow has to jump after removing the optimal set of M rocks. In other words, you need help John to find a way to remove M blocks, so that in the rest of the blocks, the distance between two adjacent blocks which have a minimum spacing is the largest. 362 | 363 | ### 思路与代码 364 | 365 | 题目给定了到L,M,N分别表示到河岸的距离,移除M个石头,原来有N个石头,以及各个石头到初始点的距离rocks。 366 | 367 | 首先将初始点0和终点L看成不能删掉的两个石头,将它们添加进rocks数组,然后对rocks排序。我们知道,在移除M个石头之后,最小距离d 必定满足 368 | $$ 369 | min(rocks[i] - rocks[i-1] , i ∈ [1,N + 1]) < = d <= L 370 | $$ 371 | 因此,我们可以用二分的方法猜测该最小距离。设当前的范围[left,right)。 每次我们查看 left 和 right的中点mid,该mid就是我们猜测的最小的距离的最大值,然后对rocks中,间距**不大于**mid的进行计数(这些就是要删除的点),记为cnt。 372 | 373 | - cnt > M: 说明该mid值太大了,需要减小,区间变为 [left,mid) 374 | - cnt < M: 说明mid太小,区间变为 [mid+1,right) 375 | - cnt == M: 说明恰好去除了M个,但是,删除掉这M个后,长度一定比mid大,因此区间变为 [mid+1,right) 376 | 377 | 根据上述思想,Python代码如下: 378 | 379 | ```python 380 | def count_small_than_mid(rocks, mid, n): 381 | start = cnt = 0 382 | for i in range(1, n): 383 | if rocks[i] - rocks[start] <= mid: 384 | cnt += 1 385 | else: 386 | start = i 387 | return cnt 388 | 389 | def binary_search(left, right, rocks, M, N): 390 | while left < right: 391 | mid = (left + right) >> 1 392 | if count_small_than_mid(rocks, mid, N) <= M: 393 | left = mid + 1 394 | else: 395 | right = mid 396 | return left 397 | 398 | def solve_largest_minimum_spacing(L, M, N, rocks): 399 | rocks = [0] + rocks + [L] 400 | N += 2 401 | rocks.sort() 402 | left = min(rocks[i] - rocks[i - 1] for i in range(1, N)) # left start with 0 is ok. 403 | return binary_search(left, L + 1, rocks, M, N) 404 | 405 | # solve_largest_minimum_spacing(L, M, N, rocks) 406 | ``` 407 | 408 | ### 子问题关系图 409 | 410 |  411 | 412 | ### 正确性证明 413 | 414 | 上述的做法中,用二分法猜测最小距离的最大值,来不断的逼近正确的解。 415 | 416 | 对于每一次的二分范围,[left,right)。mid 为其中点,也是我们的猜测值,我们遍历rocks数组,若石头间的间距比mid小,说明我们要把这个石头删除掉,来扩大最小值。记小于等于mid的共有cnt个,于是有: 417 | 418 | - cnt > M: 我们需要删除多于M个石头来达到mid,说明该mid值太大了,需要减小区间上界,区间变为 [left,mid) 419 | - cnt < M: 说明还没有删除M个石头就可以达到mid,就是说mid太小,区间变为 [mid+1,right) 420 | - cnt == M: 说明恰好去除了M个,但是,删除掉这M个后,长度一定比mid大,因此区间变为 [mid+1,right) 421 | 422 | 通过不断的二分缩小范围,最后一定能找到真正的最大的最小间距。 423 | 424 | ### 复杂度分析 425 | 426 | 首先进行排序,复杂度O(NlogN),接着对L进行二分,每次过程需要O(N),因此复杂度为O(NlogL) 427 | 428 | 一般来说,有L > N, 总复杂度为O(NlogL) 429 | 430 | ## Problem 6. 431 | 432 | > Recall the problem of finding the number of inversions. As in the course, we are given a sequence of n numbers a1,··· ,an, which we assume are all distinct, and we difine an inversion to be a pair i < j such that ai > aj. 433 | > We motivated the problem of counting inversions as a good measure of how different two orderings are. However, one might feel that this measure is too sensitive. Let’s call a pair a significant inversion if i < j and ai > 3a~j~. Given an O(nlogn) algorithm to count the number of significant inversions between two orderings. 434 | 435 | ### 思路与代码 436 | 437 | 本题目的提出了新的逆序数定义,要求 i < j 并且 a[i] > 3a[j],不能像原来的合并排序一样,简单的在合并的时候a[i] > a[j] ,然后累计下标之差。因为可能a[i] > a[j]但是 a[i] <= 3a[j]。 438 | 439 | 因此,单独的一个新的过程,对于左右两个已经有序的数组,我们只需要O(n)即可求出解。 440 | 441 | - 若a[i] <= 3*a[j] : i++ 442 | - 否则a[i] > 3*a[j]: 说明此时满足条件,cnt += n - i , 最后j++ 443 | 444 | 代码如下: 445 | 446 | ```python 447 | def merge_sort(L, R, a): 448 | if L >= R - 1: return 0 449 | mid = (L + R) >> 1 450 | cnt_left = merge_sort(L, mid, a) 451 | cnt_right = merge_sort(mid, R, a) 452 | return cnt_left + cnt_right + merge(L, mid, R, a) 453 | 454 | def _count_by_merge(i, n, j, m, a): 455 | cnt = 0 456 | while i < n and j < m: 457 | if a[i] <= 3 * a[j]: 458 | i += 1 459 | else: 460 | cnt += n - i 461 | j += 1 462 | return cnt 463 | 464 | def merge(L, le, R, a): 465 | rs = le 466 | ls = L 467 | x = [] 468 | 469 | cnt = _count_by_merge(ls, le, rs, R, a) 470 | 471 | while ls < le and rs < R: 472 | if a[ls] <= a[rs]: 473 | x.append(a[ls]) 474 | ls += 1 475 | else: 476 | x.append(a[rs]) 477 | rs += 1 478 | 479 | for ls in range(ls, le): 480 | x.append(a[ls]) 481 | for rs in range(rs, R): 482 | x.append(a[rs]) 483 | for i in range(L, R): 484 | a[i] = x[i - L] 485 | return cnt 486 | 487 | # merge_sort(0, len(A), A) 488 | ``` 489 | 490 | ### 子问题关系图 491 | 492 | 和合并排序类似,过程如下:(在合并过程中进行计数已经用红色的数字标识出来。) 493 | 494 |  495 | 496 | ### 正确性证明 497 | 498 | 该算法在合并排序的基础上,添加了一个_count_by_merge的过程,来遍历两个子数组,用来求解significant inversion的个数。 499 | 500 | 由于两个子数组已经排好序,因此可以用双指针的思想来找到a[i] >3* a[j] 的分界点。对于a[i] <= 3a[j],显然a[i] <= 3*a[j+1....],因此使用双指针不会丢失解,该算法是正确的。 501 | 502 | ### 复杂度分析 503 | 504 | 本解法在合并排序的合并过程中增加了一个过程来统计逆序数的个数,其和合并过程复杂度一样,都为O(n), 仍然有如下递归表达式: 505 | 506 | $$ 507 | T(n) = 2T(n/2) + cn 508 | $$ 509 | 因此总复杂度仍为O(nlogn) 510 | 511 | 512 | 513 | 514 | 515 | ## Problem 7. 516 | 517 | > A group of n Ghostbusters is battling n ghosts. Each Ghostbuster is armed with a proton 518 | > pack, which shoots a stream at a ghost, eradicating it. A stream goes in a straight line and terminates when it hits the ghost. The Ghostbusters decide upon the following strategy. They will pair off with the ghosts, forming n Ghostbuster-ghost pairs, and then simultaneously each Ghostbuster will shoot a stream at his chosen ghost. As we all know, it is very dangerous to let streams cross, and so the Ghostbusters must choose pairings for which no streams will cross. Assume that the position of each Ghostbuster and each ghost is a fixed point in the plane and that no three positions are collinear. 519 | > 520 | > 1. Argue that there exists a line passing through one Ghostbuster and one ghost such the number of Ghostbusters on one side of the line equals the number of ghosts on the same side. Describe how to nd such a line in O(n log n) time. 521 | > 2. Give an O(n^2^ log n)-time algorithm to pair Ghostbusters with ghosts in such a way that no streams cross. 522 | 523 | ### 1 524 | 525 | 我们记Ghostbusters 为平面上红色的点,ghost为平面上黑色的点。 526 | 527 | 我们首先证明,给定平面上的点,我们能找到一条经过红色和黑色的点,使得在该直线的一侧有等数量的红色点和黑色点。 528 | 529 | 平面上有n个红点和n个黑点。找到所有2n个点中纵坐标y最小的点。若有两个(因为没有三点共线)点y相同取横坐标x较小的那一个点,记录该点为点A,设A为红色的点(黑色同理,这里以红色为例)。以A点向其余所有的点连线,计算与x轴正向的夹角( [1,0] ),然后按照夹角排序。这样,从x轴正向出发,按照夹角的大小扫描点,初始设置cnt_b = cnt_r=0(黑色和红色的点的个数为0),若当前点是红色,那么cnt_r ++ ,若为黑色,则查看cnt_b 是否等于cnt_r,若相等,该点就是我们要找的分界点。否则cnt_b ++. 530 | 531 | 由于进行排序,所以复杂度为O(nlogn) 532 | 533 | ### 2 534 | 535 | 每一次调用方法一的划分,然后分成两边,对每一边递归即可。 536 | 537 | 由于每次配对1个,并进行一次排序,因此 538 | $$ 539 | T(n) = T(n-1) + nlog(n) 540 | $$ 541 | 因此复杂度为n^2^logn 542 | 543 | 544 | 545 | ## Problem 8. 546 | 547 | > The attachedfile Q5.txt contains 100,000 integers between 1 and 100,000 (each row has asingle integer), the order of these integers is random and no integer is repeated. 548 | > 549 | > 1. Write a program to implement the Sort-and-Count algorithms in yourfavorite language, find the number of inversions in the given file. 550 | > 2. In the lecture, we count the number of inversions in O(n log n)time, using the Merge-Sort idea. Is it possible to use the Quick-Sort ideainstead ?If possible, implement the algorithm inyour favourite language, run it over the given file, and compare its runningtime with the one above. If not, give a explanation. 551 | 552 | ### 1.合并排序 553 | 554 | ```python 555 | def merge_sort(L, R, a): 556 | if L >= R - 1: return 0 557 | mid = (L + R) >> 1 558 | cnt = merge_sort(L, mid, a) 559 | cnt += merge_sort(mid, R, a) 560 | return cnt + merge(L, mid, R, a) 561 | 562 | def merge(L, le, R, a): 563 | cnt = 0 564 | rs = le 565 | ls = L 566 | x = [] 567 | while ls < le and rs < R: 568 | if a[ls] <= a[rs]: 569 | x.append(a[ls]) 570 | ls += 1 571 | else: 572 | x.append(a[rs]) 573 | rs += 1 574 | cnt += le - ls 575 | while ls < le: 576 | x.append(a[ls]) 577 | ls += 1 578 | while rs < R: 579 | x.append(a[rs]) 580 | rs += 1 581 | 582 | for i in range(L, R): 583 | a[i] = x[i - L] 584 | return cnt 585 | 586 | # cnt = merge_sort(0, len(A), A) 587 | ``` 588 | 589 | ### 2.快速排序 590 | 591 | 可以用快速排序来完成,需要注意的是,每次划分不再是简单的交换左右两边,而是要分别遍历来保证有序。 592 | 593 | 在下面的划分函数中,使用了辅助数组t,首先处理小于a[L]的情况, 不断的将小于a[L]的元素放进t中,而这些会构成的逆序数个数为 i - L - len(t),就是说 和t的长度的差值说明之前有i - L - len(t)个元素不小于a[L],而当前的元素a[i] < a[L],因此逆序数个数应该加上i - L - len(t)个。 594 | 595 | 下面为代码: 596 | 597 | ```python 598 | def quick_sort(L, R, a): 599 | if L >= R: return 0 600 | i, cnt = partition(L, R, a) 601 | cnt += quick_sort(L, i - 1, a) 602 | cnt += quick_sort(i + 1, R, a) 603 | return cnt 604 | 605 | def partition(L, R, a): 606 | cnt = 0 607 | t = [] 608 | base = a[L] 609 | i = L + 1 610 | while i <= R: 611 | if a[i] < base: 612 | cnt += i - L - len(t) 613 | t.append(a[i]) 614 | i += 1 615 | j = len(t) + L 616 | t.append(base) # or delete this line, and let i = L 617 | i = L + 1 618 | while i <= R: 619 | if base <= a[i]: 620 | t.append(a[i]) 621 | i += 1 622 | 623 | for i in range(L, R + 1): 624 | a[i] = t[i - L] 625 | 626 | return j, cnt 627 | 628 | # cnt = quick_sort(0, len(A) - 1, A) 629 | ``` 630 | 631 | ### 测试输入与结果 632 | 633 | 读入Q8.txt的数据以及时间测试结果的代码为: 634 | 635 | ```python 636 | if __name__ == '__main__': 637 | from datetime import datetime 638 | 639 | with open('./Q8.txt') as f: 640 | a = list(map(int, f.read().split())) 641 | start = datetime.now() 642 | print(merge_sort(0, len(a), a[:])) 643 | print('complete merge sort in {}s'.format((datetime.now() - start).total_seconds())) 644 | start = datetime.now() 645 | print(quick_sort(0, len(a) - 1, a)) 646 | print('complete quick sort in {}s'.format((datetime.now() - start).total_seconds())) 647 | ``` 648 | 649 | 上述程序输出: 650 | 651 | ``` 652 | 2500572073 653 | complete merge sort in 1.207803s 654 | 2500572073 655 | complete quick sort in 1.722145s 656 | ``` 657 | 658 | 659 | 660 | ## Problem 9. 661 | 662 | > Implement the algorithm for the closest pair problem in your favourite language. 663 | > INPUT: Given n points in a plane. 664 | > OUTPUT: the pair with the least Euclidean distance. 665 | 666 | 首先定义欧式距离: 667 | 668 | ```python 669 | def euclidean_dis_pow(a, b): 670 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) 671 | ``` 672 | 673 | **这里假设两点的欧式距离不大于int最大值(0x7fffffff)** 674 | 675 | O(nlogn^2^)的算法实现如下: 676 | 677 | ```python 678 | def solve_closest_pair_n_logn2(points): 679 | def closest_pair(L, R, points): 680 | if L == R: return 0x7fffffff, points[L], points[R] # return int max 681 | if R - L == 1: return euclidean_dis_pow(points[L], points[R]), points[L], points[R] 682 | mid = (L + R) >> 1 683 | d, p1, p2 = closest_pair(L, mid, points) 684 | d2, p3, p4 = closest_pair(mid + 1, R, points) 685 | if d > d2: 686 | d, p1, p2 = d2, p3, p4 687 | 688 | min_x = points[mid][0] - d 689 | max_x = points[mid][0] + d 690 | 691 | suspect = [points[i] for i in range(L, R + 1) if min_x <= points[i][0] <= max_x] 692 | 693 | suspect.sort(key=lambda x: x[1]) 694 | n = len(suspect) 695 | for i in range(n): 696 | for j in range(i + 1, n): 697 | if suspect[j][1] - suspect[i][1] > d: break 698 | t = euclidean_dis_pow(suspect[i], suspect[j]) 699 | if t < d: 700 | d = t 701 | p1, p2 = suspect[i], suspect[j] 702 | return d, p1, p2 703 | 704 | points.sort(key=cmp_to_key(lambda x, y: x[0] - y[0] if x[0] != y[0] else x[1] - y[1])) 705 | return closest_pair(0, len(points) - 1, points) 706 | 707 | # d,p1,p2 = solve_closest_pair_n_logn2(points) 708 | ``` 709 | 710 | 根据合并排序的思想,可将上述的提升到O(nlogn) 711 | 712 | ```python 713 | def solve_closest_pair_n_logn(points): 714 | def merge(ls, le, re, a): 715 | start = ls 716 | rs = le + 1 717 | b = [] 718 | while ls <= le and rs <= re: 719 | if a[ls][1] < a[rs][1]: 720 | b.append(a[ls]) 721 | ls += 1 722 | else: 723 | b.append(a[rs]) 724 | rs += 1 725 | 726 | for i in range(ls, le + 1): 727 | b.append(a[i]) 728 | for i in range(rs, re + 1): 729 | b.append(a[i]) 730 | for i in range(start, re + 1): 731 | a[i] = b[i - start] 732 | 733 | def closest_pair(L, R, points, y_sorted): 734 | if L == R: return 0x7fffffff, points[L], points[R] # return int max 735 | if R - L == 1: 736 | if y_sorted[L][1] > y_sorted[R][1]: 737 | y_sorted[L], y_sorted[R] = y_sorted[R], y_sorted[L] 738 | return euclidean_dis_pow(points[L], points[R]), points[L], points[R] 739 | mid = (L + R) >> 1 740 | d, p1, p2 = closest_pair(L, mid, points, y_sorted) 741 | d2, p3, p4 = closest_pair(mid + 1, R, points, y_sorted) 742 | merge(L, mid, R, y_sorted) 743 | if d > d2: 744 | d, p1, p2 = d2, p3, p4 745 | 746 | min_x = points[mid][0] - d 747 | max_x = points[mid][0] + d 748 | 749 | suspect = [y_sorted[i] for i in range(L, R + 1) if min_x <= y_sorted[i][0] <= max_x] 750 | n = len(suspect) 751 | for i in range(n): 752 | for j in range(i + 1, n): 753 | if suspect[j][1] - suspect[i][1] > d: break 754 | t = euclidean_dis_pow(suspect[i], suspect[j]) 755 | if t < d: 756 | d = t 757 | p1, p2 = suspect[i], suspect[j] 758 | return d, p1, p2 759 | 760 | points.sort(key=cmp_to_key(lambda x, y: x[0] - y[0] if x[0] != y[0] else x[1] - y[1])) 761 | y_sorted = points[:] 762 | return closest_pair(0, len(points) - 1, points, y_sorted) 763 | 764 | # d,p1,p2 = solve_closest_pair_n_logn2(points) 765 | ``` 766 | 767 | ## Problem 10. 768 | 769 | > Implement the Strassen algorithm algorithm for MatrixMultiplication problem in your favourite language, and compare the performance with grade-school method. 770 | 771 | 首先定义矩阵的加法、减法和小学生乘法。 772 | 773 | ```python 774 | def matrix_brute_mul(A, B): 775 | n = len(A) 776 | C = [[0 for _ in range(n)] for _ in range(n)] 777 | for i in range(n): 778 | for j in range(n): 779 | for k in range(n): 780 | C[i][j] += A[i][k] * B[k][j] 781 | return C 782 | 783 | def matrix_add_or_sub(A, B, add=True): 784 | n = len(A) 785 | return [[A[i][j] + B[i][j] if add else A[i][j] - B[i][j] for j in range(n)] for i in range(n)] 786 | ``` 787 | 788 | 接下来是strassen的矩阵乘法。 789 | 790 | 为了能处理奇数的情况,这里将其扩展为2的整次幂,不够的补0。这样,在分治过程中,一定能保证矩阵能刚好被划分。 791 | 792 | 具体的做法是求大于等于当前的维度的最小2的整次幂,即 793 | $$ 794 | 2^{ ceil(log_{n}2)} 795 | $$ 796 | 其中,ceil是向上取整。 797 | 798 | ```python 799 | from math import ceil, log 800 | def strassen_matrix_mul(A, B): 801 | before_n = len(A) 802 | n = 2 ** ceil(log(before_n, 2)) 803 | for i in range(before_n): 804 | A[i].extend([0] * (n - before_n)) 805 | B[i].extend([0] * (n - before_n)) 806 | for i in range(before_n, n): 807 | A.append([0] * n) 808 | B.append([0] * n) 809 | C = _strassen_mul(A, B)[:before_n] 810 | return [row[:before_n] for row in C] 811 | ``` 812 | 813 | 接下来是strassen矩阵乘法核心的分治过程: 814 | 815 | ```python 816 | def _strassen_mul(A, B): 817 | n = len(A) 818 | if n == 1: return [[A[0][0] * B[0][0]]] 819 | if n == 2: matrix_brute_mul(A, B) 820 | half_n = n >> 1 821 | A11, A12, A21, A22 = [], [], [], [] 822 | B11, B12, B21, B22 = [], [], [], [] 823 | for i in range(half_n): 824 | A11.append(A[i][:half_n][:]) 825 | A12.append(A[i][half_n:][:]) 826 | B11.append(B[i][:half_n][:]) 827 | B12.append(B[i][half_n:][:]) 828 | A21.append(A[i + half_n][:half_n][:]) 829 | A22.append(A[i + half_n][half_n:][:]) 830 | B21.append(B[i + half_n][:half_n][:]) 831 | B22.append(B[i + half_n][half_n:][:]) 832 | 833 | P1 = _strassen_mul(A11, matrix_add_or_sub(B12, B22, False)) 834 | P2 = _strassen_mul(matrix_add_or_sub(A11, A12), B22) 835 | P3 = _strassen_mul(matrix_add_or_sub(A21, A22), B11) 836 | P4 = _strassen_mul(A22, matrix_add_or_sub(B21, B11, False)) 837 | P5 = _strassen_mul(matrix_add_or_sub(A11, A22), matrix_add_or_sub(B11, B22)) 838 | P6 = _strassen_mul(matrix_add_or_sub(A12, A22, False), matrix_add_or_sub(B21, B22)) 839 | P7 = _strassen_mul(matrix_add_or_sub(A11, A21, False), matrix_add_or_sub(B11, B12)) 840 | 841 | C11 = matrix_add_or_sub(matrix_add_or_sub(matrix_add_or_sub(P4, P5), P6), P2, False) 842 | C12 = matrix_add_or_sub(P1, P2) 843 | C21 = matrix_add_or_sub(P3, P4) 844 | C22 = matrix_add_or_sub(matrix_add_or_sub(matrix_add_or_sub(P1, P5), P3, False), P7, False) 845 | 846 | C = [[] for _ in range(n)] 847 | for i in range(half_n): 848 | C[i].extend(C11[i]) 849 | C[i].extend(C12[i]) 850 | C[i + half_n].extend(C21[i]) 851 | C[i + half_n].extend(C22[i]) 852 | return C 853 | 854 | # C = strassen_matrix_mul(A, B) 855 | ``` 856 | 857 | 对比小学生矩阵乘法,比较代码如下: 858 | 859 | ```python 860 | if __name__ == '__main__': 861 | from datetime import datetime 862 | import random 863 | 864 | n = 64 865 | A, B = [], [] 866 | for i in range(n): 867 | A.append([random.randint(0, n ** 2) for _ in range(n)]) 868 | B.append([random.randint(0, n ** 2) for _ in range(n)]) 869 | 870 | start = datetime.now() 871 | matrix_brute_mul(A, B) 872 | print('complete grade-school method in {}s'.format((datetime.now() - start).total_seconds())) 873 | 874 | start = datetime.now() 875 | strassen_matrix_mul(A[:], B[:]) 876 | print('complete Strassen method in {}s'.format((datetime.now() - start).total_seconds())) 877 | ``` 878 | 879 | 上面的测试结果为: 880 | 881 | ``` 882 | complete grade-school method in 0.101565s 883 | complete Strassen method in 1.840224s 884 | ``` 885 | 886 | 反而不如小学生矩阵,我感觉是因为python动态类型+函数调用引起的。 887 | 888 | ## Problem 11. 889 | 890 | >Implement the Karatsuba algorithm for Multiplication problem in your favourite language, and compare the performance with quadratic grade-school method. 891 | 892 | ```python 893 | def quick_mul(x, y): 894 | s_x, s_y = str(x), str(y) 895 | if len(s_x) == 1 or len(s_y) == 1: return x * y 896 | n = max(len(s_x), len(s_y)) 897 | half_n = n >> 1 898 | pow_half_n = 10 ** half_n 899 | xh = x // pow_half_n 900 | xl = x % pow_half_n 901 | yh = y // pow_half_n 902 | yl = y % pow_half_n 903 | p = quick_mul(xh + xl, yh + yl) 904 | h = quick_mul(xh, yh) 905 | l = quick_mul(xl, yl) 906 | if n & 1: n -= 1 907 | return h * (10 ** n) + (p - h - l) * (10 ** half_n) + l 908 | 909 | # quick_mul(x,y) 910 | ``` -------------------------------------------------------------------------------- /1_Divide_and_Conquer/readme.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/1_Divide_and_Conquer/readme.pdf -------------------------------------------------------------------------------- /2_DP/1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/7 3 | # @Author : hrwhisper 4 | 5 | 6 | def largest_divisible_subset(nums): 7 | if len(nums) <= 1: return nums 8 | nums.sort() 9 | n = len(nums) 10 | dp = [1] * n 11 | update_from = [-1] * n 12 | max_len, max_index = 1, 0 13 | for i in range(1, n): 14 | for j in range(i - 1, -1, -1): 15 | if nums[i] % nums[j] == 0 and dp[j] + 1 > dp[i]: 16 | dp[i] = dp[j] + 1 17 | update_from[i] = j 18 | 19 | if dp[i] > max_len: 20 | max_len = dp[i] 21 | max_index = i 22 | 23 | ans = [] 24 | while max_index != -1: 25 | ans.append(nums[max_index]) 26 | max_index = update_from[max_index] 27 | return ans 28 | 29 | 30 | if __name__ == '__main__': 31 | # print(s.largestDivisibleSubset([1])) 32 | # print(s.largestDivisibleSubset([1, 2, 3])) 33 | # print(s.largestDivisibleSubset([1, 2, 4, 8])) 34 | print(largest_divisible_subset([3, 4, 6, 12, 18, 54])) 35 | -------------------------------------------------------------------------------- /2_DP/2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/7 3 | # @Author : hrwhisper 4 | 5 | 6 | def rob_circle(nums): 7 | if not nums: return 0 8 | n = len(nums) 9 | if n <= 2: return max(nums) 10 | dp1 = [0] * n 11 | dp2 = [0] * n 12 | dp1[0] = dp1[1] = nums[0] 13 | dp2[1] = nums[1] 14 | for i in range(2, n): 15 | dp1[i] = max(dp1[i - 1], dp1[i - 2] + nums[i]) 16 | dp2[i] = max(dp2[i - 1], dp2[i - 2] + nums[i]) 17 | return max(dp1[n - 2], dp2[n - 1]) 18 | 19 | 20 | def rob_no_circle(nums): 21 | if not nums: return 0 22 | n = len(nums) 23 | if n <= 2: return max(nums) 24 | dp = [0] * n 25 | dp[0] = nums[0] 26 | dp[1] = max(nums[0], nums[1]) 27 | for i in range(2, n): 28 | dp[i] = max(dp[i - 1], dp[i - 2] + nums[i]) 29 | return dp[n - 1] 30 | 31 | 32 | def rob_circle2(self, nums): 33 | if not nums: return 0 34 | if len(nums) <= 2: return max(nums) 35 | return max(rob_no_circle(nums[:-1]), rob_no_circle(nums[1:])) 36 | 37 | 38 | if __name__ == '__main__': 39 | s = Solution() 40 | print(s.rob([1, 2, 3])) 41 | print(rob_circle([1,2,3])) 42 | -------------------------------------------------------------------------------- /2_DP/3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/11 3 | # @Author : hrwhisper 4 | 5 | 6 | def partition(s): 7 | def helper(i, j): 8 | while j >= 0 and i < n: 9 | if s[i] != s[j]: 10 | break 11 | dp[i] = min(dp[i], dp[j - 1] + 1 if j > 0 else 0) 12 | i, j = i + 1, j - 1 13 | 14 | n = len(s) 15 | dp = [0] + [0x7fffffff] * n 16 | for k in range(1, n): 17 | helper(k, k) # odd case 18 | helper(k, k - 1) # even case 19 | 20 | return dp[n - 1] 21 | 22 | 23 | if __name__ == '__main__': 24 | print(partition('aab')) 25 | print(partition('aaba')) 26 | print(partition('aba')) 27 | print(partition('a')) 28 | -------------------------------------------------------------------------------- /2_DP/4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/7 3 | # @Author : hrwhisper 4 | 5 | 6 | def decoding_ways(s): 7 | """ 8 | :type s: str 9 | :rtype: int 10 | """ 11 | if not s: return 0 12 | n = len(s) 13 | dp = [0] * n 14 | dp[0] = 1 if s[0] != '0' else 0 15 | for i in range(1, n): 16 | if 10 <= int(s[i - 1:i + 1]) <= 26: 17 | dp[i] += dp[i - 2] if i >= 2 else 1 18 | if s[i] != '0': 19 | dp[i] += dp[i - 1] 20 | return dp[n - 1] 21 | 22 | 23 | if __name__ == '__main__': 24 | print(decoding_ways('0')) # 0 25 | print(decoding_ways('10')) # 1 26 | print(decoding_ways('1211')) # 5 27 | print(decoding_ways('12011')) # 2 28 | print(decoding_ways('12345')) # 3 29 | print(decoding_ways('01')) # 0 30 | -------------------------------------------------------------------------------- /2_DP/5.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/11 3 | # @Author : hrwhisper 4 | import collections 5 | 6 | 7 | def can_cross2(stones): 8 | n = len(stones) 9 | val2id = {stone: i for i, stone in enumerate(stones)} 10 | dp = collections.defaultdict(lambda: collections.defaultdict(int)) 11 | dp[1][0] = True 12 | for j in range(1, n): 13 | for i in dp[j]: # the same as dp[j].keys() 14 | step = stones[j] - stones[i] 15 | for k in [step + 1, step, step - 1]: 16 | _next = stones[j] + k 17 | if _next in val2id: 18 | _id = val2id[_next] 19 | if _id == n - 1: 20 | return True 21 | if _id != j: 22 | dp[_id][j] = True 23 | return False 24 | 25 | 26 | def can_cross(stones): 27 | dp = {stone: {} for stone in stones} 28 | dp[0][0] = 0 29 | for stone in stones: 30 | for step in dp[stone].values(): 31 | for k in [step + 1, step, step - 1]: 32 | if k > 0 and stone + k in dp: 33 | dp[stone + k][stone] = k 34 | return len(dp[stones[-1]].keys()) > 0 35 | 36 | 37 | if __name__ == '__main__': 38 | print(can_cross([0, 2])) 39 | print(can_cross([0, 1, 3, 5, 6, 8, 12, 17])) 40 | print(can_cross([0, 1, 2, 3, 4, 8, 9, 11])) -------------------------------------------------------------------------------- /2_DP/6.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/7 3 | # @Author : hrwhisper 4 | 5 | 6 | def max_profit(prices): 7 | if not prices or len(prices) < 2: return 0 8 | n = len(prices) 9 | min_price = prices[0] 10 | dp = [0] * n 11 | for i in range(1, n): 12 | dp[i] = max(dp[i - 1], prices[i] - min_price) 13 | min_price = min(prices[i], min_price) 14 | 15 | max_price = prices[n - 1] 16 | ans = dp[n - 1] 17 | max_profit = 0 18 | for i in range(n - 1, 0, -1): 19 | max_profit = max(max_profit, max_price - prices[i]) 20 | max_price = max(max_price, prices[i]) 21 | ans = max(ans, max_profit + dp[i - 1]) 22 | return ans 23 | 24 | 25 | if __name__ == '__main__': 26 | print(max_profit([3, 2, 6, 5, 0, 3])) # 7 27 | print(max_profit([1, 2, 5, 2, 3, 10, 2, 15])) # 22 28 | print(max_profit([7, 1, 5, 3, 6, 4])) # 7 29 | print(max_profit([1, 2])) # 1 30 | print(max_profit([1, 2, 3])) # 2 31 | print(max_profit([1, 10, 1, 5])) # 13 32 | print(max_profit([2, 1])) # 0 33 | print(max_profit([1])) 34 | print(max_profit([])) 35 | -------------------------------------------------------------------------------- /2_DP/7.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/11 3 | # @Author : hrwhisper 4 | 5 | 6 | def longest_increasing_subsequence(nums): 7 | if not nums: return 0 8 | n = len(nums) 9 | dp = [1] * n 10 | update_from = [-1] * n 11 | 12 | lis_len = 1 13 | index = 0 14 | for i in range(1, n): 15 | for j in range(i - 1, -1, -1): 16 | if nums[j] < nums[i] and dp[i] < dp[j] + 1: 17 | dp[i] = dp[j] + 1 18 | update_from[i] = j 19 | if dp[i] > lis_len: 20 | lis_len, index = dp[i], i 21 | 22 | ans = [] 23 | while index != -1: 24 | ans.append(nums[index]) 25 | index = update_from[index] 26 | return lis_len, ans[::-1] 27 | 28 | 29 | def binary_search(g, x, L, R): 30 | while L < R: 31 | mid = (L + R) >> 1 32 | if g[mid] < x: 33 | L = mid + 1 34 | else: 35 | R = mid 36 | return L 37 | 38 | 39 | def longest_increasing_subsequence_nlogn(nums): 40 | if not nums: return 0 41 | n = len(nums) 42 | dp = [1] * n 43 | g = [0x7fffffff] * (n + 1) 44 | update_from = [-1] * (n + 1) 45 | indexs = [-1] * (n + 1) 46 | lis_len = 1 47 | index = 0 48 | for i in range(n): 49 | k = binary_search(g, nums[i], 1, n) 50 | g[k] = nums[i] 51 | dp[i] = k 52 | indexs[k] = i 53 | update_from[i] = indexs[k - 1] 54 | if dp[i] > lis_len: 55 | lis_len, index = dp[i], i 56 | 57 | ans = [] 58 | while index != -1: 59 | ans.append(nums[index]) 60 | index = update_from[index] 61 | return lis_len, ans[::-1] 62 | 63 | 64 | def test(test_cnt=1000, array_num=1000): 65 | import random 66 | L = 1 67 | R = array_num ** 2 68 | for i in range(test_cnt): 69 | A = [] 70 | for _ in range(random.randint(1, array_num)): 71 | A.append(random.randint(L, R)) 72 | try: 73 | t1 = longest_increasing_subsequence_nlogn(A[:]) 74 | t2 = longest_increasing_subsequence(A[:]) 75 | if t1 != t2: 76 | print(t1, t2) 77 | return 78 | except Exception as e: 79 | print(e) 80 | print(A) 81 | return 82 | print('ok') 83 | 84 | 85 | if __name__ == '__main__': 86 | nums = [1, 3, 4, 2, 1, 6, 2] 87 | print(longest_increasing_subsequence(nums)) 88 | print(longest_increasing_subsequence_nlogn(nums)) 89 | test() 90 | -------------------------------------------------------------------------------- /2_DP/Assignment2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/2_DP/Assignment2.pdf -------------------------------------------------------------------------------- /2_DP/readme.md: -------------------------------------------------------------------------------- 1 | # 计算机算法设计与分析-作业2(DP) 2 | 3 | - Author: hrwhipser 4 | - https://github.com/hrwhisper/algorithm_course/ 5 | 6 | 7 | 8 | ## 说明 9 | 10 | - 采用python 3.5.2编写了所有的代码 11 | - 在作业提交期限截止后,所有的代码可以在如下网址找到: 12 | - https://github.com/hrwhisper/algorithm_course 13 | 14 | 15 | 16 | ## 1. Largest Divisible Subset 17 | 18 | > Given a set of distinct positive integers,find the largest subset such that every pair ( S~i~ , S~j~ ) of elements in this subset satisfies: S~i~ % S~j~ = 0 or S~j~ % S~i~ = 0. 19 | 20 | ### 问题分析(最优子结构及DP表达式) 21 | 22 | 该题目给定了一个正整数的集合,要求求最大的子集使得在子集中任意的元素均有 S~i~% S~j~ = 0 或 S~j~%S~i~ = 0. 23 | 24 | 若我们先对原集合排序,那么对于一个元素X,有X % Y == 0 (Y为它之前的元素),那么有X % Z == 0 (Z为Y之前的元素且有Y % Z == 0)。因此,该问题具有最优子结构。 25 | 26 | 设dp[i]为到达i为止的最大的可整除集合大小,我们可以写出dp表达式如下(有些类似于LIS算法): 27 | $$ 28 | dp[i] = max(dp[j] + 1) ,j < i \ \&\&\ num[i] \% num[j] == 0 29 | $$ 30 | 最后,要求出最大的子集合,只需要对dp数组进行回溯查找即可。当然也可以用另个一个数组记录更新的下标,可以更快的进行回溯。 31 | 32 | ### 代码 33 | 34 | ```python 35 | def largest_divisible_subset(nums): 36 | if len(nums) <= 1: return nums 37 | nums.sort() 38 | n = len(nums) 39 | dp = [1] * n 40 | update_from = [-1] * n 41 | max_len, max_index = 1, 0 42 | for i in range(1, n): 43 | for j in range(i - 1, -1, -1): 44 | if nums[i] % nums[j] == 0 and dp[j] + 1 > dp[i]: 45 | dp[i] = dp[j] + 1 46 | update_from[i] = j 47 | 48 | if dp[i] > max_len: 49 | max_len = dp[i] 50 | max_index = i 51 | 52 | ans = [] 53 | while max_index != -1: 54 | ans.append(nums[max_index]) 55 | max_index = update_from[max_index] 56 | return ans 57 | ``` 58 | 59 | ### 正确性证明 60 | 61 | 在问题分析中,已经给出,若对集合排序,则有如下成立: 62 | $$ 63 | X \% Y = 0\ \&\& \ Y \% Z = 0 => X \% Z = 0 , 其中Y < X , Z < Y 64 | $$ 65 | 由于Z能被Y整除,说明Y有Z这个因子,而Y能被X整除,说明X有Y这个因子,而Y有Z这个因子,所以Z能被X整除。 66 | 67 | 由于上述的最优子结构正确,因此我们的递推表达式通过枚举小于X的所有元素进行更新也同样正确。 68 | 69 | ### 时间复杂度分析 70 | 71 | 对于dp的过程,最坏情况下为O(n^2^),而回溯过程为O(n),因此总复杂度为O(n^2^) 72 | 73 | ## 2. Money robbing 74 | 75 | > A robber is planning to rob houses along astreet. Each house has a certain amount of money stashed, the only constraint stopping you from robbing each of them is that adjacent houses have security system connected and it will automatically contact the police if two adjacent houses were broken into on the same night. 76 | > 77 | > 1. Given a list of non-negative integers representing the amount of money of each house, determine the maximum amount of money you can rob tonight without alerting the police. 78 | > 2. What if all houses are arranged in a circle? 79 | 80 | ### 问题分析(最优子结构及DP表达式) 81 | 82 | 我们设dp[i]表示到第i个房子能抢到的最大值。 83 | 84 | 对于一个房子,若选择抢, 则上一个房子不能抢,若选择不抢,则保留为到i-1个房子的最大值。 85 | 86 | 因此有如下递推表达式: 87 | $$ 88 | dp[i] = max(dp[i-1],dp[i-2] + nums[i]) 89 | $$ 90 | 若为圆形,说明第一个房子和最后的房子不能同时抢,此时可以设两个dp,dp1为为抢了第一个房子,dp2为不抢第一个房子,然后按照上面的式子更新, 最后结果为max(dp1[n - 2], dp2[n - 1])。或者是,分别计算 [0,n-2]和[1,n-1] 能抢到的最大值,然后取max. 91 | 92 | ### 代码 93 | 94 | - 所有的房子为直线 95 | 96 | ```python 97 | def rob_no_circle(nums): 98 | if not nums: return 0 99 | n = len(nums) 100 | if n <= 2: return max(nums) 101 | dp = [0] * n 102 | dp[0] = nums[0] 103 | dp[1] = max(nums[0], nums[1]) 104 | for i in range(2, n): 105 | dp[i] = max(dp[i - 1], dp[i - 2] + nums[i]) 106 | return dp[n - 1] 107 | ``` 108 | 109 | - 房子为环 - 方法1 双dp 110 | 111 | ```python 112 | def rob_circle(nums): 113 | if not nums: return 0 114 | n = len(nums) 115 | if n <= 2: return max(nums) 116 | dp1 = [0] * n 117 | dp2 = [0] * n 118 | dp1[0] = dp1[1] = nums[0] 119 | dp2[1] = nums[1] 120 | for i in range(2, n): 121 | dp1[i] = max(dp1[i - 1], dp1[i - 2] + nums[i]) 122 | dp2[i] = max(dp2[i - 1], dp2[i - 2] + nums[i]) 123 | return max(dp1[n - 2], dp2[n - 1]) 124 | ``` 125 | 126 | - 房子为环 - 方法2 直接调用为直线情况的code 127 | 128 | ```python 129 | def rob_circle2(nums): 130 | if not nums: return 0 131 | if len(nums) <= 2: return max(nums) 132 | return max(rob_no_circle(nums[:-1]), rob_no_circle(nums[1:])) 133 | ``` 134 | 135 | ### 正确性证明 136 | 137 | 该题正确性的证明在于对状态转移方程的正确性证明。 138 | 139 | 对于i不抢,显然只能为dp[i-1],而抢显然为dp[i-2] + nums[i],因此我们只需要取其最大值即可。 140 | 141 | 环形情况同理。 142 | 143 | ### 时间复杂度分析 144 | 145 | - 对于直线的情况,由于只遍历了一次数组,因此复杂度为O(n) 146 | 147 | 148 | - 对于环形的情况: 149 | - 方法1也只遍历了一次数组,复杂度为O(n) 150 | - 方法2遍历了两次,复杂度也为O(n) 151 | 152 | ## 3. Partition 153 | 154 | > Given a string s, partition s such that every substring of the partition is a palindrome. Return the minimum cuts needed for a palindrome partitioning of s. 155 | > For example, given s = “aab", return 1 since the palindrome partitioning ["aa", "b"] could be produced using 1 cut. 156 | 157 | ### 问题分析(最优子结构及DP表达式) 158 | 159 | 该问题要求求解最小的划分数使得给定的字符串s每个子串均为回文串。 160 | 161 | 于是定义dp[i]为s[0....i]的最小切分次数,使得s[0.....i]的每个子串均为回文串。 162 | 163 | 于是有: 164 | $$ 165 | s[i] = min(s[j-1] + 1) 其中 j < i 且s[j.....i]是回文串 166 | $$ 167 | 上述的可以简单的写出python代码如下: 168 | 169 | ```python 170 | def partition(s): 171 | n = len(s) 172 | dp = [0] + [0x7fffffff] * n 173 | for i in range(1, n): 174 | for j in range(i + 1): 175 | if is_palindrome(s[j:i+1]): # to check whether s[j....i] is palindrome or not. 176 | if j > 0: 177 | dp[i] = min(dp[j - 1] + 1, dp[i]) 178 | else: 179 | dp[i] = 0 180 | return dp[n - 1] # dp[n-1] is the answer 181 | 182 | def is_palindrome(s): 183 | return s==s[::-1] 184 | ``` 185 | 186 | 在上述的解法中,我们枚举i为当前计算的位置,然后枚举j,看看i加入后,是否能和前面的构成了一个回文串,最后在查看j...i是否为回文串。 这样复杂度为O(n^3^) (枚举i O(n) 枚举起始位置j O(n) 判断回文O(n),所以为O(n^3^)) 187 | 188 | 但是,我们还可以做得更好,我们枚举k为当前计算的位置,然后用双指针的思想,从k向两边扩散,判断是否回文(要分别计算长度为奇数和偶数的情况),并根据上述公式更新dp数组。这样,就可以将第一种解法的枚举j和判断回文合并起来,从而把复杂度降低为O(n^2^) 189 | 190 | ### 代码 191 | 192 | ```python 193 | def partition(s): 194 | def helper(i, j): 195 | while j >= 0 and i < n: 196 | if s[i] != s[j]: 197 | break 198 | dp[i] = min(dp[i], dp[j - 1] + 1 if j > 0 else 0) 199 | i, j = i + 1, j - 1 200 | 201 | n = len(s) 202 | dp = [0] + [0x7fffffff] * n 203 | for k in range(1, n): 204 | helper(k, k) # odd case 205 | helper(k, k - 1) # even case 206 | 207 | return dp[n - 1] 208 | ``` 209 | 210 | ### 正确性证明 211 | 212 | 对于新加的一个字母,若能和前面的组成回文串(即s[j...i]为回文串),则可以划分的次数显然为s[j-1] + 1,我们枚举j 显然能得到最小的解。因此算法正确。 213 | 214 | ### 时间复杂度分析 215 | 216 | 在问题分析中已经分析出,复杂度为O(n^2^) 217 | 218 | ## 4. Decoding 219 | 220 | > A message containing letters from A-Z isbeing encoded to numbers using the following mapping: 221 | > 222 | > A : 1 223 | > 224 | > B : 2 225 | > 226 | > ... 227 | > 228 | > Z : 26 229 | > 230 | > Given an encoded message containing digits, determine the total number of ways to decode it. 231 | > 232 | > For example, given encoded message “12”, it could be decoded as “AB” (1 2) or “L”(12). 233 | > 234 | > The number of ways decoding “12” is 2. 235 | 236 | ### 问题分析(最优子结构及DP表达式) 237 | 238 | 对于一个编码后的串s,s的所有的字符出现在0~9之间。 239 | 240 | 要查看其解码方式有多少种可能,主要在于因为有的字符可以被拆分,如12可以算L也可以算AB,而这样的在10~26均是可能的。 241 | 242 | 设dp[i]为s[0...i]最多的解码方式,因此我们有: 243 | $$ 244 | dp[i] += dp[i-1] (如果 s[i]!='0') 245 | $$ 246 | 247 | $$ 248 | dp[i] += dp[i-2] (如果 i >= 2 \&\& 10<=int(s[i-1..i]) <= 26) 249 | $$ 250 | 251 | ### 代码 252 | 253 | ```python 254 | def decoding_ways(s): 255 | if not s: return 0 256 | n = len(s) 257 | dp = [0] * n 258 | dp[0] = 1 if s[0] != '0' else 0 259 | for i in range(1, n): 260 | if 10 <= int(s[i - 1:i + 1]) <= 26: 261 | dp[i] += dp[i - 2] if i >= 2 else 1 262 | if s[i] != '0': 263 | dp[i] += dp[i - 1] 264 | return dp[n - 1] 265 | ``` 266 | 267 | ### 正确性证明 268 | 269 | 对于当前位置,若该位置不是‘0’,则dp[i] += dp[i-1] (为0说明是上一个的遗留) 270 | 271 | 若能和上一个组合的数字范围在[10,26],那么说明解码的方式可以在加上dp[i-2] 272 | 273 | 因此,算法最优子结构和递推表达式均无误。 274 | 275 | ### 时间复杂度分析 276 | 277 | 上述的算法只遍历了一次数组,因此复杂度为O(n) 278 | 279 | ## 5. Frog Jump 280 | 281 | > A frog is crossing a river. The river is divided into x units and at each unit there may or may not exist a stone. The frog can jump on a stone, but it must not jump into the water. 282 | > 283 | > If the frog’s last jump was k units, then its next jump must be either k −1, k, or k +1 units. Note that the frog can only jump in the forward direction. 284 | > 285 | > Given a list of stones’ positions (in units) in sorted ascending order, determine if the frog is able to cross the river by landing on the last stone. Initially, the frog is on the first stone and assume the first jump must be 1 unit. 286 | 287 | ### 问题分析(最优子结构及DP表达式) 288 | 289 | 青蛙过河,上一次跳k长度,下一次只能跳k-1,k或者k+1。 290 | 291 | 因此对于到达了某一个点,我们可以查看其上一次是从哪个点跳过来的。 292 | 293 | 设dp[ j ]\[ i ] 为从i到达j 的步数,初始时把所有的石头存放进hash表。然后设置dp\[0][0] = 0. 接着对于每个石头,从可以到达该石头的所有石头中取出步数k(k > 0),然后当前的stone + k看其是否是合法的石头,是的话就有d\[stone + k ][stone] = k 294 | 295 | ``` 296 | def can_cross(stones): 297 | dp = {stone: {} for stone in stones} 298 | dp[0][0] = 0 299 | for stone in stones: 300 | for step in dp[stone].values(): 301 | for k in [step + 1, step, step - 1]: 302 | if k > 0 and stone + k in dp: 303 | dp[stone + k][stone] = k 304 | return len(dp[stones[-1]].keys()) > 0 305 | ``` 306 | 307 | 308 | 309 | ### 原来的方法 310 | 311 | 设dp[ j ]\[ i ] 从i可以到达j,因此,对于点 j,我们只需要查看可以从哪个地方跳转过来(这里假设为i),然后查看其跳跃的距离$step = stones[j] - stones[i]$ , 则下一次的跳的距离为$step + 1, step, step - 1$ ,然后查看下一个点\_id存不存在(用Hash),存在将dp\[\_id][j] 设置为可达 ,若$\_id==n-1$,说明到达了对岸。这样复杂度为O(n^2^) 312 | 313 | #### 代码 314 | 315 | 在具体的实现上,使用了类似邻接表的方式来加快速度。 316 | 317 | ```python 318 | def can_cross(stones): 319 | n = len(stones) 320 | val2id = {stone: i for i, stone in enumerate(stones)} 321 | dp = collections.defaultdict(lambda :collections.defaultdict(int)) 322 | dp[1][0] = True 323 | for j in range(1, n): 324 | for i in dp[j]: # the same as dp[j].keys() 325 | step = stones[j] - stones[i] 326 | for k in [step + 1, step, step - 1]: 327 | _next = stones[j] + k 328 | if _next in val2id: 329 | _id = val2id[_next] 330 | if _id == n - 1: 331 | return True 332 | if _id != j: 333 | dp[_id][j] = True 334 | return False 335 | ``` 336 | ### 正确性证明 337 | 338 | 上述的算法,用可达矩阵dp[ j ]\[ i ]来标记从i可以到达j,对于任意的点j,我们均查看其可达矩阵中所有的能到达j的点,并计算上一次的步长 step ,然后枚举走 $step + 1, step, step - 1$ 在数组中的位置,并标记对应的可达矩阵。因此这样做不会丢失解。 339 | 340 | ### 时间复杂度分析 341 | 342 | 由于最坏的情况下,每个点距离为1,均为可达的,每次均要从可达矩阵中扫描所有0...j的点,因此复杂度为O(n^2^) 343 | 344 | ## 6. Maximum profit of transactions 345 | 346 | > You have an array for which the *i*-th element is the priceof a given stock on day *i*. 347 | > 348 | > Design an algorithm and implement it to find the maximum profit. You may complete at most two transactions. 349 | > 350 | > Note: You may not engage in multiple transactions at the same time (ie, you must sell the stock beforeyou buy again). 351 | 352 | ### 问题分析 353 | 354 | 该问题要求最多两次交易下能取得的最大值,我们可以先计算一次交易下能取得的最大值。 355 | 356 | 设dp[i]为第i天能取得的最大利润,我们维护一个到[0...i-1]的最小值min_price,因此有 357 | $$ 358 | dp[i] = max(dp[i - 1], prices[i] - min\_price) 359 | $$ 360 | 这样,我们就求出了一次交易下能获取的最大值。 361 | 362 | 要求两次交易的最大值,我们可以逆序扫描数组,维护一个[i+1....n-1]的最大值max_price,并且维护一个到当前位置的最大的利润max_profit,则有: 363 | $$ 364 | ans = max(ans, max\_profit + dp[i - 1]) 365 | $$ 366 | 其中,$max\_profit = max(max\_profit, max\_price - prices[i])$ 367 | 368 | ### 代码 369 | 370 | ```python 371 | def max_profit(prices): 372 | if not prices or len(prices) < 2: return 0 373 | n = len(prices) 374 | min_price = prices[0] 375 | dp = [0] * n 376 | for i in range(1, n): 377 | dp[i] = max(dp[i - 1], prices[i] - min_price) 378 | min_price = min(prices[i], min_price) 379 | 380 | max_price = prices[n - 1] 381 | ans = dp[n - 1] 382 | max_profit = 0 383 | for i in range(n - 1, 0, -1): 384 | max_profit = max(max_profit, max_price - prices[i]) 385 | max_price = max(max_price, prices[i]) 386 | ans = max(ans, max_profit + dp[i - 1]) 387 | return ans 388 | ``` 389 | ### 时间复杂度分析 390 | 391 | 由于进行了两次线性扫描,因此复杂度还是O(n) 392 | 393 | ## 7. Maximum length 394 | 395 | > Given a sequenceof n real numbers *a*1*,...,an*, determine asubsequence (not necessarily contiguous) of maximum length in which the valuesin the subsequence form a strictly increasing sequence. 396 | 397 | ### 方法1 O(n^2^) naive method 398 | 399 | 设dp[i]为以i结尾的最长上升子序列长度,则显然有 400 | $$ 401 | dp[i] = max(dp[j] + 1), j < i \ \&\&\ nums[j]\ <\ nums[i] 402 | $$ 403 | 这样复杂度为O(n^2^) 404 | 405 | 因此写出代码如下: 406 | 407 | ```python 408 | def longest_increasing_subsequence(nums): 409 | if not nums: return 0 410 | n = len(nums) 411 | dp = [1] * n 412 | update_from = [-1] * n 413 | 414 | lis_len = 1 415 | index = 0 416 | for i in range(1, n): 417 | for j in range(i - 1, -1, -1): 418 | if nums[j] < nums[i] and dp[i] < dp[j] + 1: 419 | dp[i] = dp[j] + 1 420 | update_from[i] = j 421 | if dp[i] > lis_len: 422 | lis_len, index = dp[i], i 423 | 424 | ans = [] 425 | while index != -1: 426 | ans.append(nums[index]) 427 | index = update_from[index] 428 | return lis_len, ans[::-1] 429 | ``` 430 | 431 | ### 方法2 O(nlogn) method 432 | 433 | 我们可以做得比O(n^2^)更好,这里仍然设dp[i]为以i结尾的最长上升子序列长度。 434 | 435 | 假设 $dp[x] = dp[y] $, 且$ nums[x] < nums[y] $, 那么对于后续的所有状态i来说(i>x,i>y),显然满足$nums[y] < nums[i] $ 的必然满足$ nums[x] < nums[i]$ ,反之不一定成立。因此只需要保留x的值即可。 436 | 437 | 这里引进辅助数组g,g[i]为LIS长度为i的结尾最小的值。显然 $ g(1) <= g(2) <=……g(n) $ 438 | 439 | 因此,可以进行二分查找。我们查找nums[i]在g中,可以插入的位置,换句话说,就是找一个最小的下标k,使得 $ nums[i] <= g[k] $ ,此时,dp[i] = k, 并且可以更新g[k] = nums[i] 440 | 441 | ```python 442 | def binary_search(g, x, L, R): 443 | while L < R: 444 | mid = (L + R) >> 1 445 | if g[mid] < x: 446 | L = mid + 1 447 | else: 448 | R = mid 449 | return L 450 | 451 | 452 | def longest_increasing_subsequence_nlogn(nums): 453 | if not nums: return 0 454 | n = len(nums) 455 | dp = [1] * n 456 | g = [0x7fffffff] * (n + 1) 457 | update_from = [-1] * (n + 1) 458 | indexs = [-1] * (n + 1) 459 | lis_len = 1 460 | index = 0 461 | for i in range(n): 462 | k = binary_search(g, nums[i], 1, n) 463 | g[k] = nums[i] 464 | dp[i] = k 465 | indexs[k] = i 466 | update_from[i] = indexs[k - 1] 467 | if dp[i] > lis_len: 468 | lis_len, index = dp[i], i 469 | 470 | ans = [] 471 | while index != -1: 472 | ans.append(nums[index]) 473 | index = update_from[index] 474 | return lis_len, ans[::-1] 475 | ``` -------------------------------------------------------------------------------- /2_DP/readme.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/2_DP/readme.pdf -------------------------------------------------------------------------------- /3_Greedy/1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/18 3 | # @Author : hrwhisper 4 | 5 | 6 | def can_be_a_graph(degrees): 7 | d_sum, n = sum(degrees), len(degrees) 8 | if d_sum & 1 or n * (n - 1) < d_sum or max(degrees) > n - 1: return False 9 | for n in range(n, -1, -1): 10 | degrees.sort(reverse=True) # 可以每一次算完类似合并排序合并过程使得总复杂度为O(n^2) 11 | for i in range(1, n): 12 | if degrees[0] <= 0: break 13 | degrees[i] -= 1 14 | if degrees[i] < 0: return False 15 | degrees[0] -= 1 16 | if degrees[0] != 0: return False 17 | return True 18 | 19 | 20 | def merge(a, ls, le, re): 21 | t = [] 22 | _ls = ls 23 | rs = le 24 | while ls < le and rs < re: 25 | if a[ls] >= a[rs]: 26 | t.append(a[ls]) 27 | ls += 1 28 | else: 29 | t.append(a[rs]) 30 | rs += 1 31 | for i in range(ls, le): 32 | t.append(a[i]) 33 | for i in range(rs, re): 34 | t.append(a[i]) 35 | 36 | for i in range(_ls, re): 37 | a[i] = t[i - _ls] 38 | 39 | 40 | def can_be_a_graph2(degrees): 41 | d_sum, n = sum(degrees), len(degrees) 42 | if d_sum & 1 or n * (n - 1) < d_sum or max(degrees) > n - 1: return False 43 | degrees.sort(reverse=True) 44 | while degrees: 45 | k = degrees[0] 46 | for i in range(1, n): 47 | if degrees[0] <= 0: break 48 | degrees[i] -= 1 49 | if degrees[i] < 0: return False 50 | degrees[0] -= 1 51 | if degrees[0] != 0: return False 52 | n -= 1 53 | degrees.pop(0) 54 | merge(degrees, 0, k, n) 55 | return True 56 | 57 | 58 | if __name__ == '__main__': 59 | test_case = [ 60 | [1, 1, 2, 2, 4], # True 61 | [1, 1, 2, 2, 2], # True 62 | [1, 2, 2, 3, 4], # True 63 | [1, 2, 2, 2, 4], # False 64 | [1, 2, 3, 4, 4], # False 65 | [0], # True 66 | [1], # False 67 | ] 68 | 69 | for t in test_case: 70 | print(can_be_a_graph(t[:]) , can_be_a_graph2(t[:])) 71 | -------------------------------------------------------------------------------- /3_Greedy/2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/19 3 | # @Author : hrwhisper 4 | from functools import cmp_to_key 5 | 6 | 7 | def min_complete_time(p, f): 8 | n = len(p) 9 | t = list(zip(range(n), list(zip(p, f)))) # it will be like this: [_id,(pi,fi)] 10 | t.sort(key=cmp_to_key(lambda x, y: y[1][1] - x[1][1])) 11 | order = [] 12 | min_time = timecost = 0 13 | for i in range(n): 14 | order.append(t[i][0]) 15 | timecost += t[i][1][0] 16 | min_time = max(min_time, timecost + t[i][1][1]) 17 | return min_time, order 18 | 19 | 20 | def cal_time(order, p, f): 21 | cur_p_time = 0 22 | total_time = 0 23 | for i in order: 24 | cur_p_time += p[i] 25 | total_time = max(total_time, cur_p_time + f[i]) 26 | return total_time 27 | 28 | 29 | def brute_judge(p, f): 30 | order = list(range(len(p))) 31 | min_time = cal_time(order, p, f) 32 | min_order = order[:] 33 | while next_permutation(order): 34 | cur_time = cal_time(order, p, f) 35 | if cur_time < min_time: 36 | min_time = cur_time 37 | min_order = order[:] 38 | return min_time, min_order 39 | 40 | 41 | def next_permutation(num): 42 | j, k = len(num) - 2, len(num) - 1 43 | while j >= 0: 44 | if num[j] < num[j + 1]: break 45 | j -= 1 46 | 47 | if j < 0: 48 | return False 49 | 50 | while k > j: 51 | if num[k] > num[j]: break 52 | k -= 1 53 | num[j], num[k] = num[k], num[j] 54 | num[:] = num[:j + 1] + num[:j:-1] 55 | return True 56 | 57 | 58 | def min_complete_time2(p, f): 59 | n = len(p) 60 | if n == 0: return 0, [] 61 | if n == 1: return p[0] + f[0], [0] 62 | pf = list(zip(p, f)) # it will be like this: [pi,fi] 63 | order = [] 64 | vis = [False] * n 65 | for _ in range(n): 66 | cur_min_cost = 0x7fffffff 67 | min_id = -1 68 | for _id, (pi, fi) in enumerate(pf): 69 | if vis[_id]: continue 70 | # if min_id == -1: 71 | # min_id = _id 72 | t = max([pi2 + fi2 for _id2, (pi2, fi2) in enumerate(pf) if _id != _id2]) 73 | t = max(t, fi) 74 | if cur_min_cost > t + pi: 75 | cur_min_cost = t + pi 76 | min_id = _id 77 | vis[min_id] = True 78 | order.append(min_id) 79 | min_time = cal_time(order, p, f) 80 | return min_time, order 81 | 82 | 83 | def test(test_cnt=10000, array_num=4, L=1, R=10): 84 | import random 85 | for i in range(test_cnt): 86 | n = random.randint(0, array_num) 87 | p = [random.randint(L, R) for _ in range(n)] 88 | f = [random.randint(L, R) for _ in range(n)] 89 | 90 | min1, order1 = min_complete_time(p[:], f[:]) 91 | min2, order2 = brute_judge(p[:], f[:]) 92 | # min3, order3 = min_complete_time2(p[:], f[:]) 93 | if min1 != min2 :# or min2 != min3: 94 | print(min1, order1) 95 | print(min2, order2) 96 | # print(min3, order3) 97 | print(p) 98 | print(f) 99 | return 100 | print('ok') 101 | 102 | 103 | if __name__ == '__main__': 104 | # test_case = [ 105 | # [ # 6 106 | # [1, 2], 107 | # [3, 4] 108 | # ], 109 | # [ # 14 110 | # [2, 4], 111 | # [4, 10] 112 | # ], 113 | # [ # 14 114 | # [4, 2], 115 | # [10, 6] 116 | # ], 117 | # [ # 14 118 | # [4, 2, 1], 119 | # [10, 6, 3], 120 | # ], 121 | # [ # 8 122 | # [1, 4], 123 | # [5, 3], 124 | # ], 125 | # [[], []], # 0 126 | # [[1], [1]] # 2 127 | # ] 128 | # for p, f in test_case: 129 | # print(min_complete_time(p[:], f[:]), min_complete_time2(p[:], f[:]), brute_judge(p[:], f[:])) 130 | test() 131 | -------------------------------------------------------------------------------- /3_Greedy/3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/22 3 | # @Author : hrwhisper 4 | from math import sqrt 5 | 6 | 7 | class Point(object): 8 | def __init__(self, x, y): 9 | self.x = x 10 | self.y = y 11 | 12 | def __lt__(self, other): 13 | return self.x < other.x 14 | 15 | 16 | def min_radar(points, d): 17 | if not points: return 0 18 | if len(list(filter(lambda point: point.y > d, points))) > 0: return -1 # have no answer 19 | points.sort() 20 | px = points[0].x + sqrt(d * d - points[0].y * points[0].y) 21 | ans = 1 22 | for i in range(1,len(points)): 23 | if (px - points[i].x)**2 + points[i].y * points[i].y <= d*d: continue 24 | cx = points[i].x + sqrt(d * d - points[i].y * points[i].y) 25 | if cx < px: 26 | px = cx 27 | continue 28 | px = cx 29 | ans += 1 30 | return ans 31 | 32 | 33 | if __name__ == '__main__': 34 | test_case = [ 35 | ([Point(1, 2), Point(-3, 1), Point(2, 1)],2), 36 | ([Point(0, 2)],2) 37 | ] 38 | for points,d in test_case: 39 | print(min_radar(points,d)) 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /3_Greedy/4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/17 3 | # @Author : hrwhisper 4 | 5 | 6 | def max_payoff(A, B): 7 | if not A or not B or len(A) != len(B): return 0 8 | A.sort() 9 | B.sort() 10 | ans = 1 11 | for i in range(len(A)): 12 | ans *= A[i] ** B[i] 13 | return ans 14 | 15 | 16 | if __name__ == '__main__': 17 | a = [1, 3, 4, 2, 1, 6, 2] 18 | b = [2, 3, 4, 5, 6, 7, 2] 19 | print(max_payoff(a,b)) 20 | -------------------------------------------------------------------------------- /3_Greedy/5.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/18 3 | # @Author : hrwhisper 4 | import heapq 5 | import collections 6 | 7 | 8 | class TreeNode(object): 9 | def __init__(self, val, cnt, left=None, right=None): 10 | self.cnt = cnt 11 | self.val = val 12 | self.left = left 13 | self.right = right 14 | 15 | def __lt__(self, other): 16 | return self.cnt < other.cnt 17 | 18 | 19 | def create_huffman_tree(txt): 20 | q = [TreeNode(c, cnt) for c, cnt in collections.Counter(txt).items()] 21 | heapq.heapify(q) 22 | while len(q) > 1: 23 | a, b = heapq.heappop(q), heapq.heappop(q) 24 | heapq.heappush(q, TreeNode('', a.cnt + b.cnt, a, b)) 25 | return q.pop() 26 | 27 | 28 | def get_huffman_tree(cur, root, code): 29 | if not root.left and not root.right: # the leaf node 30 | code[root.val] = cur 31 | return 32 | 33 | if root.left: get_huffman_tree(cur + '0', root.left, code) 34 | if root.right: get_huffman_tree(cur + '1', root.right, code) 35 | 36 | 37 | def decode(txt, r_haffman_code, decode_save_path, last_byte=0): 38 | txt = ''.join(['0' * (8 - len(bin(ord(c))[2:])) + bin(ord(c))[2:] for c in txt]) 39 | if last_byte: 40 | txt = txt[:-8] + txt[-last_byte:] 41 | n = len(txt) 42 | cur, decode_txt = '', '' 43 | for i in range(n): 44 | cur += txt[i] 45 | if cur in r_haffman_code: 46 | decode_txt += r_haffman_code[cur] 47 | cur = '' 48 | 49 | with open(decode_save_path, 'w') as f: 50 | f.write(decode_txt) 51 | 52 | 53 | def encode(txt, huffman_code, compress_save_path): 54 | with open(compress_save_path, 'wb') as f: 55 | txt = ''.join([huffman_code[c] for c in txt]) 56 | last_byte = len(txt) % 8 57 | txt = ''.join(chr(int(txt[i:i + 8], 2)) for i in range(0, len(txt), 8)) 58 | f.write(bytes(txt, "utf-8")) 59 | return last_byte 60 | 61 | 62 | if __name__ == '__main__': 63 | file_paths = ['./Aesop_Fables.txt', './graph.txt'] 64 | for cur_file_path in file_paths: 65 | compress_save_path = cur_file_path + '_compressed' 66 | decode_save_path = cur_file_path + '_compressed_decode' 67 | 68 | with open(cur_file_path) as f: 69 | txt = f.read() 70 | 71 | root = create_huffman_tree(txt) 72 | huffman_code = {} 73 | get_huffman_tree('', root, huffman_code) 74 | r_haffman_code = {code: c for c, code in huffman_code.items()} 75 | last_byte = encode(txt, huffman_code, compress_save_path) 76 | 77 | with open(compress_save_path, 'rb') as f: 78 | txt = f.read().decode('utf-8') 79 | 80 | decode(txt, r_haffman_code, decode_save_path, last_byte) 81 | 82 | with open(decode_save_path) as fd, \ 83 | open(cur_file_path) as f, open(compress_save_path) as fp: 84 | t = f.read() 85 | print('{}: compression ratio: {:.2f}, decode file equals original file is' 86 | .format(f.name,len(fp.read()) * 1.0 / len(t)), t == fd.read()) 87 | -------------------------------------------------------------------------------- /3_Greedy/6.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/10/18 3 | # @Author : hrwhisper 4 | 5 | import heapq 6 | import collections 7 | import random 8 | 9 | 10 | class Node(object): 11 | def __init__(self, to, val): 12 | self.to = to 13 | self.val = val 14 | 15 | def __lt__(self, other): 16 | return self.val < other.val 17 | 18 | 19 | def dijkstra(s, t, g): 20 | q = [] 21 | dis = collections.defaultdict(lambda: 0x7fffffff) # [0x7fffffff] * len(g) 22 | vis = collections.defaultdict(bool) # [False] * len(g) 23 | dis[s] = 0 24 | heapq.heappush(q, Node(s, 0)) 25 | 26 | while q: 27 | cur = heapq.heappop(q).to 28 | if vis[cur]: continue 29 | vis[cur] = True 30 | for to, val in g[cur]: 31 | if not vis[to] and dis[cur] + val < dis[to]: 32 | dis[to] = dis[cur] + val 33 | heapq.heappush(q, Node(to, dis[to])) 34 | return dis 35 | 36 | 37 | def count_node_path(s, t, dis, g): 38 | cnt = collections.defaultdict(int) # [0] * len(g) 39 | q = [to for to, val in g[t] if dis[t] == dis[to] + val] 40 | while q: 41 | cur = q.pop() 42 | if cur == s: continue 43 | cnt[cur] += 1 44 | for to, val in g[cur]: 45 | if dis[cur] == dis[to] + val: 46 | q.append(to) 47 | return cnt 48 | 49 | 50 | if __name__ == '__main__': 51 | g = collections.defaultdict(list) 52 | 53 | with open('./graph.txt') as f: 54 | for i, line in enumerate(f): 55 | if i < 6: continue 56 | x, y, val = list(map(int, line.strip().split())) 57 | g[x].append((y, val)) 58 | g[y].append((x, val)) 59 | 60 | s, t = random.randint(0, len(g)), random.randint(0, len(g)) 61 | dis = dijkstra(s, t, g) 62 | print(s, t, dis[t]) 63 | cnt = count_node_path(s, t, dis, g) 64 | print(cnt) 65 | -------------------------------------------------------------------------------- /3_Greedy/Assignment3_Greedy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/3_Greedy/Assignment3_Greedy.pdf -------------------------------------------------------------------------------- /3_Greedy/graph.txt_compressed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/3_Greedy/graph.txt_compressed -------------------------------------------------------------------------------- /3_Greedy/graph2.txt: -------------------------------------------------------------------------------- 1 | # Every line of this file implies an edge. 2 | # First column: one end of an edge; 3 | # Second column: the other end of the edge; 4 | # Third column: weight of the edge. 5 | # This graph is connected and undirected, with positive edge weights. 6 | 7 | 0 1 1 8 | 1 2 1 9 | 2 10 3 10 | 2 6 1 11 | 6 7 1 12 | 7 10 1 13 | 1 3 1 14 | 3 4 1 15 | 4 10 2 16 | -------------------------------------------------------------------------------- /3_Greedy/img/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/3_Greedy/img/1.jpg -------------------------------------------------------------------------------- /3_Greedy/img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/3_Greedy/img/3.png -------------------------------------------------------------------------------- /3_Greedy/readme.md: -------------------------------------------------------------------------------- 1 | # 计算机算法设计与分析-作业3(Greedy) 2 | 3 | - Author: hrwhipser 4 | - https://github.com/hrwhisper/algorithm_course/ 5 | 6 | 7 | 8 | ## 说明 9 | 10 | - 采用python 3.5.2编写了所有的代码 11 | - 在作业提交期限截止后,所有的代码可以在如下网址找到: 12 | - https://github.com/hrwhisper/algorithm_course 13 | 14 | 15 | 16 | ## Problem 1. 17 | 18 | > Given a list of n natural numbers d~1~, d~2~,...,d~n~ , show how to decide in polynomial time whether there exists an undirected graph G = (V, E) whose node degrees are precisely the numbers d~1~, d~2~,...,d~n~. G should not contain multiple edges between the same pair of nodes, or “ loop” edges with both endpoints equal to the same node. 19 | 20 | ### 思路 21 | 22 | 题目给定了n个度,让我们判断是否能形成一个没有两条边连接相同节点且每个边都连接两个节点的图(就是简单图) 23 | 24 | 首先,判断满足如下条件: 25 | 26 | - 度数和为偶数 (对于无向图,每条边贡献两个度,因此度数和必为偶数) 27 | - 总边数不超过 $ \frac{n*(n-1)}{2} $ 在(完全的简单图,**最大度不超过n-1**) 28 | 29 | 接着,我们每次排序(逆序),首先把最大的度数(设为k)安排出去,看看是否足够多的点(k个)来满足它需要的边(就是从前往后扫,每个度-1)。如果不够,说明只能安排重边,即不能构成简单图。 30 | 31 | 其实也是**Havel**定理的运用。 32 | 33 | ### 代码 34 | 35 | ```python 36 | def can_be_a_graph(degrees): 37 | d_sum, n = sum(degrees), len(degrees) 38 | if d_sum & 1 or n * (n - 1) < d_sum or max(degrees) > n - 1: return False 39 | for n in range(n, -1, -1): 40 | degrees.sort(reverse=True) 41 | for i in range(1, n): 42 | if degrees[0] <= 0: break 43 | degrees[i] -= 1 44 | if degrees[i] < 0: return False 45 | degrees[0] -= 1 46 | if degrees[0] != 0: return False 47 | return True 48 | ``` 49 | 50 | ### 正确性证明 51 | 52 | 如果给定的n个节点的度能构成简单图,那么必然满足上面的两个判断条件。 53 | 54 | 紧接着,我们每次排序(逆序),如果能构成简单的图,度数最大的点x(设度数为k)必然可以从其他较大的k个节点中连接一条边,因此如果有k个节点,那么不会有重边出现,这个点可以安排,如果不满足k个节点,显然不能构成简单图。 55 | 56 | ### 时间复杂度分析 57 | 58 | 上述的做法,每次进行排序,每次排序复杂度为O(nlogn),一共有n次排序,因此复杂度O(n^2^logn) 59 | 60 | ## Problem 2. 61 | 62 | > There are n distinct jobs, labeled J~1~, J~2~,···,J~n~, which can be performed completely independently of one another. Each jop consists of two stages: first it needs to be preprocessed on the supercomputer, and then it needs to be finished on one of the PCs. Let’s say that job J~i~ needs p~i~ seconds of time on the supercomputer, followed by f~i~ seconds of time on a PC. Since there are at least n PCs available on the premises, the finishing of the jobs can be performed on PCs at the same time. However, the supercomputer can only work on a single job a time without any interruption. For every job, as soon as the preprocessing is done on the supercomputer, it can be handed off to a PC for finishing. 63 | > 64 | > Let’s say that a schedule is an ordering of the jobs for the supercomputer, and the completion time of the schedule is the earlist time at which all jobs have finished processing on the PCs. Give a polynomial-time algorithm that finds a schedule with as small a completion time as possible. 65 | 66 | ### 思路 67 | 68 | 我们采用贪心的思想,为了尽可能的节约总时间,我们把f~i~大的先运行,让其尽早的并行处理。 69 | 70 | ### 代码 71 | 72 | ```python 73 | def min_complete_time(p, f): 74 | n = len(p) 75 | t = list(zip(range(n), list(zip(p, f)))) # it will be like this: [_id,(pi,fi)] 76 | t.sort(key=cmp_to_key(lambda x, y: y[1][1] - x[1][1])) 77 | order = [] 78 | min_time = timecost = 0 79 | for i in range(n): 80 | order.append(t[i][0]) 81 | timecost += t[i][1][0] 82 | min_time = max(min_time, timecost + t[i][1][1]) 83 | return min_time, order 84 | ``` 85 | 86 | ### 正确性证明 87 | 88 | 假设已知有最佳的任务处理序列 j~a~,j~b~,j~c~…j~k~,j~i~ ,… , 对于相邻的任务j~i~和j~k~有:任务j~i~在PC上的时间f~i~大于j~k~ 在PC上的时间f~k~,但是j~k~在最佳序列中排在j~i~前面。 89 | 90 | 对于运行到任务j~k~,设pte为在任务j~k~之前,在supercomputer上运行的总时间,fte为当前运行的总时间,显然,运行j~k~,j~i~有: 91 | 92 | ``` 93 | pte += p[k] 94 | fte_k_i_1 = fte = max(fte,pte+f[k]) 95 | pte += p[i] 96 | fte_k_i = fte = max(fte,pte+f[i]) 97 | ``` 98 | 99 | 而如果我们交换任务k和任务i的顺序,那么有: 100 | 101 | ``` 102 | pte += p[i] 103 | fte_i_k_1 = fte = max(fte,pte+f[i]) 104 | pte += p[k] 105 | fte_i_k = fte = max(fte,pte+f[k]) 106 | ``` 107 | 108 | 显然,最后的pte是相同的,那么fte是哪个比较大? 109 | 110 | 由于f[i] > f[k],那么有fte_k\_i > fte_k\_i\_1(因为f[i] > f[k]并且pte变大了) 111 | 112 | 而 fte_i_k >= fte\_i\_k\_1(pte变大,但是f[k] < f[i]),也就是说,交换i和k不会增大fte的值。 113 | 114 | 因此,我们的贪心算法是正确的。 115 | 116 | ### 时间复杂度分析 117 | 118 | 上述的算法进行了排序,复杂度O(nlogn),然后扫描一遍O(n),因此总复杂度O(nlogn) 119 | 120 | ## Problem 3. 121 | 122 | > Assume the coasting is an infinite straight line. Land is in one side of coasting, sea in theother. Each small island is a point locating in the sea side. And any radar installation, locating on the coasting, can only cover d distance, so an islandin the sea can be covered by a radius installation, if the distance between them is at most d. 123 | > 124 | > We use Cartesian coordinate system, defining the coasting is the x-axis. The seaside is above x-axis, and the land side below. Given the position of each island in the sea, and given the distance of the coverage of the radar installation, your task is to write a program to find the minimal number ofradar installations to cover all the islands. Note that the position of an island is represented by its x-y coordinates. 125 | > 126 | >  127 | 128 | ### 思路 129 | 130 | 如果某个岛屿的纵坐标大于d,那么肯定无解。 131 | 132 | 在放置雷达的过程中,先按照横坐标进行排序,每次我们尽量的向右放置,对于一个点i,我们尝试其是否在之前的雷达范围内,如果在则扫描下一个。如果不在,则看该点支持放置的最右边的雷达的横坐标cx是多少,若小于等于之前的雷达横坐标px,则px=cx,把之前的雷达进行左移即可,否则说明需要一个新的雷达才可以覆盖到,而新的雷达放置点为cx。 133 | 134 | 如图,一开始我们将雷达位置放在A最右边能达到的点P。接着对于点B的情况,由于距离大于d,而B可以放置的最右边位置为p2,那么我们将P移动到P2即可覆盖A和B。 而如果是点C的情况,在右边并且距离大于d,那么显然只能多个雷达了。 135 | 136 |  137 | 138 | ### 代码 139 | 140 | ```python 141 | class Point(object): 142 | def __init__(self, x, y): 143 | self.x = x 144 | self.y = y 145 | 146 | def __lt__(self, other): 147 | return self.x < other.x 148 | 149 | def min_radar(points, d): 150 | if not points: return 0 151 | if len(list(filter(lambda point: point.y > d, points))) > 0: return -1 # have no answer 152 | points.sort() 153 | px = points[0].x + sqrt(d * d - points[0].y * points[0].y) 154 | ans = 1 155 | for i in range(1,len(points)): 156 | if (px - points[i].x)**2 + points[i].y ** 2 <= d*d: continue 157 | cx = points[i].x + sqrt(d * d - points[i].y * points[i].y) 158 | if cx < px: 159 | px = cx 160 | continue 161 | px = cx 162 | ans += 1 163 | return ans 164 | ``` 165 | 166 | ### 正确性证明 167 | 168 | 贪心选择是正确的,因为我们尽可能的将雷达放置在最右边。 169 | 170 | 如上面的图,对于B的情况p2\
p,只能设置一个新的雷达在p2了,否则之前覆盖的岛屿就覆盖不到了。
171 |
172 | 综上,算法是正确的。
173 |
174 | ### 时间复杂度分析
175 |
176 | 上述的算法进行了排序,复杂度O(nlogn),然后扫描一遍O(n),因此总复杂度O(nlogn)
177 |
178 | ## Problem 4.
179 |
180 | > Suppose you are given two sets A and B, each containing n positive integers.You can choose to reorder each set however you like. After reordering, let ai be the ith element of set A, and let bi be the ith element of set B. You then receive a payoff $\prod_{i=1}^{n}a_{i}^{b_{i}}$ . Give an polynomial-time algorithm that will maximize your payoff.
181 |
182 | ### 思路
183 |
184 | 由于都是正数,指数函数为增函数,底越大增长越快,因此尽可能让底大的配合上幂越大的。将A和B排序,然后直接a~0~^b0^ * a~1~^b1^……即可。
185 |
186 | ### 代码
187 |
188 | ```python
189 | def max_payoff(A, B):
190 | if not A or not B or len(A) != len(B): return 0
191 | A.sort()
192 | B.sort()
193 | ans = 1
194 | for i in range(len(A)):
195 | ans *= A[i] ** B[i]
196 | return ans
197 | ```
198 |
199 | ### 正确性证明
200 |
201 | 我们首先排序(这里为升序)
202 |
203 | 若i < k ,则有 A~i~<= A~k~ , B~i~<= B~k~ ,那么有:$A_{i}^{B_{k}-B_{i}} <= A_{k}^{B_{k}-B_{i}}$, 两边同时乘以 $A_{k}^{B_{i}}A_{i}^{B_{i}}$, 得:
204 |
205 | $A_{i}^{B_{k}}A_{k}^{B_{i}} <= A_{i}^{B_{i}}A_{k}^{B_{k}} $, 因此,算法正确。
206 |
207 | ### 时间复杂度分析
208 |
209 | 上述的算法进行了排序,复杂度O(nlogn),然后扫描一遍数组O(n),因此总复杂度O(nlogn)
210 |
211 | ## Problem 5.
212 |
213 | > Write a program in your favorite language to compress a file using Huffman code and then decompress it. Code information may be contained in the compressed file if you can. Use your program to compress the two files (graph.txt and Aesop Fables.txt) and compare the results (Huffman code and compression ratio).
214 |
215 | ### 思路
216 |
217 | 首先对字符进行频率统计,然后建立哈夫曼树,接着对原内容进行编码压缩,用"wb"模式写入文件,最后解压。需要注意的是解压过程中,要对最后一个字符进行特殊处理。
218 |
219 | ### 代码
220 |
221 | ```python
222 | # -*- coding: utf-8 -*-
223 | # @Date : 2016/10/18
224 | # @Author : hrwhisper
225 | import heapq
226 | import collections
227 |
228 | class TreeNode(object):
229 | def __init__(self, val, cnt, left=None, right=None):
230 | self.cnt = cnt
231 | self.val = val
232 | self.left = left
233 | self.right = right
234 |
235 | def __lt__(self, other):
236 | return self.cnt < other.cnt
237 |
238 | def create_huffman_tree(txt):
239 | q = [TreeNode(c, cnt) for c, cnt in collections.Counter(txt).items()]
240 | heapq.heapify(q)
241 | while len(q) > 1:
242 | a, b = heapq.heappop(q), heapq.heappop(q)
243 | heapq.heappush(q, TreeNode('', a.cnt + b.cnt, a, b))
244 | return q.pop()
245 |
246 | def get_huffman_tree(cur, root, code):
247 | if not root.left and not root.right: # the leaf node
248 | code[root.val] = cur
249 | return
250 |
251 | if root.left: get_huffman_tree(cur + '0', root.left, code)
252 | if root.right: get_huffman_tree(cur + '1', root.right, code)
253 |
254 | def decode(txt, r_haffman_code, decode_save_path, last_byte=0):
255 | txt = ''.join(['0' * (8 - len(bin(ord(c))[2:])) + bin(ord(c))[2:] for c in txt])
256 | if last_byte:
257 | txt = txt[:-8] + txt[-last_byte:]
258 | n = len(txt)
259 | cur, decode_txt = '', ''
260 | for i in range(n):
261 | cur += txt[i]
262 | if cur in r_haffman_code:
263 | decode_txt += r_haffman_code[cur]
264 | cur = ''
265 |
266 | with open(decode_save_path, 'w') as f:
267 | f.write(decode_txt)
268 |
269 | def encode(txt, huffman_code, compress_save_path):
270 | with open(compress_save_path, 'wb') as f:
271 | txt = ''.join([huffman_code[c] for c in txt])
272 | last_byte = len(txt) % 8
273 | txt = ''.join(chr(int(txt[i:i + 8], 2)) for i in range(0, len(txt), 8))
274 | f.write(bytes(txt, "utf-8"))
275 | return last_byte
276 |
277 | if __name__ == '__main__':
278 | file_paths = ['./Aesop_Fables.txt', './graph.txt']
279 | for cur_file_path in file_paths:
280 | compress_save_path = cur_file_path + '_compressed'
281 | decode_save_path = cur_file_path + '_compressed_decode'
282 |
283 | with open(cur_file_path) as f:
284 | txt = f.read()
285 |
286 | root = create_huffman_tree(txt)
287 | huffman_code = {}
288 | get_huffman_tree('', root, huffman_code)
289 | r_haffman_code = {code: c for c, code in huffman_code.items()}
290 | last_byte = encode(txt, huffman_code, compress_save_path)
291 |
292 | with open(compress_save_path, 'rb') as f:
293 | txt = f.read().decode('utf-8')
294 |
295 | decode(txt, r_haffman_code, decode_save_path, last_byte)
296 |
297 | with open(decode_save_path) as fd, \
298 | open(cur_file_path) as f, open(compress_save_path) as fp:
299 | t = f.read()
300 | print('{}: compression ratio: {:.2f}, decode file equals original file is'
301 | .format(f.name,len(fp.read()) * 1.0 / len(t)), t == fd.read())
302 | ```
303 |
304 | ### 结果
305 |
306 | ```
307 | ./Aesop_Fables.txt: compression ratio: 0.56, decode file equals original file is True
308 | ./graph.txt: compression ratio: 0.44, decode file equals original file is True
309 | ```
310 |
311 | ## Problem 6.
312 |
313 | > 1. Implement Dijkstra’s algorithm (using linked list, binary heap, binomialheap, and Fibonacci heap) to calculate the shortest path from node s to node t of the given graph (graph.txt), where s and t are randomly chosen. The comparison of different priority queue is expected.
314 | >
315 | > Note: you can implement the heaps by yourself or using Boost C++/STL, etc.
316 | >
317 | >
318 | > 2. Figure out how many shortest paths is every node lying on in your program, except starting node s and finishing node t. For example, if there are in total three shortest paths 0 → 1 → 2 → 10, 0 → 1 → 3 → 4 → 10 and 0 → 1 → 2 → 6 → 7 → 10, then 1 lies on 3 shortest paths, 2 lies on 2 shortest paths, and 3 lies on 1 shortest path, etc.
319 |
320 | ### 思路
321 |
322 | 首先进行建图,由于是无向图,因此对于x y w要建立x->y 和y->x的两条边。
323 |
324 | 然后给定起点s,运行Dijkstra算法,即可算出s到终点t的最短路径。
325 |
326 | 对于每个节点的最短路径输出,从终点进行回溯,每次将上一个可能更新的节点加入队列即可。
327 |
328 | ### 代码
329 |
330 | 首先是结点定义和dijkstra算法
331 |
332 | ```python
333 | import heapq
334 | import collections
335 | import random
336 |
337 | class Node(object):
338 | def __init__(self, to, val):
339 | self.to = to
340 | self.val = val
341 |
342 | def __lt__(self, other):
343 | return self.val < other.val
344 |
345 | def dijkstra(s, t, g):
346 | q = []
347 | dis = collections.defaultdict(lambda: 0x7fffffff) # [0x7fffffff] * len(g)
348 | vis = collections.defaultdict(bool) # [False] * len(g)
349 | dis[s] = 0
350 | heapq.heappush(q, Node(s, 0))
351 |
352 | while q:
353 | cur = heapq.heappop(q).to
354 | if vis[cur]: continue
355 | vis[cur] = True
356 | for to, val in g[cur]:
357 | if not vis[to] and dis[cur] + val < dis[to]:
358 | dis[to] = dis[cur] + val
359 | heapq.heappush(q, Node(to, dis[to]))
360 | return dis
361 | ```
362 |
363 | 接着是进行回溯获得结点在最短路径上出现的次数
364 |
365 | ```python
366 | def count_node_path(s, t, dis, g):
367 | cnt = collections.defaultdict(int) # [0] * len(g)
368 | q = [to for to, val in g[t] if dis[t] == dis[to] + val]
369 | while q:
370 | cur = q.pop()
371 | if cur == s: continue
372 | cnt[cur] += 1
373 | for to, val in g[cur]:
374 | if dis[cur] == dis[to] + val:
375 | q.append(to)
376 | return cnt
377 | ```
378 |
379 | 最后是读文件然后建图,调用Dijkstra和回溯的过程
380 |
381 | ```python
382 | if __name__ == '__main__':
383 | g = collections.defaultdict(list)
384 |
385 | with open('./graph.txt') as f:
386 | for i, line in enumerate(f):
387 | if i < 6: continue
388 | x, y, val = list(map(int, line.strip().split()))
389 | g[x].append((y, val))
390 | g[y].append((x, val))
391 |
392 | s, t = random.randint(0, len(g)), random.randint(0, len(g))
393 | dis = dijkstra(s, t, g)
394 | print(s, t, dis[t])
395 | cnt = count_node_path(s, t, dis, g)
396 | print(cnt)
397 | ```
398 |
399 |
400 |
401 |
--------------------------------------------------------------------------------
/3_Greedy/readme.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/3_Greedy/readme.pdf
--------------------------------------------------------------------------------
/4_LP/7.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Date : 2016/11/17
3 | # @Author : hrwhisper
4 |
5 | import numpy as np
6 |
7 |
8 | class Simplex(object):
9 | def __init__(self, obj, max_mode=False):
10 | self.max_mode = max_mode # default is solve min LP, if want to solve max lp,should * -1
11 | self.mat = np.array([[0] + obj]) * (-1 if max_mode else 1)
12 |
13 | def add_constraint(self, a, b):
14 | self.mat = np.vstack([self.mat, [b] + a])
15 |
16 | def solve(self):
17 | m, n = self.mat.shape # m - 1is the number slack variables we should add
18 | temp, B = np.vstack([np.zeros((1, m - 1)), np.eye(m - 1)]), list(range(n - 1, n + m - 1)) # add diagonal array
19 | mat = self.mat = np.hstack([self.mat, temp]) # combine them!
20 | while mat[0, 1:].min() < 0:
21 | col = np.where(mat[0, 1:] < 0)[0][0] + 1 # use Bland's method to avoid degeneracy.
22 | row = np.array([mat[i][0] / mat[i][col] if mat[i][col] > 0 else 0x7fffffff for i in
23 | range(1, mat.shape[0])]).argmin() + 1 # find the theta index
24 | if mat[row][col] <= 0: return None # the theta is ∞, the problem is unbounded
25 | mat[row] /= mat[row][col]
26 | ids = np.arange(mat.shape[0]) != row
27 | mat[ids] -= mat[row] * mat[ids, col:col + 1] # for each i!= row do: mat[i]= mat[i] - mat[row] * mat[i][col]
28 | B[row] = col
29 | return mat[0][0] * (1 if self.max_mode else -1), {B[i]: mat[i, 0] for i in range(1, m) if B[i] < n}
30 |
31 |
32 | if __name__ == '__main__':
33 | """
34 | maximize z: 3*x1 + 2*x2;
35 | 2*x1 + x2 <= 100;
36 | x1 + x2 <= 80;
37 | x1 <= 40
38 | answer :180
39 | """
40 | s = Simplex([3, 2], max_mode=True)
41 | s.add_constraint([2, 1], 100)
42 | s.add_constraint([1, 1], 80)
43 | s.add_constraint([1, 0], 40)
44 | print(s.solve())
45 | print(s.mat)
46 |
47 | # """
48 | # max 2x1 + 3x2 + 2x3
49 | # st
50 | # 2x1 + x2 + x3 <= 4
51 | # x1 + 2x2 + x3 <= 7
52 | # x3 <= 5
53 | # x1,x2,x3>= 0
54 | # answer :11
55 | # """
56 | # t = Simplex([2, 3, 2], max_mode=True)
57 | # t.add_constraint([2, 1, 1], 4)
58 | # t.add_constraint([1, 2, 1], 7)
59 | # t.add_constraint([0, 0, 1], 5)
60 | # print(t.solve())
61 | # print(t.mat)
62 | #
63 | # """
64 | # max z = 2x1 - x2
65 | # st
66 | # 2x1 - x2 <= 2
67 | # x1 - 5x2 <= -4
68 | # x1 ,x2 >= 0
69 | # answer :2
70 | # """
71 | # t = Simplex([2, -1], max_mode=True)
72 | # t.add_constraint([2, -1], 2)
73 | # t.add_constraint([1, -5], -4)
74 | # print(t.solve())
75 | # print(t.mat)
76 | #
77 | # """
78 | # max x1 + 14x2 +6x3
79 | # st
80 | # x1 + x2 + x3 <=4
81 | # x1 <= 2
82 | # x3 <= 3
83 | # 3x2 + x3 >= 6
84 | # x1 ,x2 ,x3 >= 0
85 | # answer :-56
86 | # """
87 | # t = Simplex([1, 14, 6], max_mode=True)
88 | # t.add_constraint([1, 1, 1], 4)
89 | # t.add_constraint([1, 0, 0], 2)
90 | # t.add_constraint([0, 0, 1], 3)
91 | # t.add_constraint([0, -3, -1], -6)
92 | # print(t.solve())
93 | # print(t.mat)
94 |
95 | """
96 | minimize -x1 - 14x2 - 6x3
97 | st
98 | x1 + x2 + x3 <=4
99 | x1 <= 2
100 | x3 <= 3
101 | 3x2 + x3 <= 6
102 | x1 ,x2 ,x3 >= 0
103 | answer :-32
104 | """
105 | t = Simplex([-1, -14, -6])
106 | t.add_constraint([1, 1, 1], 4)
107 | t.add_constraint([1, 0, 0], 2)
108 | t.add_constraint([0, 0, 1], 3)
109 | t.add_constraint([0, 3, 1], 6)
110 | print(t.solve())
111 | print(t.mat)
112 |
113 | """
114 | maximize x1 + 14x2 + 6x3
115 | st
116 | x1 + x2 + x3 <=4
117 | x1 <= 2
118 | x3 <= 3
119 | 3x2 + x3 <= 6
120 | x1 ,x2 ,x3 >= 0
121 | answer :32
122 | """
123 | t = Simplex([1, 14, 6], max_mode=True)
124 | t.add_constraint([1, 1, 1], 4)
125 | t.add_constraint([1, 0, 0], 2)
126 | t.add_constraint([0, 0, 1], 3)
127 | t.add_constraint([0, 3, 1], 6)
128 | print(t.solve())
129 | print(t.mat)
130 |
--------------------------------------------------------------------------------
/4_LP/Assignment4_LP.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/4_LP/Assignment4_LP.pdf
--------------------------------------------------------------------------------
/4_LP/img/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwhisper/algorithm_course/ce1de1823b7b29d5e336011f2a9095a397b75cfa/4_LP/img/6.png
--------------------------------------------------------------------------------
/4_LP/readme.md:
--------------------------------------------------------------------------------
1 | # 计算机算法设计与分析-作业4(LP)
2 |
3 | - Author: hrwhipser
4 | - https://github.com/hrwhisper/algorithm_course/
5 |
6 |
7 |
8 |
9 |
10 | ## 2. Airplane Landing Problem
11 |
12 | > With human lives at stake, an air traffic controller has to schedule the airplanes that are landing at an airport in order to avoid airplane collision. Each airplane i has a time window [si,ti] during which it can safely land. You must compute the exact time of landing for each airplane that respects these time windows. Furthermore, the airplane landings should be stretched out as much as possible so that the minimum time gap between successive landings is as large as possible.
13 | > For example, if the time window of landing three airplanes are [10:00-11:00], [11:20-11:40], [12:00-12:20], and they land at 10:00, 11:20, 12:20 respectively, then the smallest gap is 60 minutes, which occurs between the last two airplanes.
14 | > Given n time windows, denoted as [s1,t1],[s2,t2],···,[sn,tn] satisfying s1 < t1 < s2 < t2 < ··· < sn < tn, you are required to give the exact landing time of each airplane, in which the smallest gap between successive landings is maximized.
15 | > Please formulate this problem as an LP, construct an instance and use GLPK or Gurobi or other similar tools to solve it.
16 |
17 | 依题意,设xi为第i个时间窗的时间,y为各个x的之差最小值,因此有:
18 | $$
19 | \begin{alignat}{2}
20 |
21 | \max\quad &y& \\
22 |
23 | \mbox{s.t.}\quad
24 |
25 | &x_i-x_{i-1}\geq{y}, &\quad&i=2,...,n\\
26 | &s_i\leq{x_i}\leq{t_i}, &\quad&i=1,...,n\\
27 | &s_1