# Repository layout:
#   README.md
#   dpp.py
#
# /README.md:
#   Determinantal point process sampling procedures from
#   "Fast Determinantal Point Process Sampling with
#   Application to Clustering, Byungkon Kang, NIPS 2013"
#
# /dpp.py:
"""
Determinantal point process sampling procedures based
on (Fast Determinantal Point Process Sampling with
Application to Clustering, Byungkon Kang, NIPS 2013)
"""
from itertools import product

import numpy as np


def build_similary_matrix(cov_function, items):
    """
    Build the similarity matrix from a covariance function
    cov_function and a set of items. Each pair of items
    is given to cov_function, which computes the similarity
    between two items.

    cov_function is called once per unordered pair (i <= j) and
    the value is mirrored, so the returned (n, n) float array is
    symmetric by construction.
    """
    n = len(items)
    L = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            L[i, j] = L[j, i] = cov_function(items[i], items[j])
    return L


# Correctly spelled alias; the original (misspelled) name is kept for callers.
build_similarity_matrix = build_similary_matrix


def exp_quadratic(sigma):
    """
    Exponential quadratic covariance function.

    Returns a function f(p1, p2) computing
    exp(-0.5 * ||p1 - p2||^2 / sigma^2).
    """
    def f(p1, p2):
        # Convert to float first so that unsigned integer inputs cannot
        # wrap around when subtracted.
        diff = np.asarray(p1, dtype=float) - np.asarray(p2, dtype=float)
        return np.exp(-0.5 * np.sum(diff ** 2) / sigma ** 2)
    return f


def _initial_state(L, n, rng):
    """
    Draw the random starting subset for `sample`.

    Returns (mask, selected, inverse) where `selected` lists the chosen
    indices in the same order as the rows/columns of `inverse`. If the
    randomly drawn subset is (numerically) singular it has zero probability
    under the DPP and cannot be inverted, so the empty set is used instead.
    """
    mask = np.asarray(rng.choice((True, False), size=n), dtype=bool)
    selected = np.flatnonzero(mask).tolist()
    if selected:
        sub = L[np.ix_(selected, selected)]
        if np.linalg.matrix_rank(sub) == len(selected):
            try:
                inverse = np.linalg.inv(sub)
            except np.linalg.LinAlgError:
                inverse = None
            if inverse is not None and np.all(np.isfinite(inverse)):
                return mask, selected, inverse
    return np.zeros(n, dtype=bool), [], np.zeros((0, 0))


def sample(items, L, max_nb_iterations=1000, rng=np.random):
    """
    Sample a list of items from a DPP defined
    by the similarity matrix L. The algorithm
    is iterative and runs for max_nb_iterations.
    The algorithm used is from
    (Fast Determinantal Point Process Sampling with
    Application to Clustering, Byungkon Kang, NIPS 2013)

    Parameters:
        items: sequence of n items.
        L: symmetric (n, n) similarity matrix.
        max_nb_iterations: number of Metropolis steps.
        rng: object providing `choice` and `uniform` (the `np.random`
            module, a `RandomState` or a `Generator`).

    Returns:
        Array with the selected items, in their original order.

    Each step proposes to toggle one random item u. With d_u the Schur
    complement of u with respect to the current set without u, an
    insertion is accepted with probability min(1, d_u) and a removal with
    probability min(1, 1 / d_u). The inverse of the selected sub-matrix is
    updated incrementally instead of being recomputed.
    """
    L = np.asarray(L, dtype=float)
    n = len(items)
    if n == 0:
        return np.asarray(items)[np.zeros(0, dtype=bool)]

    # `selected[i]` is the item index of row/column i of `L_Y_inv`; this
    # explicit ordering keeps the inverse consistent after appends/removals.
    mask, selected, L_Y_inv = _initial_state(L, n, rng)

    for _ in range(max_nb_iterations):
        u = int(rng.choice(n))
        if not mask[u]:
            b_u = L[np.asarray(selected, dtype=np.intp), u]
            inv_b = np.dot(L_Y_inv, b_u)
            d_u = L[u, u] - np.dot(b_u, inv_b)
            # d_u <= 0 means the enlarged set has zero probability.
            if d_u > 0 and rng.uniform() <= min(1.0, d_u):
                m = len(selected)
                # Block-matrix inverse with u appended as last row/column.
                grown = np.empty((m + 1, m + 1))
                grown[:m, :m] = L_Y_inv + np.outer(inv_b, inv_b) / d_u
                grown[:m, m] = -inv_b / d_u
                grown[m, :m] = -inv_b / d_u
                grown[m, m] = 1.0 / d_u
                L_Y_inv = grown
                selected.append(u)
                mask[u] = True
        else:
            pos = selected.index(u)
            # The diagonal entry of the inverse equals 1 / d_u, where d_u is
            # the Schur complement of u w.r.t. the set without u.
            pivot = L_Y_inv[pos, pos]
            if pivot > 0 and rng.uniform() <= min(1.0, pivot):
                col = np.delete(L_Y_inv[:, pos], pos)
                row = np.delete(L_Y_inv[pos, :], pos)
                reduced = np.delete(np.delete(L_Y_inv, pos, axis=0),
                                    pos, axis=1)
                L_Y_inv = reduced - np.outer(col, row) / pivot
                selected.pop(pos)
                mask[u] = False
    return np.asarray(items)[mask]


def sample_k(items, L, k, max_nb_iterations=1000, rng=np.random):
    """
    Sample a list of k items from a DPP defined
    by the similarity matrix L. The algorithm
    is iterative and runs for max_nb_iterations.
    The algorithm used is from
    (Fast Determinantal Point Process Sampling with
    Application to Clustering, Byungkon Kang, NIPS 2013)

    Parameters:
        items: sequence of n items.
        L: symmetric (n, n) similarity matrix.
        k: size of the returned subset (0 <= k <= n).
        max_nb_iterations: number of Metropolis steps.
        rng: object providing `choice` and `uniform`.

    Returns:
        Array with the k selected items, in their original order.

    Raises:
        ValueError: from `rng.choice` when k is negative or larger than n.

    Each step proposes to swap a selected item u for an unselected item v
    and accepts with probability min(1, det(L_X') / det(L_X)), which is the
    ratio of the two Schur complements used in the paper. Log-determinants
    are used so that singular sub-matrices do not raise.
    """
    L = np.asarray(L, dtype=float)
    n = len(items)
    current = np.zeros(n, dtype=bool)
    if k == 0:
        return np.asarray(items)[current]
    current[rng.choice(n, size=k, replace=False)] = True
    if k == n:
        # Nothing can be swapped in: the only subset of size n is everything.
        return np.asarray(items)[current]

    cur_sign, cur_logdet = np.linalg.slogdet(L[np.ix_(current, current)])
    for _ in range(max_nb_iterations):
        u = rng.choice(np.flatnonzero(current))
        v = rng.choice(np.flatnonzero(~current))
        proposal = current.copy()
        proposal[u] = False
        proposal[v] = True
        new_sign, new_logdet = np.linalg.slogdet(
            L[np.ix_(proposal, proposal)])
        if new_sign <= 0:
            # The proposed subset has zero probability: never move there.
            continue
        # A zero-probability current state is always left; otherwise apply
        # the Metropolis ratio min(1, det(L_X') / det(L_X)).
        if cur_sign <= 0 or rng.uniform() <= np.exp(
                min(0.0, new_logdet - cur_logdet)):
            current = proposal
            cur_sign, cur_logdet = new_sign, new_logdet
    return np.asarray(items)[current]


def test():
    """
    Print ten DPP samples drawn from the integers 1..99 using an
    exponential quadratic kernel with sigma=0.1.
    """
    x = np.arange(1, 100)
    L = build_similary_matrix(exp_quadratic(sigma=0.1), x)
    for _ in range(10):
        print(sample(x, L))


def main():
    """
    Plot DPP samples of a 10x10 grid in [0, 1)^2 for several kernel
    widths, next to a uniformly random subset for comparison.
    """
    # Imported here so the sampling functions do not require matplotlib.
    import matplotlib.pyplot as plt

    x = np.arange(0, 1, 0.1)
    y = np.arange(0, 1, 0.1)
    z = np.array(list(product(x, y)))
    sigmas = [0.0001, 0.1, 1, 2, 10]
    nb_plots = len(sigmas) + 1
    for k, sigma in enumerate(sigmas, start=1):
        plt.subplot(1, nb_plots, k)
        L = build_similary_matrix(exp_quadratic(sigma=sigma), z)
        selected_by_dpp = sample(z, L)
        plt.scatter(selected_by_dpp[:, 0], selected_by_dpp[:, 1])
        plt.title("DPP(sigma={0})".format(sigma))

    plt.subplot(1, nb_plots, nb_plots)
    selected_by_random = z[np.random.choice((True, False),
                                            size=len(z))]
    plt.scatter(selected_by_random[:, 0], selected_by_random[:, 1])
    plt.title("random")
    plt.show()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------