├── README.md
└── dpp.py


/README.md:
--------------------------------------------------------------------------------
1 | Determinantal point process sampling procedures from "Fast Determinantal Point Process Sampling with
2 | Application to Clustering, Byungkon Kang, NIPS 2013"
3 | 


--------------------------------------------------------------------------------
/dpp.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from itertools import product
  3 | """
  4 | Determinantal point process sampling procedures based
  5 | on  (Fast Determinantal Point Process Sampling with
  6 |      Application to Clustering, Byungkon Kang, NIPS 2013)
  7 | """
  8 | 
  9 | def build_similary_matrix(cov_function, items):
 10 |     """
 11 |     build the similarity matrix from a covariance function
 12 |     cov_function and a set of items. each pair of items
 13 |     is given to cov_function, which computes the similarity
 14 |     between two items.
 15 |     """
 16 |     L = np.zeros((len(items), len(items)))
 17 |     for i in range(len(items)):
 18 |         for j in range(i, len(items)):
 19 |             L[i, j] = cov_function(items[i], items[j])
 20 |             L[j, i] = L[i, j]
 21 |     return L
 22 | 
 23 | 
 24 | def exp_quadratic(sigma):
 25 |     """
 26 |     exponential quadratic covariance function
 27 |     """
 28 |     def f(p1, p2):
 29 |         return np.exp(-0.5 * (((p1 - p2)**2).sum()) / sigma**2)
 30 |     return f
 31 | 
 32 | 
 33 | def sample(items, L, max_nb_iterations=1000, rng=np.random):
 34 |     """
 35 |     Sample a list of items from a DPP defined
 36 |     by the similarity matrix L. The algorithm
 37 |     is iterative and runs for max_nb_iterations.
 38 |     The algorithm used is from
 39 |     (Fast Determinantal Point Process Sampling with
 40 |     Application to Clustering, Byungkon Kang, NIPS 2013)
 41 |     """
 42 |     Y = rng.choice((True, False), size=len(items))
 43 |     L_Y = L[Y, :]
 44 |     L_Y = L_Y[:, Y]
 45 |     L_Y_inv = np.linalg.inv(L_Y)
 46 | 
 47 |     for i in range(max_nb_iterations):
 48 |         u = rng.randint(0, len(items))
 49 | 
 50 |         c_u = L[u:u+1, :]
 51 |         c_u = c_u[:, u:u+1]
 52 |         b_u = L[Y, :]
 53 |         b_u = b_u[:, u:u+1]
 54 |         if Y[u] == False:
 55 |             p_include_U = min(1, c_u - np.dot(np.dot(b_u.T, L_Y_inv), b_u))
 56 |             if rng.uniform() <= p_include_U:
 57 |                 d_u = (c_u - np.dot(np.dot(b_u.T, L_Y_inv), b_u))
 58 |                 upleft = (L_Y_inv +
 59 |                           np.dot(np.dot(np.dot(L_Y_inv, b_u), b_u.T),
 60 |                                  L_Y_inv) / d_u)
 61 |                 upright = -np.dot(L_Y_inv, b_u) / d_u
 62 |                 downleft = -np.dot(b_u.T, L_Y_inv) / d_u
 63 |                 downright = d_u
 64 |                 L_Y_inv = np.bmat([[upleft, upright], [downleft, downright]])
 65 |                 Y[u] = True
 66 |                 L_Y = L[Y, :]
 67 |                 L_Y = L_Y[:, Y]
 68 |         else:
 69 |             p_remove_U = min(1, 1./(c_u - np.dot(np.dot(b_u.T, L_Y_inv), b_u)))
 70 |             if rng.uniform() <= p_remove_U:
 71 |                 l = L_Y_inv.shape[0] - 1
 72 |                 D = L_Y_inv[0:l, :]
 73 |                 D = D[:, 0:l]
 74 |                 e = L_Y_inv[0:l, :]
 75 |                 e = e[:, l:l+1]
 76 |                 f = L_Y_inv[l:l+1, :]
 77 |                 f = f[:, l:l+1]
 78 |                 L_Y_inv = D - np.dot(e, e.T) / f
 79 |                 Y[u] = False
 80 |                 L_Y = L[Y, :]
 81 |                 L_Y = L_Y[:, Y]
 82 |     return np.array(items)[Y]
 83 | 
 84 | 
 85 | def sample_k(items, L, k, max_nb_iterations=1000, rng=np.random):
 86 |     """
 87 |     Sample a list of k items from a DPP defined
 88 |     by the similarity matrix L. The algorithm
 89 |     is iterative and runs for max_nb_iterations.
 90 |     The algorithm used is from
 91 |     (Fast Determinantal Point Process Sampling with
 92 |     Application to Clustering, Byungkon Kang, NIPS 2013)
 93 |     """
 94 |     initial = rng.choice(range(len(items)), size=k, replace=False)
 95 |     X = [False] * len(items)
 96 |     for i in initial:
 97 |         X[i] = True
 98 |     X = np.array(X)
 99 |     for i in range(max_nb_iterations):
100 |         u = rng.choice(np.arange(len(items))[X])
101 |         v = rng.choice(np.arange(len(items))[~X])
102 |         Y = X.copy()
103 |         Y[u] = False
104 |         L_Y = L[Y, :]
105 |         L_Y = L_Y[:, Y]
106 |         L_Y_inv = np.linalg.inv(L_Y)
107 | 
108 |         c_v = L[v:v+1, :]
109 |         c_v = c_v[:, v:v+1]
110 |         b_v = L[Y, :]
111 |         b_v = b_v[:, v:v+1]
112 |         c_u = L[u:u+1, :]
113 |         c_u = c_u[:, u:u+1]
114 |         b_u = L[Y, :]
115 |         b_u = b_u[:, u:u+1]
116 | 
117 |         p = min(1, c_v - np.dot(np.dot(b_v.T, L_Y_inv), b_v) /
118 |                 (c_u - np.dot(np.dot(b_u.T, L_Y_inv.T), b_u)))
119 |         if rng.uniform() <= p:
120 |             X = Y[:]
121 |             X[v] = True
122 |     return np.array(items)[X]
123 | 
124 | 
def test():
    """
    Smoke test: print ten DPP samples drawn over the integers 1..99
    with an exponential quadratic kernel of scale sigma=0.1.
    """
    points = np.arange(1, 100)
    similarity = build_similary_matrix(exp_quadratic(sigma=0.1), points)
    for _ in range(10):
        # swap in sample_k(points, similarity, 10) for fixed-size samples
        print(sample(points, similarity))
132 | 
133 | 
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    # Demo: one scatter plot of a DPP sample per kernel scale, followed
    # by a panel showing a uniformly random subset for comparison.
    grid_x = np.arange(0, 1, 0.1)
    grid_y = np.arange(0, 1, 0.1)
    grid = np.array(list(product(grid_x, grid_y)))
    sigmas = [0.0001, 0.1, 1, 2, 10]
    n_panels = len(sigmas) + 1

    for panel, sigma in enumerate(sigmas, start=1):
        plt.subplot(1, n_panels, panel)
        similarity = build_similary_matrix(exp_quadratic(sigma=sigma), grid)
        chosen = sample(grid, similarity)
        plt.scatter(chosen[:, 0], chosen[:, 1])
        plt.title("DPP(sigma={0})".format(sigma))

    # last panel: every point kept independently with probability 1/2
    plt.subplot(1, n_panels, n_panels)
    random_mask = np.random.choice((True, False), size=len(grid))
    uniform_pick = grid[random_mask]
    plt.scatter(uniform_pick[:, 0], uniform_pick[:, 1])
    plt.title("random")
    plt.show()
157 | 


--------------------------------------------------------------------------------