├── README.md
├── dominant_set.py
└── ds_clustering.py


/README.md:
--------------------------------------------------------------------------------
1 | dominant_set
2 | ============
3 | 
4 | Dominant set clustering. See 'Dominant Sets and Pairwise Clustering', by Massimiliano Pavan and Marcello Pelillo, PAMI 2007.
5 | 


--------------------------------------------------------------------------------
/dominant_set.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from numpy.linalg import norm
 3 | 
 4 | def dominant_set(A, x=None, epsilon=1.0e-4):
 5 |     """Compute the dominant set of the similarity matrix A with the
 6 |     replicator dynamics optimization approach. Convergence is reached
 7 |     when x changes less than epsilon.
 8 | 
 9 |     See: 'Dominant Sets and Pairwise Clustering', by Massimiliano
10 |     Pavan and Marcello Pelillo, PAMI 2007.
11 |     """
12 |     if x is None:
13 |         x = np.ones(A.shape[0])/float(A.shape[0])
14 |         
15 |     distance = epsilon*2
16 |     while distance > epsilon:
17 |         x_old = x.copy()
18 |         # x = x * np.dot(A, x) # this works only for dense A
19 |         x = x * A.dot(x) # this works both for dense and sparse A
20 |         x = x / x.sum()
21 |         distance = norm(x - x_old)
22 |         print x.size, distance
23 | 
24 |     return x
25 | 
26 | 
if __name__=="__main__":
    # Demo: extract one dominant set from a synthetic 3-blob dataset and
    # plot the data, the similarity matrix and the resulting weights.

    from sklearn.metrics import pairwise_distances
    from sklearn.datasets import make_blobs
    import matplotlib.pyplot as plt

    np.random.seed(1)

    n = 1000
    d = 2

    X, y = make_blobs(n, d, centers=3)

    # Similarity = exp(-squared distance / sigma2), with sigma2 set to the
    # median of all pairwise squared distances.
    D = pairwise_distances(X, metric='sqeuclidean')
    sigma2 = np.median(D)
    S = np.exp(-D / sigma2)

    x = dominant_set(S, epsilon=2e-4)

    # The scatter plot of the raw data only makes sense for 2-D points.
    if d==2:
        plt.figure()
        for yi in np.unique(y):
            plt.plot(X[y==yi,0], X[y==yi,1], 'o')

        plt.title('Dataset')

    plt.figure()
    plt.imshow(S, interpolation='nearest')
    plt.title('similarity matrix')

    # Reorder rows and columns by decreasing weight in x.
    idx = np.argsort(x)[::-1]
    B = S[idx,:][:,idx]
    plt.figure()
    plt.imshow(B, interpolation='nearest')
    plt.title('Re-arranged similarity matrix')
    plt.figure()
    plt.semilogy(np.sort(x))
    plt.title('Sorted weighted characteristic vector (x)')

    # Points whose weight exceeds the median of the positive weights are
    # drawn in red as the dominant set; the remaining points in blue.
    cutoff = np.median(x[x>0])
    # Single-argument form prints the same text on Python 2 and 3.
    print("cutoff: %s" % cutoff)
    plt.figure()
    plt.plot(X[x<=cutoff,0], X[x<=cutoff,1], 'bo')
    plt.plot(X[x>cutoff,0], X[x>cutoff,1], 'ro')
    plt.title("Dominant set")

    plt.show()
76 | 


--------------------------------------------------------------------------------
/ds_clustering.py:
--------------------------------------------------------------------------------
 1 | """Dominant set clustering: iteratively find the dominant set and then
 2 | remove it from the dataset.
 3 | """
 4 | 
 5 | import numpy as np
 6 | from dominant_set import dominant_set
 7 | 
 8 | if __name__ == '__main__':
 9 | 
10 |     from sklearn.metrics import pairwise_distances
11 |     from sklearn.datasets import make_blobs
12 |     import matplotlib.pyplot as plt
13 | 
14 |     np.random.seed(1)
15 | 
16 |     n = 1000
17 |     d = 2
18 | 
19 |     X, y = make_blobs(n, d, centers=3)
20 | 
21 |     D = pairwise_distances(X, metric='sqeuclidean')
22 | 
23 |     sigma2 = np.median(D)
24 |     
25 |     S = np.exp(-D / sigma2)
26 | 
27 |     if d==2:
28 |         plt.figure()
29 |         for yi in np.unique(y):
30 |             plt.plot(X[y==yi,0], X[y==yi,1], 'o')
31 | 
32 |         plt.title('Dataset')
33 | 
34 | 
35 |     while S.size > 10:
36 |         x = dominant_set(S, epsilon=2e-4)
37 |         cutoff = np.median(x[x>0])
38 | 
39 |         plt.figure()
40 |         plt.plot(X[x<=cutoff,0], X[x<=cutoff,1], 'bo')
41 |         plt.plot(X[x>cutoff,0], X[x>cutoff,1], 'ro')
42 |         plt.title("Dominant set")
43 | 
44 |         # remove the dominant set
45 |         idx = x <= cutoff
46 |         S = S[idx, :][:, idx]
47 |         X = X[idx, :]
48 |         
49 |     plt.show()
50 |         
51 |         
52 | 


--------------------------------------------------------------------------------