├── README.md
├── SVthresh.m
├── dataset
    ├── 3-sources.mat
    ├── bbc.mat
    ├── caltech7.mat
    ├── syn500.mat
    └── uci.mat
├── method.m
├── metric
    ├── accuracyMeasure.m
    ├── adjrand.m
    ├── bestMap.m
    ├── clustering_metric.m
    ├── fprMeasure.m
    ├── nmi.m
    └── rand_index.m
├── run_3s.m
├── run_Caltech.m
├── run_UCI.m
├── run_bbc.m
├── run_syn.m
├── synthetic_lowrank.m
└── tool
    ├── SpectralClustering.m
    ├── SpectralClustering2.m
    ├── discretisation.m
    └── discretisationEigenVectorData.m


/README.md:
--------------------------------------------------------------------------------
1 | ## DALIGA
2 | MATLAB implementation of the paper "Direct affinity learning to boost multi-view clustering via subspace merging on a Grassmann manifold".
3 | 
4 | The main method is ``method.m``.
5 | 
6 | Run ``run_syn.m``, ``run_3s.m``, ``run_bbc.m``, ``run_Caltech.m``, and ``run_UCI.m`` to reproduce the experimental results on the synthetic, 3-sources, BBCSport, Caltech 101, and UCI Digit datasets, respectively.
7 | 


--------------------------------------------------------------------------------
/SVthresh.m:
--------------------------------------------------------------------------------
 1 | function [ thresh_X ] = SVthresh( X, thresh )
 2 | % SVTHRESH  Singular value thresholding.
 3 | %
 4 | %   thresh_X = SVthresh(X, thresh) shrinks every singular value of X by
 5 | %   thresh, clips the result at zero and rebuilds the matrix from the
 6 | %   shrunken spectrum.
 7 | %
 8 | %   X        -- matrix to threshold
 9 | %   thresh   -- scalar shrinkage amount
10 | %   thresh_X -- thresholded matrix, same size as X
11 | 
12 | % Enhong Zhuo, 2019
13 | 
14 | % Economy-size SVD: same product U*S*V', but the unused null-space
15 | % columns are never built when X is rectangular.
16 | [U, S, V] = svd(X, 'econ');
17 | 
18 | % Shrink only the singular values (the diagonal of S), so the
19 | % off-diagonal zeros are never touched by the subtraction.
20 | s_shrunk = max(diag(S) - thresh, 0);
21 | 
22 | thresh_X = U * diag(s_shrunk) * V';
23 | 
24 | end
25 | 
26 | 


--------------------------------------------------------------------------------
/dataset/3-sources.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/3-sources.mat


--------------------------------------------------------------------------------
/dataset/bbc.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/bbc.mat


--------------------------------------------------------------------------------
/dataset/caltech7.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/caltech7.mat


--------------------------------------------------------------------------------
/dataset/syn500.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/syn500.mat


--------------------------------------------------------------------------------
/dataset/uci.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/uci.mat


--------------------------------------------------------------------------------
/method.m:
--------------------------------------------------------------------------------
  1 | function [ B1 ] = method( X, opts )
  2 | % METHOD  Learn a shared n-by-n matrix B1 from multi-view data.
  3 | %
  4 | %   B1 = method(X) uses the default parameters listed below.
  5 | %   B1 = method(X, opts) overrides them with the fields present in opts.
  6 | %
  7 | %   X    -- cell, multi-view data points
  8 | %   X{i} -- i-th view matrix, row for sample, column for feature
  9 | %   opts -- optional struct of parameter settings (may be omitted or []):
 10 | %           alpha    -- scalar weight coupling every view to B1 (default 0.5)
 11 | %           beta     -- [b1 b2]: singular-value / soft thresholds used for
 12 | %                       the per-view variables A3 and A4       (default [1 1])
 13 | %           gamma    -- [g1 g2]: singular-value / soft thresholds used for
 14 | %                       the shared variables B2 and B3   (default [0.01 0.01])
 15 | %           mu       -- initial penalty weight                  (default 10)
 16 | %           max_mu   -- cap on the penalty weights              (default 1e6)
 17 | %           rho      -- per-iteration growth factor of mu       (default 1.5)
 18 | %           num_iter -- maximum number of iterations            (default 100)
 19 | %           err_thr  -- iterate while the largest constraint gap
 20 | %                       (inf-norm) exceeds this value          (default 1e-5)
 21 | %
 22 | %   B1   -- n-by-n matrix, n being the number of rows of X{1}
 23 | %
 24 | %   Element-wise soft thresholding is done by a local helper, so the
 25 | %   Wavelet Toolbox function wthresh is not required.
 26 | 
 27 | % Enhong Zhuo, 2019
 28 | 
 29 | num_views = length(X);
 30 | n = size(X{1},1);
 31 | 
 32 | % Every view must describe the same n samples; otherwise the n-by-n
 33 | % updates below fail with an obscure dimension error.
 34 | for v = 2:num_views
 35 |     if size(X{v},1) ~= n
 36 |         error('Parameter error: all views must have the same number of rows.');
 37 |     end
 38 | end
 39 | 
 40 | % setting default parameters
 41 | num_iter = 100;
 42 | mu = 10;
 43 | max_mu = 1e6;
 44 | rho = 1.5;
 45 | alpha = 0.5;
 46 | beta = [1, 1];
 47 | gamma = [0.01 0.01];
 48 | err_thr = 1e-5;
 49 | 
 50 | % opts may be omitted or passed as [] to request the defaults.
 51 | if nargin < 2 || isempty(opts)
 52 |     opts = struct();
 53 | elseif ~isstruct(opts)
 54 |     error('Parameter error: opts is not a structure.');
 55 | end
 56 | 
 57 | if isfield(opts, 'alpha');      alpha = opts.alpha;         end
 58 | if isfield(opts, 'beta');       beta = opts.beta;           end
 59 | if isfield(opts, 'gamma');      gamma = opts.gamma;         end
 60 | if isfield(opts, 'mu');         mu = opts.mu;               end
 61 | if isfield(opts, 'max_mu');     max_mu = opts.max_mu;       end
 62 | if isfield(opts, 'rho');        rho = opts.rho;             end
 63 | if isfield(opts, 'num_iter');   num_iter = opts.num_iter;   end
 64 | if isfield(opts, 'err_thr');    err_thr = opts.err_thr;     end
 65 | 
 66 | % The same alpha is used for every view.
 67 | alpha = repmat({alpha}, 1, num_views);
 68 | 
 69 | % Per-view variables A1..A4 and their multipliers Lambda1..Lambda3.
 70 | zero_cells = repmat({zeros(n,n)}, 1, num_views);
 71 | A1 = zero_cells;
 72 | A2 = zero_cells;
 73 | A3 = zero_cells;
 74 | A4 = zero_cells;
 75 | Lambda1 = zero_cells;
 76 | Lambda2 = zero_cells;
 77 | Lambda3 = zero_cells;
 78 | 
 79 | % Shared variables B1..B3 and their multipliers Lambda4, Lambda5.
 80 | B1 = zeros(n,n);
 81 | B2 = zeros(n,n);
 82 | B3 = zeros(n,n);
 83 | Lambda4 = zeros(n,n);
 84 | Lambda5 = zeros(n,n);
 85 | 
 86 | % One penalty weight per constraint (three per-view, two shared).
 87 | mu = mu * ones(5,1);
 88 | 
 89 | % Linear kernel (Gram matrix) of every view.
 90 | K = cell(1, num_views);
 91 | for v = 1:num_views
 92 |     K{v} = X{v} * X{v}';
 93 | end
 94 | 
 95 | I_n = eye(n);   % hoisted: the identity does not change across iterations
 96 | 
 97 | iter = 0;
 98 | err = ones(num_views*3+2,1);
 99 | 
100 | while iter < num_iter && max(err) > err_thr
101 | 
102 |     iter = iter + 1;
103 |     view_sum = zeros(n);    % accumulates alpha*(A2'+A2) over the views
104 |     for v = 1:num_views
105 | 
106 |         % A1: closed-form solve of the linear system (right division).
107 |         rhs = K{v}+mu(1)*A2{v}+mu(2)*A3{v}+mu(3)*A4{v}-Lambda1{v}-Lambda2{v}-Lambda3{v};
108 |         A1{v} = rhs / (K{v}+sum(mu(1:3))*I_n);
109 | 
110 |         % A2: pulled towards the symmetrised shared matrix B1.
111 |         A2{v} = A1{v} + (alpha{v}*(B1'+B1)+Lambda1{v})/mu(1);
112 |         % A3: singular value thresholding.
113 |         A3{v} = SVthresh(A1{v}+Lambda2{v}/mu(2), beta(1)/mu(2));
114 |         % A4: element-wise soft thresholding, then a zeroed diagonal.
115 |         A4{v} = soft_threshold(A1{v}+Lambda3{v}/mu(3), beta(2)/mu(3));
116 |         A4{v} = A4{v} - diag(diag(A4{v}));
117 | 
118 |         % Multiplier updates for the three per-view constraints.
119 |         Lambda1{v} = Lambda1{v} + mu(1)*(A1{v}-A2{v});
120 |         Lambda2{v} = Lambda2{v} + mu(2)*(A1{v}-A3{v});
121 |         Lambda3{v} = Lambda3{v} + mu(3)*(A1{v}-A4{v});
122 | 
123 |         view_sum = view_sum + alpha{v}*(A2{v}'+A2{v});
124 | 
125 |         % Constraint gaps of this view, used by the stopping test.
126 |         err((v-1)*3+1) = norm(A1{v}-A2{v},'inf');
127 |         err((v-1)*3+2) = norm(A1{v}-A3{v},'inf');
128 |         err((v-1)*3+3) = norm(A1{v}-A4{v},'inf');
129 | 
130 |     end
131 | 
132 |     % Shared variables: same pattern as the per-view block above.
133 |     B1 = (view_sum + mu(4)*B2 + mu(5)*B3 - Lambda4 - Lambda5) / sum(mu(4:5));
134 |     B2 = SVthresh(B1+Lambda4/mu(4), gamma(1)/mu(4));
135 |     B3 = soft_threshold(B1+Lambda5/mu(5), gamma(2)/mu(5));
136 | 
137 |     Lambda4 = Lambda4 + mu(4)*(B1-B2);
138 |     Lambda5 = Lambda5 + mu(5)*(B1-B3);
139 | 
140 |     err(num_views*3+1) = norm(B1-B2,'inf');
141 |     err(num_views*3+2) = norm(B1-B3,'inf');
142 | 
143 |     % Grow every penalty weight, capped at max_mu.
144 |     mu = min(rho*mu,max_mu);
145 | 
146 | end
147 | 
148 | end
149 | 
150 | 
151 | function Y = soft_threshold( X, t )
152 | % SOFT_THRESHOLD  Element-wise soft thresholding.
153 | %   Shrinks the magnitude of every entry of X by t and clips at zero,
154 | %   which is what wthresh(X, 's', t) computes.
155 | Y = sign(X) .* max(abs(X) - t, 0);
156 | end
157 | 
158 | 
159 | 


--------------------------------------------------------------------------------
/metric/accuracyMeasure.m:
--------------------------------------------------------------------------------
1 | function AC = accuracyMeasure(gnd,res)
2 |     res = bestMap(gnd,res);
3 |     AC = length(find(gnd == res))/length(gnd);
4 | end


--------------------------------------------------------------------------------
/metric/adjrand.m:
--------------------------------------------------------------------------------
 1 | function ari = adjrand(P1,P2)
 2 | 
 3 | % ADJRAND   Adjusted Rand Index to Compare Two Partitions
 4 | %
 5 | %   ARI = ADJRAND(P1,P2) returns the adjusted rand index for partitions
 6 | %   P1 and P2 for the same data set. Each of these partitions
 7 | %   are vectors with an index to the group number. For example,
 8 | %   this could be the output from KMEANS or CLUSTER.
 9 | %
10 | %   When the index is undefined (0/0) -- fewer than two points, or both
11 | %   partitions put all points in one group or every point in its own
12 | %   group -- the two partitions are necessarily identical and 1 is
13 | %   returned instead of NaN.
14 | %
15 | %   An error is raised when P1 and P2 differ in length.
16 | 
17 | if length(P1) ~= length(P2)
18 |     error('Input vectors must be the same length.')
19 | end
20 | n = length(P1);
21 | 
22 | % No pair of points exists, so there is nothing to disagree about.
23 | if n < 2
24 |     ari = 1;
25 |     return
26 | end
27 | 
28 | % Map the labels of each partition onto 1..g1 and 1..g2.
29 | [uP1, skip1, idx1] = unique(P1(:));
30 | [uP2, skip2, idx2] = unique(P2(:));
31 | g1 = length(uP1);
32 | g2 = length(uP2);
33 | 
34 | % Matching matrix: M(I,J) counts the points that are in group I of P1
35 | % and in group J of P2.
36 | M = accumarray([idx1(:) idx2(:)], 1, [g1 g2]);
37 | 
38 | % Marginal totals. Naming the dimension keeps this correct when M has a
39 | % single row or a single column.
40 | nidot = sum(M,1);
41 | njdot = sum(M,2);
42 | 
43 | % "x choose 2" written as x*(x-1)/2, which is 0 for x = 0 and x = 1.
44 | nc2     = n*(n-1)/2;
45 | nijc2   = M .* (M-1) / 2;
46 | nidotc2 = nidot .* (nidot-1) / 2;
47 | njdotc2 = njdot .* (njdot-1) / 2;
48 | 
49 | % Now calculate the index.
50 | expected = sum(nidotc2)*sum(njdotc2)/nc2;
51 | N = sum(nijc2(:)) - expected;
52 | D = (sum(nidotc2) + sum(njdotc2))/2 - expected;
53 | 
54 | if D == 0
55 |     % Only reachable when both partitions are trivial and identical.
56 |     ari = 1;
57 | else
58 |     ari = N/D;
59 | end
60 | 
61 | 


--------------------------------------------------------------------------------
/metric/bestMap.m:
--------------------------------------------------------------------------------
  1 | function [newL2, c] = bestMap(L1,L2)
  2 | %BESTMAP Permute the labels of L2 so that they match L1 as well as possible.
  3 | %   [newL2, c] = bestMap(L1,L2) returns L2 relabelled in the label space of
  4 | %   L1 (column vector, one entry per sample) and the assignment c, where
  5 | %   c(i) is the shifted L1 label given to shifted L2 label i.
  6 | L1 = L1(:);
  7 | L2 = L2(:);
  8 | if numel(L1) ~= numel(L2)   % scalar test; "size(L1) ~= size(L2)" never fired
  9 |     error('size(L1) must == size(L2)');
 10 | end
 11 | offset = min(L1) - 1;       % kept so the result can be mapped back to L1's labels
 12 | L1 = L1 - offset;           %   min (L1) <- 1;
 13 | L2 = L2 - min(L2) + 1;      %   min (L2) <- 1;
 14 | nClass = max(max(L1), max(L2));
 15 | G = zeros(nClass);          % G(i,j) = number of samples with L1 == i and L2 == j
 16 | for i=1:nClass
 17 |     for j=1:nClass
 18 |         G(i,j) = length(find(L1 == i & L2 == j));
 19 |     end
 20 | end
 21 | %===========    assign with hungarian method    ======
 22 | [c,t] = hungarian(-G);      % negated: hungarian minimises, the overlap is maximised
 23 | newL2 = zeros(size(L2));    % one entry per sample, not per class
 24 | for i=1:nClass
 25 |     newL2(L2 == i) = c(i) + offset;
 26 | end
 27 | 
 28 | 
 29 | function [C,T]=hungarian(A)
 30 | %HUNGARIAN Solve the Assignment problem using the Hungarian method.
 31 | %
 32 | %[C,T]=hungarian(A)
 33 | %A - a square cost matrix.
 34 | %C - the optimal assignment.
 35 | %T - the cost of the optimal assignment.
 36 | %s.t. T = trace(A(C,:)) is minimized over all possible assignments.
 37 | %C(J)=I means row I is assigned to column J (same convention as hminiass).
 38 | % Adapted from the FORTRAN IV code in Carpaneto and Toth, "Algorithm 548:
 39 | % Solution of the assignment problem [H]", ACM Transactions on
 40 | % Mathematical Software, 6(1):104-111, 1980.
 41 | 
 42 | % v1.0  96-06-14. Niclas Borlin, niclas@cs.umu.se.
 43 | %                 Department of Computing Science, Umeå University,
 44 | %                 Sweden. 
 45 | %                 All standard disclaimers apply.
 46 | 
 47 | % A substantial effort was put into this code. If you use it for a
 48 | % publication or otherwise, please include an acknowledgement or at least
 49 | % notify me by email. /Niclas
 50 | 
 51 | [m,n]=size(A);
 52 | 
 53 | if (m~=n)
 54 |     error('HUNGARIAN: Cost matrix must be square!');
 55 | end
 56 | 
 57 | % Save original cost matrix.
 58 | orig=A;
 59 | 
 60 | % Reduce matrix.
 61 | A=hminired(A);
 62 | 
 63 | % Do an initial assignment.
 64 | [A,C,U]=hminiass(A);
 65 | 
 66 | % Repeat while we have unassigned rows (U(n+1) heads that list, 0 = empty).
 67 | while (U(n+1))
 68 |     % Start with no path, no unchecked zeros, and no unexplored rows.
 69 |     LR=zeros(1,n);
 70 |     LC=zeros(1,n);
 71 |     CH=zeros(1,n);
 72 |     RH=[zeros(1,n) -1];
 73 |     
 74 |     % No labelled columns.
 75 |     SLC=[];
 76 |     
 77 |     % Start path in first unassigned row.
 78 |     r=U(n+1);
 79 |     % Mark row with end-of-path label.
 80 |     LR(r)=-1;
 81 |     % Insert row first in labelled row set.
 82 |     SLR=r;
 83 |     
 84 |     % Repeat until we manage to find an assignable zero.
 85 |     while (1)
 86 |         % If there are free zeros in row r
 87 |         if (A(r,n+1)~=0)
 88 |             % ...get column of first free zero.
 89 |             l=-A(r,n+1);
 90 |             
 91 |             % If there are more free zeros in row r and row r is not
 92 |             % yet marked as unexplored..
 93 |             if (A(r,l)~=0 && RH(r)==0)
 94 |                 % Insert row r first in unexplored list.
 95 |                 RH(r)=RH(n+1);
 96 |                 RH(n+1)=r;
 97 |                 
 98 |                 % Mark in which column the next unexplored zero in this row
 99 |                 % is.
100 |                 CH(r)=-A(r,l);
101 |             end
102 |         else
103 |             % If all rows are explored..
104 |             if (RH(n+1)<=0)
105 |                 % Reduce matrix.
106 |                 [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR);
107 |             end
108 |             
109 |             % Re-start with first unexplored row.
110 |             r=RH(n+1);
111 |             % Get column of next free zero in row r.
112 |             l=CH(r);
113 |             % Advance "column of next free zero".
114 |             CH(r)=-A(r,l);
115 |             % If this zero is last in the list..
116 |             if (A(r,l)==0)
117 |                 % ...remove row r from unexplored list.
118 |                 RH(n+1)=RH(r);
119 |                 RH(r)=0;
120 |             end
121 |         end
122 |         
123 |         % While the column l is labelled, i.e. in path.
124 |         while (LC(l)~=0)
125 |             % If row r is explored..
126 |             if (RH(r)==0)
127 |                 % If all rows are explored..
128 |                 if (RH(n+1)<=0)
129 |                     % Reduce cost matrix.
130 |                     [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR);
131 |                 end
132 |                 
133 |                 % Re-start with first unexplored row.
134 |                 r=RH(n+1);
135 |             end
136 |             
137 |             % Get column of next free zero in row r.
138 |             l=CH(r);
139 |             
140 |             % Advance "column of next free zero".
141 |             CH(r)=-A(r,l);
142 |             
143 |             % If this zero is last in list..
144 |             if(A(r,l)==0)
145 |                 % ...remove row r from unexplored list.
146 |                 RH(n+1)=RH(r);
147 |                 RH(r)=0;
148 |             end
149 |         end
150 |         
151 |         % If the column found is unassigned..
152 |         if (C(l)==0)
153 |             % Flip all zeros along the path in LR,LC.
154 |             [A,C,U]=hmflip(A,C,LC,LR,U,l,r);
155 |             % ...and exit to continue with next unassigned row.
156 |             break;
157 |         else
158 |             % ...else add zero to path.
159 |             
160 |             % Label column l with row r.
161 |             LC(l)=r;
162 |             
163 |             % Add l to the set of labelled columns.
164 |             SLC=[SLC l];
165 |             
166 |             % Continue with the row assigned to column l.
167 |             r=C(l);
168 |             
169 |             % Label row r with column l.
170 |             LR(r)=l;
171 |             
172 |             % Add r to the set of labelled rows.
173 |             SLR=[SLR r];
174 |         end
175 |     end
176 | end
177 | 
178 | % Calculate the total cost: sum of orig(C(j),j) over all columns j.
179 | T=sum(orig(logical(sparse(C,1:size(orig,2),1))));
180 | 
181 | 
182 | function A=hminired(A)
183 | %HMINIRED Initial reduction of cost matrix for the Hungarian method.
184 | %
185 | %B=hminired(A)
186 | %A - the unreduced cost matrix.
187 | %B - the reduced cost matrix with linked zeros in each row.
188 | 
189 | % v1.0  96-06-13. Niclas Borlin, niclas@cs.umu.se.
190 | 
191 | [m,n]=size(A);
192 | 
193 | % Subtract column-minimum values from each column.
194 | colMin=min(A);
195 | A=A-colMin(ones(n,1),:);
196 | 
197 | % Subtract row-minimum values from each row.
198 | rowMin=min(A')';
199 | A=A-rowMin(:,ones(1,n));
200 | 
201 | % Get positions of all zeros.
202 | [i,j]=find(A==0);
203 | % The zeros of each row are chained through negated column indices.
204 | % Extend A to give room for row zero list header column.
205 | A(1,n+1)=0;
206 | for k=1:n
207 |     % Get all columns holding a zero in this row.
208 |     cols=j(k==i)';
209 |     % Insert pointers: header -> -first zero, zero -> -next zero, last -> 0.
210 |     A(k,[n+1 cols])=[-cols 0];
211 | end
212 | 
213 | 
214 | function [A,C,U]=hminiass(A)
215 | %HMINIASS Initial assignment of the Hungarian method.
216 | %
217 | %[B,C,U]=hminiass(A)
218 | %A - the reduced cost matrix.
219 | %B - the reduced cost matrix, with assigned zeros removed from lists.
220 | %C - a vector. C(J)=I means row I is assigned to column J,
221 | %              i.e. there is an assigned zero in position I,J.
222 | %U - a vector with a linked list of unassigned rows.
223 | 
224 | % v1.0  96-06-14. Niclas Borlin, niclas@cs.umu.se.
225 | 
226 | [n,np1]=size(A);
227 | 
228 | % Initialize return vectors.
229 | C=zeros(1,n);
230 | U=zeros(1,n+1);
231 | 
232 | % Initialize last/next zero "pointers".
233 | LZ=zeros(1,n);
234 | NZ=zeros(1,n);
235 | 
236 | for i=1:n
237 |     % Set j to first unassigned zero in row i.
238 | 	lj=n+1;
239 | 	j=-A(i,lj);
240 | 
241 |     % Repeat until we have no more zeros (j==0) or we find a zero
242 | 	% in an unassigned column (C(j)==0).
243 |     
244 | 	while (C(j)~=0)
245 | 		% Advance lj and j in zero list.
246 | 		lj=j;
247 | 		j=-A(i,lj);
248 | 	
249 | 		% Stop if we hit end of list.
250 | 		if (j==0)
251 | 			break;
252 | 		end
253 | 	end
254 | 
255 | 	if (j~=0)
256 | 		% We found a zero in an unassigned column.
257 | 		
258 | 		% Assign row i to column j.
259 | 		C(j)=i;
260 | 		
261 | 		% Remove A(i,j) from unassigned zero list.
262 | 		A(i,lj)=A(i,j);
263 | 
264 | 		% Update next/last unassigned zero pointers.
265 | 		NZ(i)=-A(i,j);
266 | 		LZ(i)=lj;
267 | 
268 | 		% Indicate A(i,j) is an assigned zero.
269 | 		A(i,j)=0;
270 | 	else
271 | 		% We found no zero in an unassigned column.
272 | 
273 | 		% Check all zeros in this row.
274 | 
275 | 		lj=n+1;
276 | 		j=-A(i,lj);
277 | 		
278 | 		% Check all zeros in this row for a suitable zero in another row.
279 | 		while (j~=0)
280 | 			% Check the row assigned to this column.
281 | 			r=C(j);
282 | 			
283 | 			% Pick up last/next pointers.
284 | 			lm=LZ(r);
285 | 			m=NZ(r);
286 | 			
287 | 			% Check all unchecked zeros in free list of this row.
288 | 			while (m~=0)
289 | 				% Stop if we find an unassigned column.
290 | 				if (C(m)==0)
291 | 					break;
292 | 				end
293 | 				
294 | 				% Advance one step in list.
295 | 				lm=m;
296 | 				m=-A(r,lm);
297 | 			end
298 | 			
299 | 			if (m==0)
300 | 				% We failed on row r. Continue with next zero on row i.
301 | 				lj=j;
302 | 				j=-A(i,lj);
303 | 			else
304 | 				% We found a zero in an unassigned column.
305 | 			
306 | 				% Replace zero at (r,m) in unassigned list with zero at (r,j)
307 | 				A(r,lm)=-j;
308 | 				A(r,j)=A(r,m);
309 | 			
310 | 				% Update last/next pointers in row r.
311 | 				NZ(r)=-A(r,m);
312 | 				LZ(r)=j;
313 | 			
314 | 				% Mark A(r,m) as an assigned zero in the matrix . . .
315 | 				A(r,m)=0;
316 | 			
317 | 				% ...and in the assignment vector.
318 | 				C(m)=r;
319 | 			
320 | 				% Remove A(i,j) from unassigned list.
321 | 				A(i,lj)=A(i,j);
322 | 			
323 | 				% Update last/next pointers in row i.
324 | 				NZ(i)=-A(i,j);
325 | 				LZ(i)=lj;
326 | 			
327 | 				% Mark A(i,j) as an assigned zero in the matrix . . .
328 | 				A(i,j)=0;
329 | 			
330 | 				% ...and in the assignment vector.
331 | 				C(j)=i;
332 | 				
333 | 				% Stop search.
334 | 				break;
335 | 			end
336 | 		end
337 | 	end
338 | end
339 | 
340 | % Create vector with list of unassigned rows.
341 | 
342 | % Mark all rows that have an assignment.
343 | r=zeros(1,n);
344 | rows=C(C~=0);
345 | r(rows)=rows;
346 | empty=find(r==0);
347 | 
348 | % Create vector with linked list of unassigned rows (U(n+1) is the head).
349 | U=zeros(1,n+1);
350 | U([n+1 empty])=[empty 0];
351 | 
352 | 
353 | function [A,C,U]=hmflip(A,C,LC,LR,U,l,r)
354 | %HMFLIP Flip assignment state of all zeros along a path.
355 | %
356 | %[A,C,U]=hmflip(A,C,LC,LR,U,l,r)
357 | %Input:
358 | %A   - the cost matrix.
359 | %C   - the assignment vector.
360 | %LC  - the column label vector.
361 | %LR  - the row label vector.
362 | %U   - the linked list of unassigned rows.
363 | %r,l - position of last zero in path.
364 | %Output:
365 | %A   - updated cost matrix.
366 | %C   - updated assignment vector.
367 | %U   - updated unassigned row list vector.
368 | 
369 | % v1.0  96-06-14. Niclas Borlin, niclas@cs.umu.se.
370 | 
371 | n=size(A,1);
372 | 
373 | while (1)
374 |     % Move assignment in column l to row r.
375 |     C(l)=r;
376 |     
377 |     % Find zero to be removed from zero list..
378 |     
379 |     % Find zero before this.
380 |     m=find(A(r,:)==-l);
381 |     
382 |     % Link past this zero.
383 |     A(r,m)=A(r,l);
384 |     
385 |     A(r,l)=0;
386 |     % NOTE: the "..." line inside the if is a bare continuation; MATLAB ignores its text.
387 |     % If this was the first zero of the path..
388 |     if (LR(r)<0)
389 |         ...remove row from unassigned row list and return.
390 |         U(n+1)=U(r);
391 |         U(r)=0;
392 |         return;
393 |     else
394 |         
395 |         % Move back in this row along the path and get column of next zero.
396 |         l=LR(r);
397 |         
398 |         % Insert zero at (r,l) first in zero list.
399 |         A(r,l)=A(r,n+1);
400 |         A(r,n+1)=-l;
401 |         
402 |         % Continue back along the column to get row of next zero in path.
403 |         r=LC(l);
404 |     end
405 | end
406 | 
407 | 
408 | function [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR)
409 | %HMREDUCE Reduce parts of cost matrix in the Hungarian method.
410 | %
411 | %[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR)
412 | %Input:
413 | %A   - Cost matrix.
414 | %CH  - vector of column of 'next zeros' in each row.
415 | %RH  - vector with list of unexplored rows.
416 | %LC  - column labels.
417 | %LR  - row labels.
418 | %SLC - set of column labels.
419 | %SLR - set of row labels.
420 | %
421 | %Output:
422 | %A   - Reduced cost matrix.
423 | %CH  - Updated vector of 'next zeros' in each row.
424 | %RH  - Updated vector of unexplored rows.
425 | 
426 | % v1.0  96-06-14. Niclas Borlin, niclas@cs.umu.se.
427 | 
428 | n=size(A,1);
429 | 
430 | % Find which rows are covered, i.e. unlabelled.
431 | coveredRows=LR==0;
432 | 
433 | % Find which columns are covered, i.e. labelled.
434 | coveredCols=LC~=0;
435 | 
436 | r=find(~coveredRows);
437 | c=find(~coveredCols);
438 | 
439 | % Get minimum of uncovered elements.
440 | m=min(min(A(r,c)));
441 | 
442 | % Subtract minimum from all uncovered elements.
443 | A(r,c)=A(r,c)-m;
444 | 
445 | % Check all uncovered columns..
446 | for j=c
447 |     % ...and uncovered rows in path order..
448 |     for i=SLR
449 |         % If this is a (new) zero..
450 |         if (A(i,j)==0)
451 |             % If the row is not in unexplored list..
452 |             if (RH(i)==0)
453 |                 % ...insert it first in unexplored list.
454 |                 RH(i)=RH(n+1);
455 |                 RH(n+1)=i;
456 |                 % Mark this zero as "next free" in this row.
457 |                 CH(i)=j;
458 |             end
459 |             % Find last unassigned zero on row i.
460 |             row=A(i,:);
461 |             colsInList=-row(row<0);
462 |             if (length(colsInList)==0)
463 |                 % No zeros in the list.
464 |                 l=n+1;
465 |             else
466 |                 l=colsInList(row(colsInList)==0);
467 |             end
468 |             % Append this zero to end of list.
469 |             A(i,l)=-j;
470 |         end
471 |     end
472 | end
473 | 
474 | % Add minimum to all doubly covered elements.
475 | r=find(coveredRows);
476 | c=find(coveredCols);
477 | 
478 | % Take care of the zeros we will remove.
479 | [i,j]=find(A(r,c)<=0);
480 | % Translate positions inside the A(r,c) sub-block back to indices of A.
481 | i=r(i);
482 | j=c(j);
483 | 
484 | for k=1:length(i)
485 |     % Find zero before this in this row.
486 |     lj=find(A(i(k),:)==-j(k));
487 |     % Link past it.
488 |     A(i(k),lj)=A(i(k),j(k));
489 |     % Mark it as assigned.
490 |     A(i(k),j(k))=0;
491 | end
492 | 
493 | A(r,c)=A(r,c)+m;
494 | 


--------------------------------------------------------------------------------
/metric/clustering_metric.m:
--------------------------------------------------------------------------------
1 | function [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric( gnd, res )
2 | % Score the labelling res against the reference gnd with every metric helper.
3 | NMI = nmi(gnd,res);                                 % normalized mutual information
4 | ARI = adjrand(gnd,res);                             % adjusted Rand index
5 | ACC = accuracyMeasure(gnd,res);                     % accuracy after best label matching
6 | [fscore, precision, recall] = fprMeasure(gnd,res);  % f-score, precision, recall
7 | % All six outputs are passed through unchanged from the helpers above.
8 | end
9 | 


--------------------------------------------------------------------------------
/metric/fprMeasure.m:
--------------------------------------------------------------------------------
 1 | function [f,p,r] = fprMeasure(T,H)
 2 | %FPRMEASURE Pairwise F-score, precision and recall of a clustering.
 3 | %   [f,p,r] = fprMeasure(T,H) counts, over all unordered pairs of samples,
 4 | %   the pairs sharing a label in T (numT), sharing a label in H (numH),
 5 | %   and sharing a label in both (numI), then returns
 6 | %       p = numI / numH   (1 when numH is 0)
 7 | %       r = numI / numT   (1 when numT is 0)
 8 | %       f = 2*p*r/(p+r)   (0 when p+r is 0)
 9 | %
10 | %   T -- ground truth labels (vector, any orientation)
11 | %   H -- result labels (vector with as many elements as T)
12 | %
13 | %   An error is raised when T and H differ in length.
14 | 
15 |   % Work on columns so a row/column mix cannot expand into a matrix.
16 |   T = T(:);
17 |   H = H(:);
18 | 
19 |   N = length(T);
20 |   if N ~= length(H)
21 |     error('fprMeasure:lengthMismatch', ...
22 |           'T and H must have the same number of elements (%d vs %d).', ...
23 |           N, length(H));
24 |   end
25 | 
26 |   numT = 0;
27 |   numH = 0;
28 |   numI = 0;
29 |   for n = 1:N
30 |     % Compare sample n with every later sample: each pair is seen once.
31 |     Tn = T(n+1:end) == T(n);
32 |     Hn = H(n+1:end) == H(n);
33 |     numT = numT + sum(Tn);
34 |     numH = numH + sum(Hn);
35 |     numI = numI + sum(Tn & Hn);
36 |   end
37 | 
38 |   p = 1;
39 |   r = 1;
40 |   if numH > 0
41 |     p = numI / numH;
42 |   end
43 |   if numT > 0
44 |     r = numI / numT;
45 |   end
46 |   if (p+r) == 0
47 |     f = 0;
48 |   else
49 |     f = 2 * p * r / (p + r);
50 |   end
51 | 


--------------------------------------------------------------------------------
/metric/nmi.m:
--------------------------------------------------------------------------------
 1 | function z = nmi(x, y)
 2 | % NMI  Normalized mutual information of two discrete label vectors.
 3 | %   z = nmi(x, y) computes sqrt((I/H(x))*(I/H(y))) with I = H(x)+H(y)-H(x,y),
 4 | %   all entropies in bits, clipped below at 0.
 5 | % Input:
 6 | %   x, y: two integer vectors with the same number of elements
 7 | % Output:
 8 | %   z: normalized mutual information
 9 | % Written by Mo Chen (sth4nth@gmail.com).
10 | assert(numel(x) == numel(y));
11 | numPts = numel(x);
12 | x = reshape(x,1,numPts);
13 | y = reshape(y,1,numPts);
14 | 
15 | % Shift both label sets by their common minimum so the smallest label is 1.
16 | base = min(min(x),min(y));
17 | x = x-base+1;
18 | y = y-base+1;
19 | numLab = max(max(x),max(y));
20 | 
21 | % Sparse one-hot indicator matrices: one row per sample, one column per label.
22 | rows = 1:numPts;
23 | indX = sparse(rows,x,1,numPts,numLab,numPts);
24 | indY = sparse(rows,y,1,numPts,numLab,numPts);
25 | 
26 | % Keeping only the non-zero probabilities avoids the 0*log(0) issue.
27 | jointP = nonzeros(indX'*indY/numPts);   % joint distribution of x and y
28 | margX  = nonzeros(mean(indX,1));
29 | margY  = nonzeros(mean(indY,1));
30 | 
31 | % Entropy in bits of a vector of non-zero probabilities.
32 | entropyOf = @(prob) -dot(prob,log2(prob));
33 | Hxy = entropyOf(jointP);
34 | Hx  = entropyOf(margX);
35 | Hy  = entropyOf(margY);
36 | 
37 | % mutual information
38 | MI = Hx + Hy - Hxy;
39 | 
40 | % normalized mutual information, clipped below at 0
41 | z = max(0,sqrt((MI/Hx)*(MI/Hy)));
42 | 
43 | 


--------------------------------------------------------------------------------
/metric/rand_index.m:
--------------------------------------------------------------------------------
 1 | function ri = rand_index(p1, p2, varargin)
 2 | %RAND_INDEX Computes the rand index between two partitions.
 3 | %   RAND_INDEX(p1, p2) computes the rand index between partitions p1 and
 4 | %   p2.
 5 | %
 6 | %   RAND_INDEX(p1, p2, 'adjusted'); computes the adjusted rand index
 7 | %   between partitions p1 and p2. The adjustment accounts for chance
 8 | %   correlation.
 9 | %
10 | %   p1, p2 -- label vectors with the same number of elements.
11 | %
12 | %   When the index is undefined (0/0) -- fewer than two points, or, for
13 | %   the adjusted index, two trivial identical partitions -- 1 is returned.
14 | %
15 | %   Errors: fewer than two or more than three arguments, an unrecognised
16 | %   third argument, or partitions of different length.
17 | 
18 |     % Parse the input and throw errors
19 |     if nargin < 2
20 |         error('Two partitions are required.');
21 |     end
22 |     if nargin > 3
23 |         error('Too many input arguments');
24 |     end
25 |     adj = 0;
26 |     if nargin == 3
27 |         if strcmp(varargin{1}, 'adjusted')
28 |             adj = 1;
29 |         else
30 |             error('%s is an unrecognized argument.', varargin{1});
31 |         end
32 |     end
33 |     if length(p1)~=length(p2)
34 |         error('Both partitions must contain the same number of points.');
35 |     end
36 | 
37 |     % No pair of points exists, so the partitions cannot disagree.
38 |     N = length(p1);
39 |     if N < 2
40 |         ri = 1;
41 |         return;
42 |     end
43 | 
44 |     % Preliminary computations and cleansing of the partitions:
45 |     % relabel both partitions as 1..N1 and 1..N2.
46 |     [~, ~, p1] = unique(p1);
47 |     N1 = max(p1);
48 |     [~, ~, p2] = unique(p2);
49 |     N2 = max(p2);
50 | 
51 |     % Matching matrix: n(i,j) = points in group i of p1 and group j of p2.
52 |     n = accumarray([p1(:) p2(:)], 1, [N1 N2]);
53 | 
54 |     total_pairs = pair_count(N);
55 | 
56 |     if adj==0
57 |         % Basic rand index
58 |         ss = sum(n(:).^2);
59 |         ss1 = sum(sum(n,1).^2);
60 |         ss2 = sum(sum(n,2).^2);
61 |         ri = (total_pairs + ss - 0.5*ss1 - 0.5*ss2)/total_pairs;
62 |     else
63 |         % Adjusted rand index
64 |         ssm = sum(pair_count(n(:)));
65 |         sm1 = sum(pair_count(sum(n,2)));
66 |         sm2 = sum(pair_count(sum(n,1)));
67 |         expected = sm1*sm2/total_pairs;
68 |         NN = ssm - expected;
69 |         DD = (sm1 + sm2)/2 - expected;
70 |         if DD == 0
71 |             % Only reachable for two trivial, identical partitions.
72 |             ri = 1;
73 |         else
74 |             ri = NN/DD;
75 |         end
76 |     end
77 | 
78 | 
79 |     % Element-wise "a choose 2"; yields 0 for a = 0 and a = 1.
80 |     function c = pair_count(a)
81 |         c = a .* (a-1) / 2;
82 |     end
83 | end
84 | 


--------------------------------------------------------------------------------
/run_3s.m:
--------------------------------------------------------------------------------
 1 | clear;
 2 | clc;
 3 | % Put the data, metric and tool folders on the MATLAB path.
 4 | addpath('./dataset');
 5 | addpath('./metric');
 6 | addpath('./tool');
 7 | % bbc, guardian, reuters and truth presumably come from this .mat file -- TODO confirm.
 8 | load('./dataset/3-sources.mat');
 9 | X{1} = bbc;
10 | X{2} = guardian;
11 | X{3} = reuters;
12 | label = truth;
13 | % Number of clusters = number of distinct labels.
14 | nclass = length(unique(label));
15 | % Parameters passed to method(); fields not set here keep method.m's defaults.
16 | opts.alpha = 0.3;
17 | opts.beta = [1, 10];
18 | opts.gamma = [0.001, 0.01];
19 | opts.mu = 10;
20 | % Learn the matrix W, cluster it, then score the grouping against label.
21 | W = method( X, opts );
22 | group = SpectralClustering2(W, nclass);
23 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group);


--------------------------------------------------------------------------------
/run_Caltech.m:
--------------------------------------------------------------------------------
 1 | clear;
 2 | clc;
 3 | % Put the data, metric and tool folders on the MATLAB path.
 4 | addpath('./dataset');
 5 | addpath('./metric');
 6 | addpath('./tool');
 7 | % Each descriptor is transposed into one view of X (method.m expects one row per sample).
 8 | load('./dataset/caltech7.mat');
 9 | X{1} = centrist';
10 | X{2} = garbor';
11 | X{3} = gist';
12 | X{4} = hog';
13 | X{5} = lbp';
14 | X{6} = wm';
15 | % label is not assigned in this script; presumably it comes from the .mat file -- TODO confirm.
16 | nclass = length(unique(label));
17 | % Parameters passed to method(); fields not set here keep method.m's defaults.
18 | opts.alpha = 0.3;
19 | opts.beta = [1, 1];
20 | opts.gamma = [0.01, 0.01];
21 | opts.mu = 10;
22 | % Learn the matrix W, cluster it, then score the grouping against label.
23 | W = method( X, opts );
24 | group = SpectralClustering(W, nclass);
25 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group);


--------------------------------------------------------------------------------
/run_UCI.m:
--------------------------------------------------------------------------------
 1 | clear;
 2 | clc;
 3 | % Experiment script: UCI data -> method.m -> SpectralClustering2 -> clustering metrics.
 4 | addpath('./dataset');
 5 | addpath('./metric');
 6 | addpath('./tool');
 7 | % All paths are relative to the current folder. The MAT-file is expected to supply fou, fac, kar and label.
 8 | load('./dataset/uci.mat');
 9 | X{1} = fou';
10 | X{2} = fac';
11 | X{3} = kar';
12 | % Each view is stored transposed in X. Number of clusters = number of distinct values in label.
13 | nclass = length(unique(label));
14 | % Hyperparameters handed to method.m; their meaning is defined there (not visible in this script).
15 | opts.alpha = 0.7;
16 | opts.beta = [0.01, 0.01];
17 | opts.gamma = [1, 0.01];
18 | opts.mu = 10;
19 | % Compute W from the views, split it into nclass groups, then score the grouping against label.
20 | W = method( X, opts );
21 | group = SpectralClustering2(W, nclass);
22 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); % output suppressed; results stay in the workspace


--------------------------------------------------------------------------------
/run_bbc.m:
--------------------------------------------------------------------------------
 1 | clear;
 2 | clc;
 3 | % Experiment script: bbc.mat data -> method.m -> SpectralClustering -> clustering metrics.
 4 | addpath('./dataset');
 5 | addpath('./metric');
 6 | addpath('./tool');
 7 | % All paths are relative to the current folder. The MAT-file is expected to supply the cell array X (three views) and truth.
 8 | load('./dataset/bbc.mat');
 9 | X{1} = X{1}';
10 | X{2} = X{2}';
11 | X{3} = X{3}';
12 | label = truth;
13 | % The loaded views are transposed in place above. Number of clusters = number of distinct values in label.
14 | nclass = length(unique(label));
15 | % Hyperparameters handed to method.m; their meaning is defined there (not visible in this script).
16 | opts.alpha = 0.5;
17 | opts.beta = [1, 1];
18 | opts.gamma = [0.1, 0.01];
19 | opts.mu = 100;
20 | % Compute W from the views, split it into nclass groups, then score the grouping against label.
21 | W = method( X, opts );
22 | group = SpectralClustering(W, nclass);
23 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); % output suppressed; results stay in the workspace


--------------------------------------------------------------------------------
/run_syn.m:
--------------------------------------------------------------------------------
 1 | clear;
 2 | clc;
 3 | % Experiment script: syn500.mat data -> method.m -> SpectralClustering2 -> clustering metrics.
 4 | addpath('./dataset');
 5 | addpath('./metric');
 6 | addpath('./tool');
 7 | % All paths are relative to the current folder. The MAT-file is expected to supply both X and label.
 8 | load('./dataset/syn500.mat');
 9 | % Number of clusters = number of distinct values in label.
10 | nclass = length(unique(label));
11 | % Hyperparameters handed to method.m; their meaning is defined there (not visible in this script).
12 | opts.alpha = 0.5;
13 | opts.beta = [1, 0.001];
14 | opts.gamma = [1, 0.001];
15 | opts.mu = 10;
16 | % Compute W from the views, split it into nclass groups, then score the grouping against label.
17 | W = method( X, opts );
18 | group = SpectralClustering2(W, nclass);
19 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); % output suppressed; results stay in the workspace
20 | 


--------------------------------------------------------------------------------
/synthetic_lowrank.m:
--------------------------------------------------------------------------------
 1 | function [ X, label ] = synthetic_lowrank( N, view )
 2 | %SYNTHETIC_LOWRANK Generate noisy two-class multi-view synthetic data.
 3 | %   [X, label] = synthetic_lowrank(N, view)
 4 | %
 5 | %   N     -- number of data points; the first floor(N/2) rows belong to
 6 | %            class 1 and the remaining ceil(N/2) rows to class 2
 7 | %   view  -- number of views to generate
 8 | %
 9 | %   X     -- 1-by-view cell array; X{v} is N-by-m, where m is drawn at
10 | %            random from 1..10 independently for every view
11 | %   label -- N-by-1 vector of class labels (1 or 2)
12 | %
13 | %   Each class block is a product of random factors C * T * P. T has only
14 | %   two columns, so the noise-free block has rank at most 2. Standard
15 | %   normal noise is then added to every entry.
16 | 
17 | % Enhong Zhuo, 2019
18 | 
19 | T_class1 = [1,1;1,2;2,1];
20 | T_class2 = [3,1;4,1;4,2];
21 | 
22 | nFirst  = floor(N/2);
23 | nSecond = ceil(N/2);
24 | 
25 | % Preallocate the output. This also guarantees that X is assigned (as an
26 | % empty cell array) when view < 1 and the loop body never runs.
27 | X = cell(1, numel(1:view));
28 | 
29 | for v = 1:view
30 | 
31 |     % Feature dimension of this view
32 |     m = randperm(10,1);
33 | 
34 |     % The order of the random draws below is kept as in the original
35 |     % implementation so that seeded runs reproduce the same data.
36 |     P1 = rand(2,m);
37 |     P2 = rand(2,m);
38 | 
39 |     TP1 = T_class1 * P1;
40 |     TP2 = T_class2 * P2;
41 | 
42 |     C1 = rand(nFirst,3);
43 |     C2 = rand(nSecond,3);
44 | 
45 |     A = C1 * TP1;
46 |     B = C2 * TP2;
47 | 
48 |     % Stack both classes and add Gaussian noise
49 |     X{v} = [A;B];
50 |     X{v} = X{v} + randn(size(X{v}));
51 | 
52 | end
53 | 
54 | label = [ones(1,nFirst), 2*ones(1,nSecond)]';
55 | 
56 | end


--------------------------------------------------------------------------------
/tool/SpectralClustering.m:
--------------------------------------------------------------------------------
 1 | function [group, eigengap] = SpectralClustering(W, NUMC)
 2 | %SPECTRALCLUSTERING Executes spectral clustering algorithm
 3 | %   [group, eigengap] = SpectralClustering(W, NUMC)
 4 | %
 5 | %   W     -- square affinity matrix; its row sums are used as degrees
 6 | %   NUMC  -- number of clusters, or a vector of cluster counts. The
 7 | %            eigenvectors are computed once for max(NUMC) and the first
 8 | %            NUMC(i) of them are used for the i-th request.
 9 | %
10 | %   group    -- column vector of cluster indices when NUMC has one element,
11 | %               otherwise a cell array holding one such vector per element
12 | %   eigengap -- absolute differences between consecutive eigenvalues of the
13 | %               normalized Laplacian, eigenvalues sorted in ascending order
14 | 
15 | % calculate degree matrix
16 | degs = sum(W, 2);
17 | D    = sparse(1:size(W, 1), 1:size(W, 2), degs);
18 | 
19 | % compute unnormalized Laplacian
20 | L = D - W;
21 | k = max(NUMC);
22 | 
23 | % avoid dividing by zero
24 | degs(degs == 0) = eps;
25 | % calculate D^(-1/2)
26 | D = spdiags(1./(degs.^0.5), 0, size(D, 1), size(D, 2));
27 | % calculate normalized Laplacian
28 | L = D * L * D;
29 | 
30 | % compute the eigenvectors corresponding to the k eigenvalues closest to
31 | % eps (sigma = eps), i.e. the k smallest ones as far as eigs resolves them
32 | [U, eigenvalue] = eigs(L, k, eps);
33 | 
34 | % eigs returns the eigenvalues as a diagonal matrix. Sort the values
35 | % themselves and apply the same permutation to the eigenvectors. Permuting
36 | % only the columns of the diagonal matrix would move the values off the
37 | % diagonal and corrupt the eigengap.
38 | [sortedEig, order] = sort(diag(eigenvalue), 'ascend');
39 | U = U(:, order);
40 | eigengap = abs(diff(sortedEig));
41 | 
42 | % Iterate by position so that column-vector or repeated entries in NUMC
43 | % are handled as well
44 | nRequests = numel(NUMC);
45 | Cluster = cell(1, nRequests);
46 | for idx = 1:nRequests
47 |     ck = NUMC(idx);
48 |     UU = U(:,1:ck);
49 |     % normalize the eigenvectors row-wise before discretisation
50 |     UU = UU./repmat(sqrt(sum(UU.^2,2)),1,size(UU,2));
51 |     [EigenvectorsDiscrete]=discretisation(UU);
52 |     % each row's cluster is the column of its largest entry
53 |     [~,temp] = max(EigenvectorsDiscrete,[],2);
54 |     Cluster{idx} = temp;
55 | end
56 | 
57 | % a single request returns the label vector itself instead of a cell array
58 | if nRequests==1
59 |     group=Cluster{1};
60 | else
61 |     group = Cluster;
62 | end
63 | 
64 | end


--------------------------------------------------------------------------------
/tool/SpectralClustering2.m:
--------------------------------------------------------------------------------
 1 | function [groups] = SpectralClustering2(A, n)
 2 | %SPECTRALCLUSTERING2 Executes spectral clustering algorithm
 3 | %   groups = SpectralClustering2(A, n)
 4 | %
 5 | %   A      -- N-by-N affinity matrix; its column sums are used as degrees
 6 | %   n      -- number of classes to be clustered
 7 | %
 8 | %   groups -- cluster index for every row of A, as returned by kmeans
 9 | %
10 | %   Warnings are silenced while the function runs; the caller's warning
11 | %   state is restored when the function exits, including on error.
12 | 
13 | previousWarnings = warning('off', 'all');
14 | restoreWarnings = onCleanup(@() warning(previousWarnings)); %#ok<NASGU>
15 | 
16 | N = size(A,1);
17 | MAXiter = 1000; % Maximum number of iterations for KMeans
18 | REPlic = 20; % Number of replications for KMeans
19 | 
20 | % Normalized spectral clustering according to Ng & Jordan & Weiss
21 | % using Normalized Symmetric Laplacian L = I - D^{-1/2} W D^{-1/2}
22 | 
23 | DN = diag( 1./sqrt(sum(A)+eps) );
24 | LapN = speye(N) - DN * A * DN;
25 | 
26 | % svd orders singular values from largest to smallest, so the last n
27 | % right singular vectors belong to the n smallest singular values
28 | [~,~,vN] = svd(LapN);
29 | kerN = vN(:,N-n+1:N);
30 | 
31 | % scale every row of the embedding to (approximately) unit length
32 | kerNS = zeros(size(kerN));
33 | for i = 1:N
34 |     kerNS(i,:) = kerN(i,:) ./ norm(kerN(i,:)+eps);
35 | end
36 | 
37 | groups = kmeans(kerNS,n,'maxiter',MAXiter,'replicates',REPlic,'EmptyAction','singleton','Start','sample');
38 | end


--------------------------------------------------------------------------------
/tool/discretisation.m:
--------------------------------------------------------------------------------
 1 | function [EigenvectorsDiscrete,EigenVectors]=discretisation(EigenVectors)
 2 | %
 3 | % [EigenvectorsDiscrete,EigenVectors]=discretisation(EigenVectors)
 4 | %
 5 | % Input:  EigenVectors = continuous Ncut vector, size = ndata x nbEigenvectors
 6 | % Output: EigenvectorsDiscrete = discrete Ncut vector, size = ndata x nbEigenvectors
 7 | %         EigenVectors = the input after row-wise normalisation
 8 | %
 9 | % Timothee Cour, Stella Yu, Jianbo Shi, 2004
10 | 
11 | [numPoints,numVecs]=size(EigenVectors);
12 | 
13 | % Row-normalise the input; eps keeps the division finite for all-zero rows
14 | rowLength = sqrt(sum(EigenVectors.*EigenVectors,2));
15 | EigenVectors = EigenVectors./repmat(rowLength+eps,1,numVecs);
16 | 
17 | % Build the initial rotation from rows of the data: start with the middle
18 | % row, then repeatedly add the row whose accumulated absolute projection
19 | % onto the columns chosen so far is smallest.
20 | R=zeros(numVecs);
21 | R(:,1)=EigenVectors(round(numPoints/2),:)';
22 | projection=zeros(numPoints,1);
23 | for col=2:numVecs
24 |     projection=projection+abs(EigenVectors*R(:,col-1));
25 |     [~,pick]=min(projection);
26 |     R(:,col)=EigenVectors(pick,:)';
27 | end
28 | 
29 | % Alternate between discretising the rotated vectors and re-estimating the
30 | % rotation from an SVD, until the objective stops changing or the
31 | % iteration limit is exceeded.
32 | previousNcut=0;
33 | iteration=0;
34 | maxIterations=20; % TODO: review this limit
35 | while true
36 |     iteration=iteration+1;
37 |     EigenvectorsDiscrete=discretisationEigenVectorData(EigenVectors*R);
38 |     [U,S,V]=svd(EigenvectorsDiscrete'*EigenVectors+eps,0);
39 |     currentNcut=2*(numPoints-trace(S));
40 | 
41 |     if abs(currentNcut-previousNcut)<eps || iteration>maxIterations
42 |         break;
43 |     end
44 |     previousNcut=currentNcut;
45 |     R=V*U';
46 | end


--------------------------------------------------------------------------------
/tool/discretisationEigenVectorData.m:
--------------------------------------------------------------------------------
 1 | function Y = discretisationEigenVectorData(EigenVector)
 2 | % Y = discretisationEigenVectorData(EigenVector)
 3 | %
 4 | % discretizes previously rotated eigenvectors in discretisation
 5 | %
 6 | % Input:  EigenVector = n x k matrix, one row per data point
 7 | % Output: Y = sparse n x k indicator matrix; Y(i,j) = 1 where column j
 8 | %         holds the largest entry of row i (the first such column on
 9 | %         ties) and 0 elsewhere
10 | %
11 | % Timothee Cour, Stella Yu, Jianbo Shi, 2004
12 | 
13 | [n,k]=size(EigenVector);
14 | 
15 | % Take the maximum explicitly along dimension 2. max(EigenVector') treats
16 | % the transposed input as a plain vector when k == 1 and returns a single
17 | % index in 1..n, which can exceed k and make the sparse() call fail.
18 | [~,J]=max(EigenVector,[],2);
19 | 
20 | Y=sparse((1:n)',J,1,n,k);


--------------------------------------------------------------------------------