├── README.md ├── SVthresh.m ├── dataset ├── 3-sources.mat ├── bbc.mat ├── caltech7.mat ├── syn500.mat └── uci.mat ├── method.m ├── metric ├── accuracyMeasure.m ├── adjrand.m ├── bestMap.m ├── clustering_metric.m ├── fprMeasure.m ├── nmi.m └── rand_index.m ├── run_3s.m ├── run_Caltech.m ├── run_UCI.m ├── run_bbc.m ├── run_syn.m ├── synthetic_lowrank.m └── tool ├── SpectralClustering.m ├── SpectralClustering2.m ├── discretisation.m └── discretisationEigenVectorData.m /README.md: -------------------------------------------------------------------------------- 1 | ## DALIGA 2 | MATLAB implementation of the paper "Direct affinity learning to boost multi-view clustering via subspace merging on a Grassmann manifold". 3 | 4 | The main method is ``method.m``. 5 | 6 | Run ``run_syn.m``, ``run_3s.m``, ``run_bbc.m``, ``run_Caltech.m``, and ``run_UCI.m`` to reproduce the experimental results on the synthetic, 3-sources, BBCSport, Caltech 101, and UCI Digit datasets, respectively. 7 | -------------------------------------------------------------------------------- /SVthresh.m: -------------------------------------------------------------------------------- 1 | function [ thresh_X ] = SVthresh( X, thresh ) 2 | % Singular value thresholding 3 | 4 | % Enhong Zhuo, 2019 5 | 6 | [U, S, V] = svd(X); 7 | S_thresh = S - thresh; 8 | S_thresh = max(S_thresh, 0); 9 | thresh_X = U * S_thresh * V'; 10 | 11 | end 12 | 13 | -------------------------------------------------------------------------------- /dataset/3-sources.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/3-sources.mat -------------------------------------------------------------------------------- /dataset/bbc.mat: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/bbc.mat -------------------------------------------------------------------------------- /dataset/caltech7.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/caltech7.mat -------------------------------------------------------------------------------- /dataset/syn500.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/syn500.mat -------------------------------------------------------------------------------- /dataset/uci.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scutbioinformatic/DALIGA/4f8da1df66f3d6b41053c4eab14219ceaee7ff91/dataset/uci.mat -------------------------------------------------------------------------------- /method.m: -------------------------------------------------------------------------------- 1 | function [ B1 ] = method( X, opts ) 2 | % Low-rank sparse metric learning for multi-view subspace clustering 3 | % X -- cell, multi-view data points 4 | % X{i} -- i-th view matrix, row for sample, column for feature 5 | % opts -- parameter settings 6 | 7 | % Enhong Zhuo, 2019 8 | 9 | num_views = length(X); 10 | n = size(X{1},1); 11 | 12 | % setting default parameters 13 | num_iter = 100; 14 | mu = 10; 15 | max_mu = 1e6; 16 | rho = 1.5; 17 | alpha = 0.5; 18 | beta = [1, 1]; 19 | gamma = [0.01 0.01]; 20 | err_thr = 1e-5; 21 | 22 | if ~exist('opts', 'var') 23 | opts = []; 24 | else 25 | if ~isstruct(opts) 26 | error('Parameter error: opts is not a structure.'); 27 | end 28 | end 29 | 30 | if isfield(opts, 'alpha'); alpha = opts.alpha; end 31 | if isfield(opts, 'beta'); beta = opts.beta; end 32 
| if isfield(opts, 'gamma'); gamma = opts.gamma; end 33 | if isfield(opts, 'mu'); mu = opts.mu; end 34 | if isfield(opts, 'max_mu'); max_mu = opts.max_mu; end 35 | if isfield(opts, 'rho'); rho = opts.rho; end 36 | if isfield(opts, 'num_iter'); num_iter = opts.num_iter; end 37 | if isfield(opts, 'err_thr'); err_thr = opts.err_thr; end 38 | 39 | alpha = repmat({alpha}, 1, num_views); 40 | 41 | A1 = repmat({zeros(n,n)}, 1, num_views); 42 | A2 = repmat({zeros(n,n)}, 1, num_views); 43 | A3 = repmat({zeros(n,n)}, 1, num_views); 44 | A4 = repmat({zeros(n,n)}, 1, num_views); 45 | 46 | B1 = zeros(n,n); 47 | B2 = zeros(n,n); 48 | B3 = zeros(n,n); 49 | 50 | K = repmat({zeros(n,n)}, 1, num_views); 51 | 52 | Lambda1 = repmat({zeros(n,n)}, 1, num_views); 53 | Lambda2= repmat({zeros(n,n)}, 1, num_views); 54 | Lambda3 = repmat({zeros(n,n)}, 1, num_views); 55 | Lambda4 = zeros(n,n); 56 | Lambda5 = zeros(n,n); 57 | 58 | mu = mu * ones(5,1); 59 | 60 | for v = 1:num_views 61 | K{v} = X{v} * X{v}'; 62 | end 63 | 64 | iter = 0; 65 | err = ones(num_views*3+2,1); 66 | 67 | while iter < num_iter && max(err) > err_thr 68 | 69 | iter = iter + 1; 70 | temp = zeros(n); 71 | for v = 1:num_views 72 | 73 | A1{v} = (K{v}+mu(1)*A2{v}+mu(2)*A3{v}+mu(3)*A4{v}-Lambda1{v}-Lambda2{v}-Lambda3{v}) / (K{v}+sum(mu(1:3))*eye(n)); 74 | 75 | 76 | A2{v} = A1{v} + (alpha{v}*(B1'+B1)+Lambda1{v})/mu(1); 77 | A3{v} = SVthresh(A1{v}+Lambda2{v}/mu(2), beta(1)/mu(2)); 78 | A4{v} = wthresh(A1{v}+Lambda3{v}/mu(3), 's', beta(2)/mu(3)); 79 | A4{v} = A4{v} - diag(diag(A4{v})); 80 | 81 | 82 | Lambda1{v} = Lambda1{v} + mu(1)*(A1{v}-A2{v}); 83 | Lambda2{v} = Lambda2{v} + mu(2)*(A1{v}-A3{v}); 84 | Lambda3{v} = Lambda3{v} + mu(3)*(A1{v}-A4{v}); 85 | 86 | temp = temp + alpha{v}*(A2{v}'+A2{v}); 87 | 88 | err((v-1)*3+1) = norm(A1{v}-A2{v},'inf'); 89 | err((v-1)*3+2) = norm(A1{v}-A3{v},'inf'); 90 | err((v-1)*3+3) = norm(A1{v}-A4{v},'inf'); 91 | 92 | end 93 | 94 | B1 = (temp + mu(4)*B2 + mu(5)*B3 - Lambda4 - Lambda5) / sum(mu(4:5)); 
95 | B2 = SVthresh(B1+Lambda4/mu(4), gamma(1)/mu(4)); 96 | B3 = wthresh(B1+Lambda5/mu(5), 's', gamma(2)/mu(5)); 97 | 98 | Lambda4 = Lambda4 + mu(4)*(B1-B2); 99 | Lambda5 = Lambda5 + mu(5)*(B1-B3); 100 | 101 | err(num_views*3+1) = norm(B1-B2,'inf'); 102 | err(num_views*3+2) = norm(B1-B3,'inf'); 103 | 104 | mu = min(rho*mu,max_mu); 105 | 106 | end 107 | 108 | end 109 | 110 | 111 | -------------------------------------------------------------------------------- /metric/accuracyMeasure.m: -------------------------------------------------------------------------------- 1 | function AC = accuracyMeasure(gnd,res) 2 | res = bestMap(gnd,res); 3 | AC = length(find(gnd == res))/length(gnd); 4 | end -------------------------------------------------------------------------------- /metric/adjrand.m: -------------------------------------------------------------------------------- 1 | function ari = adjrand(P1,P2) 2 | 3 | % ADJRAND Adjusted Rand Index to Compare Two Partitions 4 | % 5 | % ARI = ADJRAND(P1,P2) returns the adjusted rand index for partitions 6 | % P1 and P2 for the same data set. Each of these partitions 7 | % are vectors with an index to the group number. For example, 8 | % this could be the output from KMEANS or CLUSTER. 9 | % 10 | 11 | if length(P1) ~= length(P2) 12 | error('Input vectors must be the same length.') 13 | return 14 | end 15 | uP1 = unique(P1); 16 | uP2 = unique(P2); 17 | g1 = length(uP1); 18 | g2 = length(uP2); 19 | n = length(P1); 20 | 21 | % Now find the matching matrix M 22 | M = zeros(g1,g2); 23 | I = 0; 24 | for i = uP1(:)' 25 | I = I + 1; 26 | J = 0; 27 | for j = uP2(:)' 28 | J = J + 1; 29 | indI = find(P1 == i); 30 | indJ = find(P2 == j); 31 | M(I,J) = length(intersect(indI,indJ)); 32 | end 33 | end 34 | 35 | nc2 = nchoosek(n,2); 36 | if g1>1 & g2>1 37 | % The neither one is a vector, so it is ok to just do the transpose. 38 | nidot = sum(M); 39 | njdot = sum(M'); 40 | elseif g1==1 41 | % Then M only has one row. No need to get column totals. 
42 | nidot = M; 43 | njdot = sum(M); 44 | else 45 | % Then M has one column. No need to get row totals. 46 | nidot = sum(M); 47 | njdot = M; 48 | end 49 | 50 | % NOw get the stuff needed for the index. 51 | for i = 1:g1 52 | for j = 1:g2 53 | if M(i,j) > 1 54 | nijc2(i,j) = nchoosek(M(i,j),2); 55 | else 56 | nijc2(i,j) = 0; 57 | end 58 | end 59 | end 60 | for i = 1:length(nidot) 61 | if nidot(i) > 1 62 | nidotc2(i) = nchoosek(nidot(i),2); 63 | else 64 | nidotc2(i) = 0; 65 | end 66 | end 67 | for i = 1:length(njdot) 68 | if njdot(i) > 1 69 | njdotc2(i) = nchoosek(njdot(i),2); 70 | else 71 | njdotc2(i) = 0; 72 | end 73 | end 74 | % Now calculate the index. 75 | N = sum(sum(nijc2)) - sum(nidotc2)*sum(njdotc2)/nc2; 76 | D = (sum(nidotc2) + sum(njdotc2))/2 - sum(nidotc2)*sum(njdotc2)/nc2; 77 | ari = N/D; 78 | 79 | -------------------------------------------------------------------------------- /metric/bestMap.m: -------------------------------------------------------------------------------- 1 | function [newL2, c] = bestMap(L1,L2) 2 | %bestmap: permute the labels of L2 to match L1 as well as possible 3 | % [newL2, c] = bestMap(L1,L2); c(j) is the (min-shifted) L1 label given to shifted L2 label j 4 | % newL2 is a column vector with one relabelled entry per element of L2 5 | %=========== 6 | L1 = L1(:); 7 | L2 = L2(:); 8 | if numel(L1) ~= numel(L2) % scalar comparison, so the check can actually fire 9 | error('size(L1) must == size(L2)'); 10 | end 11 | L1 = L1 - min(L1) + 1; % min (L1) <- 1; 12 | L2 = L2 - min(L2) + 1; % min (L2) <- 1; 13 | %=========== make bipartition graph ============ 14 | nClass = max(max(L1), max(L2)); 15 | G = zeros(nClass); 16 | for i=1:nClass 17 | for j=1:nClass 18 | G(i,j) = length(find(L1 == i & L2 == j)); 19 | end 20 | end 21 | %=========== assign with hungarian method ====== 22 | [c,t] = hungarian(-G); 23 | newL2 = zeros(size(L2)); % one output label per sample, not per class 24 | for i=1:nClass 25 | newL2(L2 == i) = c(i); 26 | end 27 | 28 | 29 | function [C,T]=hungarian(A) 30 | %HUNGARIAN Solve the Assignment problem using the Hungarian method. 31 | % 32 | %[C,T]=hungarian(A) 33 | %A - a square cost matrix. 34 | %C - the optimal assignment.
35 | %T - the cost of the optimal assignment. 36 | %s.t. T = trace(A(C,:)) is minimized over all possible assignments. 37 | 38 | % Adapted from the FORTRAN IV code in Carpaneto and Toth, "Algorithm 548: 39 | % Solution of the assignment problem [H]", ACM Transactions on 40 | % Mathematical Software, 6(1):104-111, 1980. 41 | 42 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 43 | % Department of Computing Science, Ume?University, 44 | % Sweden. 45 | % All standard disclaimers apply. 46 | 47 | % A substantial effort was put into this code. If you use it for a 48 | % publication or otherwise, please include an acknowledgement or at least 49 | % notify me by email. /Niclas 50 | 51 | [m,n]=size(A); 52 | 53 | if (m~=n) 54 | error('HUNGARIAN: Cost matrix must be square!'); 55 | end 56 | 57 | % Save original cost matrix. 58 | orig=A; 59 | 60 | % Reduce matrix. 61 | A=hminired(A); 62 | 63 | % Do an initial assignment. 64 | [A,C,U]=hminiass(A); 65 | 66 | % Repeat while we have unassigned rows. 67 | while (U(n+1)) 68 | % Start with no path, no unchecked zeros, and no unexplored rows. 69 | LR=zeros(1,n); 70 | LC=zeros(1,n); 71 | CH=zeros(1,n); 72 | RH=[zeros(1,n) -1]; 73 | 74 | % No labelled columns. 75 | SLC=[]; 76 | 77 | % Start path in first unassigned row. 78 | r=U(n+1); 79 | % Mark row with end-of-path label. 80 | LR(r)=-1; 81 | % Insert row first in labelled row set. 82 | SLR=r; 83 | 84 | % Repeat until we manage to find an assignable zero. 85 | while (1) 86 | % If there are free zeros in row r 87 | if (A(r,n+1)~=0) 88 | % ...get column of first free zero. 89 | l=-A(r,n+1); 90 | 91 | % If there are more free zeros in row r and row r in not 92 | % yet marked as unexplored.. 93 | if (A(r,l)~=0 && RH(r)==0) 94 | % Insert row r first in unexplored list. 95 | RH(r)=RH(n+1); 96 | RH(n+1)=r; 97 | 98 | % Mark in which column the next unexplored zero in this row 99 | % is. 100 | CH(r)=-A(r,l); 101 | end 102 | else 103 | % If all rows are explored.. 
104 | if (RH(n+1)<=0) 105 | % Reduce matrix. 106 | [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR); 107 | end 108 | 109 | % Re-start with first unexplored row. 110 | r=RH(n+1); 111 | % Get column of next free zero in row r. 112 | l=CH(r); 113 | % Advance "column of next free zero". 114 | CH(r)=-A(r,l); 115 | % If this zero is last in the list.. 116 | if (A(r,l)==0) 117 | % ...remove row r from unexplored list. 118 | RH(n+1)=RH(r); 119 | RH(r)=0; 120 | end 121 | end 122 | 123 | % While the column l is labelled, i.e. in path. 124 | while (LC(l)~=0) 125 | % If row r is explored.. 126 | if (RH(r)==0) 127 | % If all rows are explored.. 128 | if (RH(n+1)<=0) 129 | % Reduce cost matrix. 130 | [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR); 131 | end 132 | 133 | % Re-start with first unexplored row. 134 | r=RH(n+1); 135 | end 136 | 137 | % Get column of next free zero in row r. 138 | l=CH(r); 139 | 140 | % Advance "column of next free zero". 141 | CH(r)=-A(r,l); 142 | 143 | % If this zero is last in list.. 144 | if(A(r,l)==0) 145 | % ...remove row r from unexplored list. 146 | RH(n+1)=RH(r); 147 | RH(r)=0; 148 | end 149 | end 150 | 151 | % If the column found is unassigned.. 152 | if (C(l)==0) 153 | % Flip all zeros along the path in LR,LC. 154 | [A,C,U]=hmflip(A,C,LC,LR,U,l,r); 155 | % ...and exit to continue with next unassigned row. 156 | break; 157 | else 158 | % ...else add zero to path. 159 | 160 | % Label column l with row r. 161 | LC(l)=r; 162 | 163 | % Add l to the set of labelled columns. 164 | SLC=[SLC l]; 165 | 166 | % Continue with the row assigned to column l. 167 | r=C(l); 168 | 169 | % Label row r with column l. 170 | LR(r)=l; 171 | 172 | % Add r to the set of labelled rows. 173 | SLR=[SLR r]; 174 | end 175 | end 176 | end 177 | 178 | % Calculate the total cost. 179 | T=sum(orig(logical(sparse(C,1:size(orig,2),1)))); 180 | 181 | 182 | function A=hminired(A) 183 | %HMINIRED Initial reduction of cost matrix for the Hungarian method. 
184 | % 185 | %B=assredin(A) 186 | %A - the unreduced cost matris. 187 | %B - the reduced cost matrix with linked zeros in each row. 188 | 189 | % v1.0 96-06-13. Niclas Borlin, niclas@cs.umu.se. 190 | 191 | [m,n]=size(A); 192 | 193 | % Subtract column-minimum values from each column. 194 | colMin=min(A); 195 | A=A-colMin(ones(n,1),:); 196 | 197 | % Subtract row-minimum values from each row. 198 | rowMin=min(A')'; 199 | A=A-rowMin(:,ones(1,n)); 200 | 201 | % Get positions of all zeros. 202 | [i,j]=find(A==0); 203 | 204 | % Extend A to give room for row zero list header column. 205 | A(1,n+1)=0; 206 | for k=1:n 207 | % Get all column in this row. 208 | cols=j(k==i)'; 209 | % Insert pointers in matrix. 210 | A(k,[n+1 cols])=[-cols 0]; 211 | end 212 | 213 | 214 | function [A,C,U]=hminiass(A) 215 | %HMINIASS Initial assignment of the Hungarian method. 216 | % 217 | %[B,C,U]=hminiass(A) 218 | %A - the reduced cost matrix. 219 | %B - the reduced cost matrix, with assigned zeros removed from lists. 220 | %C - a vector. C(J)=I means row I is assigned to column J, 221 | % i.e. there is an assigned zero in position I,J. 222 | %U - a vector with a linked list of unassigned rows. 223 | 224 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 225 | 226 | [n,np1]=size(A); 227 | 228 | % Initalize return vectors. 229 | C=zeros(1,n); 230 | U=zeros(1,n+1); 231 | 232 | % Initialize last/next zero "pointers". 233 | LZ=zeros(1,n); 234 | NZ=zeros(1,n); 235 | 236 | for i=1:n 237 | % Set j to first unassigned zero in row i. 238 | lj=n+1; 239 | j=-A(i,lj); 240 | 241 | % Repeat until we have no more zeros (j==0) or we find a zero 242 | % in an unassigned column (c(j)==0). 243 | 244 | while (C(j)~=0) 245 | % Advance lj and j in zero list. 246 | lj=j; 247 | j=-A(i,lj); 248 | 249 | % Stop if we hit end of list. 250 | if (j==0) 251 | break; 252 | end 253 | end 254 | 255 | if (j~=0) 256 | % We found a zero in an unassigned column. 257 | 258 | % Assign row i to column j. 
259 | C(j)=i; 260 | 261 | % Remove A(i,j) from unassigned zero list. 262 | A(i,lj)=A(i,j); 263 | 264 | % Update next/last unassigned zero pointers. 265 | NZ(i)=-A(i,j); 266 | LZ(i)=lj; 267 | 268 | % Indicate A(i,j) is an assigned zero. 269 | A(i,j)=0; 270 | else 271 | % We found no zero in an unassigned column. 272 | 273 | % Check all zeros in this row. 274 | 275 | lj=n+1; 276 | j=-A(i,lj); 277 | 278 | % Check all zeros in this row for a suitable zero in another row. 279 | while (j~=0) 280 | % Check the in the row assigned to this column. 281 | r=C(j); 282 | 283 | % Pick up last/next pointers. 284 | lm=LZ(r); 285 | m=NZ(r); 286 | 287 | % Check all unchecked zeros in free list of this row. 288 | while (m~=0) 289 | % Stop if we find an unassigned column. 290 | if (C(m)==0) 291 | break; 292 | end 293 | 294 | % Advance one step in list. 295 | lm=m; 296 | m=-A(r,lm); 297 | end 298 | 299 | if (m==0) 300 | % We failed on row r. Continue with next zero on row i. 301 | lj=j; 302 | j=-A(i,lj); 303 | else 304 | % We found a zero in an unassigned column. 305 | 306 | % Replace zero at (r,m) in unassigned list with zero at (r,j) 307 | A(r,lm)=-j; 308 | A(r,j)=A(r,m); 309 | 310 | % Update last/next pointers in row r. 311 | NZ(r)=-A(r,m); 312 | LZ(r)=j; 313 | 314 | % Mark A(r,m) as an assigned zero in the matrix . . . 315 | A(r,m)=0; 316 | 317 | % ...and in the assignment vector. 318 | C(m)=r; 319 | 320 | % Remove A(i,j) from unassigned list. 321 | A(i,lj)=A(i,j); 322 | 323 | % Update last/next pointers in row r. 324 | NZ(i)=-A(i,j); 325 | LZ(i)=lj; 326 | 327 | % Mark A(r,m) as an assigned zero in the matrix . . . 328 | A(i,j)=0; 329 | 330 | % ...and in the assignment vector. 331 | C(j)=i; 332 | 333 | % Stop search. 334 | break; 335 | end 336 | end 337 | end 338 | end 339 | 340 | % Create vector with list of unassigned rows. 341 | 342 | % Mark all rows have assignment. 
343 | r=zeros(1,n); 344 | rows=C(C~=0); 345 | r(rows)=rows; 346 | empty=find(r==0); 347 | 348 | % Create vector with linked list of unassigned rows. 349 | U=zeros(1,n+1); 350 | U([n+1 empty])=[empty 0]; 351 | 352 | 353 | function [A,C,U]=hmflip(A,C,LC,LR,U,l,r) 354 | %HMFLIP Flip assignment state of all zeros along a path. 355 | % 356 | %[A,C,U]=hmflip(A,C,LC,LR,U,l,r) 357 | %Input: 358 | %A - the cost matrix. 359 | %C - the assignment vector. 360 | %LC - the column label vector. 361 | %LR - the row label vector. 362 | %U - the 363 | %r,l - position of last zero in path. 364 | %Output: 365 | %A - updated cost matrix. 366 | %C - updated assignment vector. 367 | %U - updated unassigned row list vector. 368 | 369 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 370 | 371 | n=size(A,1); 372 | 373 | while (1) 374 | % Move assignment in column l to row r. 375 | C(l)=r; 376 | 377 | % Find zero to be removed from zero list.. 378 | 379 | % Find zero before this. 380 | m=find(A(r,:)==-l); 381 | 382 | % Link past this zero. 383 | A(r,m)=A(r,l); 384 | 385 | A(r,l)=0; 386 | 387 | % If this was the first zero of the path.. 388 | if (LR(r)<0) 389 | ...remove row from unassigned row list and return. 390 | U(n+1)=U(r); 391 | U(r)=0; 392 | return; 393 | else 394 | 395 | % Move back in this row along the path and get column of next zero. 396 | l=LR(r); 397 | 398 | % Insert zero at (r,l) first in zero list. 399 | A(r,l)=A(r,n+1); 400 | A(r,n+1)=-l; 401 | 402 | % Continue back along the column to get row of next zero in path. 403 | r=LC(l); 404 | end 405 | end 406 | 407 | 408 | function [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR) 409 | %HMREDUCE Reduce parts of cost matrix in the Hungerian method. 410 | % 411 | %[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR) 412 | %Input: 413 | %A - Cost matrix. 414 | %CH - vector of column of 'next zeros' in each row. 415 | %RH - vector with list of unexplored rows. 416 | %LC - column labels. 417 | %RC - row labels. 418 | %SLC - set of column labels. 
419 | %SLR - set of row labels. 420 | % 421 | %Output: 422 | %A - Reduced cost matrix. 423 | %CH - Updated vector of 'next zeros' in each row. 424 | %RH - Updated vector of unexplored rows. 425 | 426 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 427 | 428 | n=size(A,1); 429 | 430 | % Find which rows are covered, i.e. unlabelled. 431 | coveredRows=LR==0; 432 | 433 | % Find which columns are covered, i.e. labelled. 434 | coveredCols=LC~=0; 435 | 436 | r=find(~coveredRows); 437 | c=find(~coveredCols); 438 | 439 | % Get minimum of uncovered elements. 440 | m=min(min(A(r,c))); 441 | 442 | % Subtract minimum from all uncovered elements. 443 | A(r,c)=A(r,c)-m; 444 | 445 | % Check all uncovered columns.. 446 | for j=c 447 | % ...and uncovered rows in path order.. 448 | for i=SLR 449 | % If this is a (new) zero.. 450 | if (A(i,j)==0) 451 | % If the row is not in unexplored list.. 452 | if (RH(i)==0) 453 | % ...insert it first in unexplored list. 454 | RH(i)=RH(n+1); 455 | RH(n+1)=i; 456 | % Mark this zero as "next free" in this row. 457 | CH(i)=j; 458 | end 459 | % Find last unassigned zero on row I. 460 | row=A(i,:); 461 | colsInList=-row(row<0); 462 | if (length(colsInList)==0) 463 | % No zeros in the list. 464 | l=n+1; 465 | else 466 | l=colsInList(row(colsInList)==0); 467 | end 468 | % Append this zero to end of list. 469 | A(i,l)=-j; 470 | end 471 | end 472 | end 473 | 474 | % Add minimum to all doubly covered elements. 475 | r=find(coveredRows); 476 | c=find(coveredCols); 477 | 478 | % Take care of the zeros we will remove. 479 | [i,j]=find(A(r,c)<=0); 480 | 481 | i=r(i); 482 | j=c(j); 483 | 484 | for k=1:length(i) 485 | % Find zero before this in this row. 486 | lj=find(A(i(k),:)==-j(k)); 487 | % Link past it. 488 | A(i(k),lj)=A(i(k),j(k)); 489 | % Mark it as assigned. 
490 | A(i(k),j(k))=0; 491 | end 492 | 493 | A(r,c)=A(r,c)+m; 494 | -------------------------------------------------------------------------------- /metric/clustering_metric.m: -------------------------------------------------------------------------------- 1 | function [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric( gnd, res ) 2 | 3 | NMI = nmi(gnd,res); 4 | ARI = adjrand(gnd,res); 5 | ACC = accuracyMeasure(gnd,res); 6 | [fscore, precision, recall] = fprMeasure(gnd,res); 7 | 8 | end 9 | -------------------------------------------------------------------------------- /metric/fprMeasure.m: -------------------------------------------------------------------------------- 1 | function [f,p,r] = fprMeasure(T,H) 2 | % T ground truth 3 | % H result 4 | if length(T) ~= length(H), 5 | size(T) 6 | size(H) 7 | end; 8 | 9 | N = length(T); 10 | numT = 0; 11 | numH = 0; 12 | numI = 0; 13 | for n=1:N, 14 | Tn = (T(n+1:end))==T(n); 15 | Hn = (H(n+1:end))==H(n); 16 | numT = numT + sum(Tn); 17 | numH = numH + sum(Hn); 18 | numI = numI + sum(Tn .* Hn); 19 | end; 20 | p = 1; 21 | r = 1; 22 | f = 1; 23 | if numH > 0, 24 | p = numI / numH; 25 | end; 26 | if numT > 0, 27 | r = numI / numT; 28 | end; 29 | if (p+r) == 0, 30 | f = 0; 31 | else 32 | f = 2 * p * r / (p + r); 33 | end; 34 | -------------------------------------------------------------------------------- /metric/nmi.m: -------------------------------------------------------------------------------- 1 | function z = nmi(x, y) 2 | % Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)) of two discrete variables x and y. 3 | % Input: 4 | % x, y: two integer vector of the same length 5 | % Ouput: 6 | % z: normalized mutual information z=I(x,y)/sqrt(H(x)*H(y)) 7 | % Written by Mo Chen (sth4nth@gmail.com). 
8 | assert(numel(x) == numel(y)); 9 | n = numel(x); 10 | x = reshape(x,1,n); 11 | y = reshape(y,1,n); 12 | 13 | l = min(min(x),min(y)); 14 | x = x-l+1; 15 | y = y-l+1; 16 | k = max(max(x),max(y)); 17 | 18 | idx = 1:n; 19 | Mx = sparse(idx,x,1,n,k,n); 20 | My = sparse(idx,y,1,n,k,n); 21 | Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y 22 | Hxy = -dot(Pxy,log2(Pxy)); 23 | 24 | 25 | % hacking, to elimative the 0log0 issue 26 | Px = nonzeros(mean(Mx,1)); 27 | Py = nonzeros(mean(My,1)); 28 | 29 | % entropy of Py and Px 30 | Hx = -dot(Px,log2(Px)); 31 | Hy = -dot(Py,log2(Py)); 32 | 33 | % mutual information 34 | MI = Hx + Hy - Hxy; 35 | 36 | % normalized mutual information 37 | z = sqrt((MI/Hx)*(MI/Hy)); 38 | z = max(0,z); 39 | 40 | -------------------------------------------------------------------------------- /metric/rand_index.m: -------------------------------------------------------------------------------- 1 | function ri = rand_index(p1, p2, varargin) 2 | %RAND_INDEX Computes the rand index between two partitions. 3 | % RAND_INDEX(p1, p2) computes the rand index between partitions p1 and 4 | % p2. 5 | % 6 | % RAND_INDEX(p1, p2, 'adjusted'); computes the adjusted rand index 7 | % between partitions p1 and p2. The adjustment accounts for chance 8 | % correlation. 
9 | 10 | % Parse the input and throw errors 11 | adj = 0; 12 | if nargin == 0 13 | end 14 | if nargin > 3 15 | error('Too many input arguments'); 16 | end 17 | if nargin == 3 18 | if strcmp(varargin{1}, 'adjusted') 19 | adj = 1; 20 | else 21 | error('%s is an unrecognized argument.', varargin{1}); 22 | end 23 | end 24 | if length(p1)~=length(p2) 25 | error('Both partitions must contain the same number of points.'); 26 | end 27 | 28 | % Preliminary computations and cleansing of the partitions 29 | N = length(p1); 30 | [~, ~, p1] = unique(p1); 31 | N1 = max(p1); 32 | [~, ~, p2] = unique(p2); 33 | N2 = max(p2); 34 | 35 | % Create the matching matrix 36 | for i=1:1:N1 37 | for j=1:1:N2 38 | G1 = find(p1==i); 39 | G2 = find(p2==j); 40 | n(i,j) = length(intersect(G1,G2)); 41 | end 42 | end 43 | 44 | % If required, calculate the basic rand index 45 | if adj==0 46 | ss = sum(sum(n.^2)); 47 | ss1 = sum(sum(n,1).^2); 48 | ss2 =sum(sum(n,2).^2); 49 | ri = (nchoosek2(N,2) + ss - 0.5*ss1 - 0.5*ss2)/nchoosek2(N,2); 50 | end 51 | 52 | 53 | % Otherwise, calculate the adjusted rand index 54 | if adj==1 55 | ssm = 0; 56 | sm1 = 0; 57 | sm2 = 0; 58 | for i=1:1:N1 59 | for j=1:1:N2 60 | ssm = ssm + nchoosek2(n(i,j),2); 61 | end 62 | end 63 | temp = sum(n,2); 64 | for i=1:1:N1 65 | sm1 = sm1 + nchoosek2(temp(i),2); 66 | end 67 | temp = sum(n,1); 68 | for i=1:1:N2 69 | sm2 = sm2 + nchoosek2(temp(i),2); 70 | end 71 | NN = ssm - sm1*sm2/nchoosek2(N,2); 72 | DD = (sm1 + sm2)/2 - sm1*sm2/nchoosek2(N,2); 73 | ri = NN/DD; 74 | end 75 | 76 | 77 | % Special definition of n choose k 78 | function c = nchoosek2(a,b) 79 | if a>1 80 | c = nchoosek(a,b); 81 | else 82 | c = 0; 83 | end 84 | end 85 | end 86 | -------------------------------------------------------------------------------- /run_3s.m: -------------------------------------------------------------------------------- 1 | clear; 2 | clc; 3 | 4 | addpath('./dataset'); 5 | addpath('./metric'); 6 | addpath('./tool'); 7 | 8 | 
load('./dataset/3-sources.mat'); 9 | X{1} = bbc; 10 | X{2} = guardian; 11 | X{3} = reuters; 12 | label = truth; 13 | 14 | nclass = length(unique(label)); 15 | 16 | opts.alpha = 0.3; 17 | opts.beta = [1, 10]; 18 | opts.gamma = [0.001, 0.01]; 19 | opts.mu = 10; 20 | 21 | W = method( X, opts ); 22 | group = SpectralClustering2(W, nclass); 23 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); -------------------------------------------------------------------------------- /run_Caltech.m: -------------------------------------------------------------------------------- 1 | clear; 2 | clc; 3 | 4 | addpath('./dataset'); 5 | addpath('./metric'); 6 | addpath('./tool'); 7 | 8 | load('./dataset/caltech7.mat'); 9 | X{1} = centrist'; 10 | X{2} = garbor'; 11 | X{3} = gist'; 12 | X{4} = hog'; 13 | X{5} = lbp'; 14 | X{6} = wm'; 15 | 16 | nclass = length(unique(label)); 17 | 18 | opts.alpha = 0.3; 19 | opts.beta = [1, 1]; 20 | opts.gamma = [0.01, 0.01]; 21 | opts.mu = 10; 22 | 23 | W = method( X, opts ); 24 | group = SpectralClustering(W, nclass); 25 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); -------------------------------------------------------------------------------- /run_UCI.m: -------------------------------------------------------------------------------- 1 | clear; 2 | clc; 3 | 4 | addpath('./dataset'); 5 | addpath('./metric'); 6 | addpath('./tool'); 7 | 8 | load('./dataset/uci.mat'); 9 | X{1} = fou'; 10 | X{2} = fac'; 11 | X{3} = kar'; 12 | 13 | nclass = length(unique(label)); 14 | 15 | opts.alpha = 0.7; 16 | opts.beta = [0.01, 0.01]; 17 | opts.gamma = [1, 0.01]; 18 | opts.mu = 10; 19 | 20 | W = method( X, opts ); 21 | group = SpectralClustering2(W, nclass); 22 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); -------------------------------------------------------------------------------- /run_bbc.m: 
-------------------------------------------------------------------------------- 1 | clear; 2 | clc; 3 | 4 | addpath('./dataset'); 5 | addpath('./metric'); 6 | addpath('./tool'); 7 | 8 | load('./dataset/bbc.mat'); 9 | X{1} = X{1}'; 10 | X{2} = X{2}'; 11 | X{3} = X{3}'; 12 | label = truth; 13 | 14 | nclass = length(unique(label)); 15 | 16 | opts.alpha = 0.5; 17 | opts.beta = [1, 1]; 18 | opts.gamma = [0.1, 0.01]; 19 | opts.mu = 100; 20 | 21 | W = method( X, opts ); 22 | group = SpectralClustering(W, nclass); 23 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); -------------------------------------------------------------------------------- /run_syn.m: -------------------------------------------------------------------------------- 1 | clear; 2 | clc; 3 | 4 | addpath('./dataset'); 5 | addpath('./metric'); 6 | addpath('./tool'); 7 | 8 | load('./dataset/syn500.mat'); 9 | 10 | nclass = length(unique(label)); 11 | 12 | opts.alpha = 0.5; 13 | opts.beta = [1, 0.001]; 14 | opts.gamma = [1, 0.001]; 15 | opts.mu = 10; 16 | 17 | W = method( X, opts ); 18 | group = SpectralClustering2(W, nclass); 19 | [ NMI, ARI, ACC, fscore, precision, recall ] = clustering_metric(label,group); 20 | -------------------------------------------------------------------------------- /synthetic_lowrank.m: -------------------------------------------------------------------------------- 1 | function [ X, label ] = synthetic_lowrank( N, view ) 2 | % synthetic data 3 | % N -- number of data points 4 | % view -- number of views 5 | 6 | % Enhong Zhuo, 2019 7 | 8 | T_class1 = [1,1;1,2;2,1]; 9 | T_class2 = [3,1;4,1;4,2]; 10 | 11 | for v = 1:view 12 | 13 | m = randperm(10,1); 14 | 15 | P1 = rand(2,m); 16 | P2 = rand(2,m); 17 | 18 | TP1 = T_class1 * P1; 19 | TP2 = T_class2 * P2; 20 | 21 | C1 = rand(floor(N/2),3); 22 | C2 = rand(ceil(N/2),3); 23 | 24 | A = C1 * TP1; 25 | B = C2 * TP2; 26 | 27 | X{v} = [A;B]; 28 | X{v} = X{v} + randn(size(X{v})); 29 | 30 | end 31 | 32 | label = 
[ones(1,floor(N/2)), 2*ones(1,ceil(N/2))]'; 33 | 34 | end 35 | 36 | -------------------------------------------------------------------------------- /tool/SpectralClustering.m: -------------------------------------------------------------------------------- 1 | function [group, eigengap] = SpectralClustering(W, NUMC) 2 | %SPECTRALCLUSTERING Executes spectral clustering algorithm 3 | % W: square affinity matrix; NUMC: cluster count, or a row vector of cluster counts 4 | % group: label vector (cell array of label vectors when NUMC has several entries); eigengap: gaps between the sorted eigenvalues 5 | % calculate degree matrix 6 | degs = sum(W, 2); 7 | D = sparse(1:size(W, 1), 1:size(W, 2), degs); 8 | 9 | % compute unnormalized Laplacian 10 | L = D - W; 11 | k = max(NUMC); 12 | % compute normalized Laplacian if needed 13 | 14 | % avoid dividing by zero 15 | degs(degs == 0) = eps; 16 | % calculate D^(-1/2) 17 | D = spdiags(1./(degs.^0.5), 0, size(D, 1), size(D, 2)); 18 | % calculate normalized Laplacian 19 | L = D * L * D; 20 | 21 | % compute the eigenvectors corresponding to the k smallest 22 | % eigenvalues 23 | [U, eigenvalue] = eigs(L, k, eps); 24 | [a,b] = sort(diag(eigenvalue),'ascend'); 25 | eigenvalue = eigenvalue(b,b); % permute rows and columns so the diagonal stays the sorted eigenvalues 26 | U = U(:,b); 27 | eigengap = abs(diff(diag(eigenvalue))); 28 | U = U(:,1:k); 29 | % in case of the Jordan-Weiss algorithm, we need to normalize 30 | % the eigenvectors row-wise 31 | %U = bsxfun(@rdivide, U, sqrt(sum(U.^2, 2))); 32 | %U = U./repmat(sqrt(sum(U.^2,2)),1,size(U,2)); 33 | 34 | 35 | flag =0; 36 | for ck = NUMC 37 | Cindex = find(NUMC==ck); 38 | UU = U(:,1:ck); 39 | UU = UU./repmat(sqrt(sum(UU.^2,2)),1,size(UU,2)); 40 | [EigenvectorsDiscrete]=discretisation(UU); 41 | [~,temp] = max(EigenvectorsDiscrete,[],2); 42 | % for i = 1 : ck 43 | % initcenter(i,:) = mean(UU(temp==i,:)); 44 | % end 45 | 46 | Cluster{Cindex} = temp; 47 | end 48 | 49 | 50 | if length(NUMC)==1 51 | group=Cluster{1}; 52 | else 53 | group = Cluster; 54 | end 55 | 56 | 57 | end -------------------------------------------------------------------------------- /tool/SpectralClustering2.m: -------------------------------------------------------------------------------- 1 |
function [groups] = SpectralClustering2(A, n) 2 | %SPECTRALCLUSTERING2 Executes spectral clustering algorithm 3 | % A N-by-N affinity matrix (row/column i corresponds to sample i) 4 | % n number of classes to be clustered 5 | % *return cluster index of each sample, as produced by kmeans 6 | 7 | warning off; 8 | N = size(A,1); 9 | MAXiter = 1000; % Maximum number of iterations for KMeans 10 | REPlic = 20; % Number of replications for KMeans 11 | 12 | % Normalized spectral clustering according to Ng & Jordan & Weiss 13 | % using Normalized Symmetric Laplacian L = I - D^{-1/2} W D^{-1/2} 14 | 15 | DN = diag( 1./sqrt(sum(A)+eps) ); 16 | LapN = speye(N) - DN * A * DN; 17 | [uN,sN,vN] = svd(LapN); 18 | kerN = vN(:,N-n+1:N); 19 | kerNS = zeros(size(kerN)); 20 | for i = 1:N 21 | kerNS(i,:) = kerN(i,:) ./ norm(kerN(i,:)+eps); 22 | end 23 | % groups = kmeans(kerNS,n,'maxiter',MAXiter,'replicates',REPlic,'EmptyAction','singleton'); 24 | groups = kmeans(kerNS,n,'maxiter',MAXiter,'replicates',REPlic,'EmptyAction','singleton','Start','sample'); 25 | end 26 | -------------------------------------------------------------------------------- /tool/discretisation.m: -------------------------------------------------------------------------------- 1 | function [EigenvectorsDiscrete,EigenVectors]=discretisation(EigenVectors) 2 | % 3 | % EigenvectorsDiscrete=discretisation(EigenVectors) 4 | % 5 | % Input: EigenVectors = continuous Ncut vector, size = ndata x nbEigenvectors 6 | % Output EigenvectorsDiscrete = discrete Ncut vector, size = ndata x nbEigenvectors 7 | % 8 | % Timothee Cour, Stella Yu, Jianbo Shi, 2004 9 | 10 | [n,k]=size(EigenVectors); 11 | 12 | vm = sqrt(sum(EigenVectors.*EigenVectors,2)); 13 | EigenVectors = EigenVectors./repmat(vm+eps,1,k); 14 | 15 | R=zeros(k); 16 | % R(:,1)=EigenVectors(1+round(rand(1)*(n-1)),:)'; 17 | R(:,1)=EigenVectors(round(n/2),:)'; 18 | %R(:,1)=EigenVectors(n,:)'; 19 | c=zeros(n,1); 20 | for j=2:k 21 | c=c+abs(EigenVectors*R(:,j-1)); 22 | [minimum,i]=min(c); 23 | R(:,j)=EigenVectors(i,:)'; 24 |
end 25 | 26 | lastObjectiveValue=0; 27 | exitLoop=0; 28 | nbIterationsDiscretisation = 0; 29 | nbIterationsDiscretisationMax = 20;%voir 30 | while exitLoop== 0 31 | nbIterationsDiscretisation = nbIterationsDiscretisation + 1 ; 32 | EigenvectorsDiscrete = discretisationEigenVectorData(EigenVectors*R); 33 | [U,S,V] = svd(EigenvectorsDiscrete'*EigenVectors+eps,0); 34 | NcutValue=2*(n-trace(S)); 35 | 36 | if abs(NcutValue-lastObjectiveValue) < eps | nbIterationsDiscretisation > nbIterationsDiscretisationMax 37 | exitLoop=1; 38 | else 39 | lastObjectiveValue = NcutValue; 40 | R=V*U'; 41 | end 42 | end -------------------------------------------------------------------------------- /tool/discretisationEigenVectorData.m: -------------------------------------------------------------------------------- 1 | function Y = discretisationEigenVectorData(EigenVector) 2 | % Y = discretisationEigenVectorData(EigenVector) 3 | % 4 | % discretizes previously rotated eigenvectors in discretisation 5 | % Timothee Cour, Stella Yu, Jianbo Shi, 2004 6 | 7 | [n,k]=size(EigenVector); 8 | 9 | 10 | [Maximum,J]=max(EigenVector'); 11 | 12 | Y=sparse(1:n,J',1,n,k); 13 | % Y = J'; --------------------------------------------------------------------------------