├── A_Number.mat ├── README.md ├── demo_MVCC.m ├── figure_01.jpg ├── obj_MVCC.m └── tools ├── ClusteringMeasure.m ├── Contingency.m ├── EProjSimplex_new.m ├── L2_distance_1.m ├── RandIndex.m ├── Updata_Sv.m ├── bestMap.m ├── compute_f.m ├── compute_nmi.m ├── eig1.m └── hungarian.m /A_Number.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kunzhan/MCGC/d86699c6330be8a23593f4a240e87155ecadc8a5/A_Number.mat -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Multiview Consensus Graph Clustering 2 | A graph is usually formed to reveal the relationship between data points, and the graph structure is encoded by the affinity matrix. Most graph-based multiview clustering methods use predefined affinity matrices, and the clustering performance highly depends on the quality of the graph. We learn a consensus graph by minimizing the disagreement between different views and constraining the rank of the Laplacian matrix. Since diverse views admit the same underlying cluster structure, we use a new disagreement cost function for regularizing graphs from different views toward a common consensus. Simultaneously, we impose a rank constraint on the Laplacian matrix to learn the consensus graph with exactly *k* connected components, where *k* is the number of clusters; this differs from the fixed affinity matrices used in most existing graph-based methods. With the learned consensus graph, we can directly obtain the cluster labels without performing any post-processing, such as the *k*-means clustering algorithm in spectral clustering-based methods. A multiview consensus clustering method is proposed to learn such a graph. An efficient iterative updating algorithm is derived to solve the resulting challenging optimization problem. 
Experiments on several benchmark datasets have demonstrated the effectiveness of the proposed method in terms of seven metrics. 3 | 4 | ![](figure_01.jpg) 5 | 6 | ## Citation 7 | We appreciate it if you cite the following paper: 8 | ``` 9 | @Article{Zhan8052206, 10 | author = {Kun Zhan and Feiping Nie and Jing Wang and Yi Yang}, 11 | title = {Multiview consensus graph clustering}, 12 | journal = {IEEE Transactions on Image Processing}, 13 | year = {2019}, 14 | volume = {28}, 15 | number = {3}, 16 | pages = {1261--1270}, 17 | doi = {10.1109/TIP.2018.2877335}, 18 | issn = {1057-7149}, 19 | month = {March} 20 | } 21 | 22 | ``` 23 | [DOI: 10.1109/TIP.2018.2877335](https://doi.org/10.1109/TIP.2018.2877335) 24 | 25 | ## Contact 26 | https://kunzhan.github.io 27 | 28 | If you have any questions, feel free to contact me. (Email: `ice.echo#gmail.com`) 29 | -------------------------------------------------------------------------------- /demo_MVCC.m: -------------------------------------------------------------------------------- 1 | clear 2 | clc 3 | % addpath('./data'); 4 | addpath('tools') 5 | % 6 | % load('MSRC-v1.mat');kk = 23; islocal_1 = 1; X_train = X; truth = gnd; 7 | % % load('Number123456'); kk = 20; islocal_1 = 1; 8 | % % load('COIL_20_ZCQ'); kk = 3; islocal_1 = 0; 9 | % num_views = length(X_train); 10 | % numClust = length(unique(truth)); 11 | % n = length(truth); 12 | % 13 | % A = zeros(n,n,num_views); 14 | % for v = 1:num_views 15 | % A(:,:,v) = Updata_Sv(X_train{v},numClust,kk, islocal_1); 16 | % end 17 | % clearvars -except A num_views numClust truth 18 | % load A_MSRC; 19 | % load A_COIL 20 | load A_Number 21 | numiter = 5; 22 | k = 1; 23 | % t = 0.6:5:100; 24 | t = 0.6; 25 | acc = zeros(length(t),1); 26 | for beta2 = t 27 | % beta2 = 0.6; 28 | [y,acc(k,1), nmi, Pu, P, R, F, AR,OBJ] = obj_MVCC(A,num_views,numClust,beta2,truth,numiter); 29 | k = k +1; 30 | end 31 | % plot(OBJ),axis([0 6 min(OBJ)-5 max(OBJ+5)]),xlabel('Iteration number','Interpreter','latex'),ylabel('Objective value') 32 | 33 | % k =1; 34 | % figure(1),hold 
on;plot(t(1:k:end),acc(1:k:end,1)),axis([0 100 0 100]),xlabel('$\beta$','Interpreter','latex'),ylabel('ACC') 35 | % clearvars -except acc t 36 | % save msrc_0_6-5-100 37 | % _________ 38 | 39 | % % CAN 40 | % nv = size(A,3); 41 | % for v = 1:nv 42 | % S = A(:,:,v); 43 | % [~, y] = graphconncomp(sparse(S)); y = y'; 44 | % [acc, nmi, Pu] = ClusteringMeasure(truth, y); 45 | % AR = RandIndex(truth, y+1); 46 | % [F,P,R] = compute_f(truth,y); 47 | fprintf('&%.2f$\\pm$%.2f\n', 100*acc(1), 0); 48 | fprintf('&%.2f$\\pm$%.2f\n', 100*nmi(1), 0); 49 | fprintf('&%.2f$\\pm$%.2f\n', 100*Pu(1), 0); 50 | fprintf('&%.2f$\\pm$%.2f\n', 100*P(1), 0); 51 | fprintf('&%.2f$\\pm$%.2f\n', 100*R(1), 0); 52 | fprintf('&%.2f$\\pm$%.2f\n', 100*F(1), 0); 53 | fprintf('&%.2f$\\pm$%.2f\n', 100*AR(1), 0); 54 | % end -------------------------------------------------------------------------------- /figure_01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kunzhan/MCGC/d86699c6330be8a23593f4a240e87155ecadc8a5/figure_01.jpg -------------------------------------------------------------------------------- /obj_MVCC.m: -------------------------------------------------------------------------------- 1 | function [y,acc, nmi, Pu, P, R, F, AR,OBJ] = obj_MVCC(W,num_views,numClust,beta,truth,numiter) % learns a consensus graph S from the per-view graphs W(:,:,v), labels points by the connected components of S and scores them against truth; OBJ records the objective per iteration 2 | if (min(truth)==0),truth = truth + 1;end % shift 0-based ground-truth labels to 1-based 3 | N = size(W,1); 4 | opts.disp = 0; 5 | U = zeros(N,numClust,num_views); % one N-by-numClust embedding per view 6 | gamma = 1; 7 | S(1:N,1:N) = 0; % N-by-N zero matrix that accumulates the initial consensus graph 8 | for v =1:num_views 9 | fprintf('computing embedding matrix for view (%d)\n',v); 10 | [U(:,:,v),~] = eigs(W(:,:,v),numClust,'LA',opts); % eigenvectors of the numClust largest ('LA') eigenvalues of view v 11 | S = S + beta*U(:,:,v)*U(:,:,v)'; 12 | end 13 | S = (S+S')/2; % symmetrize 14 | DA = diag(sum(S)); 15 | LA = DA - S; % graph Laplacian of the initial S 16 | [H, ~, ~] = eig1(LA, numClust, 0); % isMax = 0: eigenvectors of the numClust smallest eigenvalues 17 | zr = 10e-11; % tolerance used to decide that a sum of eigenvalues is zero (10e-11 equals 1e-10) 18 | k = 2; % OBJ(1) holds the initial value, so iteration results start at index 2 19 | OBJ = zeros(numiter+1,1); 20 | for v =1:num_views 21 | OBJ(1) = OBJ(1) + trace(U(:,:,v)'*LA*U(:,:,v)) + norm(S-beta*U(:,:,v)*U(:,:,v)','fro'); 22 | end 23 | 
while(k<=numiter+1) 24 | fprintf('Running iteration %d\n',k-1); 25 | A0 = zeros(N); 26 | for v = 1:num_views 27 | [U(:,:,v), ~] = eigs(W(:,:,v) + beta.*S, numClust,'LA',opts); % re-embed view v from its graph plus beta times the current consensus graph S 28 | A0 = A0 + beta*U(:,:,v)*U(:,:,v)'; 29 | end 30 | for iter = 1:50; % at most 50 updates of S and gamma for the current U 31 | dist = L2_distance_1(H',H'); % squared distances between the rows of H 32 | S = A0.*0; 33 | for j = 1:N 34 | ai = A0(j,:); 35 | di = dist(j,:); 36 | ad = ai - 0.5.*gamma*di; 37 | S(j,:) = EProjSimplex_new(ad); % project row j onto the simplex (entries >= 0, sum 1) 38 | end; 39 | S = (S + S.')/2; 40 | D = diag(sum(S)); 41 | L = D - S; 42 | F_old = H; 43 | [H, ~, ev] = eig1(L, numClust, 0); % ev: all eigenvalues of L in ascending order 44 | fn1 = sum(ev(1:numClust)); 45 | fn2 = sum(ev(1:numClust+1)); 46 | if fn1 > zr % the numClust smallest eigenvalues are not all ~0: double gamma 47 | gamma = gamma.*2; 48 | elseif fn2 < zr % the numClust+1 smallest eigenvalues are all ~0: halve gamma and restore the previous H 49 | gamma = gamma/2; H = F_old; 50 | else % fn1 <= zr <= fn2: stop the inner loop 51 | break; 52 | end; 53 | end 54 | for v =1:num_views 55 | OBJ(k) = OBJ(k) + trace(U(:,:,v)'*L*U(:,:,v)) + norm(S-beta*U(:,:,v)*U(:,:,v)','fro'); 56 | end 57 | k = k+1; 58 | end 59 | % plot(OBJ) 60 | [~, y]=graphconncomp(sparse(S)); y = y'; % labels = connected components of S; NOTE(review): graphconncomp is a Bioinformatics Toolbox function - confirm it is available in the target MATLAB release 61 | [acc, nmi, Pu] = ClusteringMeasure(truth, y); 62 | AR = RandIndex(truth, y+1); % NOTE(review): y is shifted by one only here; Contingency sizes its table by the maximum label, so the shift appears to add just an empty column - confirm it is intentional 63 | [F,P,R] = compute_f(truth,y); -------------------------------------------------------------------------------- /tools/ClusteringMeasure.m: -------------------------------------------------------------------------------- 1 | function [ACC MIhat Purity] = ClusteringMeasure(Y, predY) 2 | % Computes accuracy (ACC), normalized mutual information (MIhat) and purity of the predicted labels predY against the ground-truth labels Y. 3 | if size(Y,2) ~= 1 4 | Y = Y'; 5 | end; 6 | if size(predY,2) ~= 1 7 | predY = predY'; 8 | end; 9 | 10 | n = length(Y); 11 | 12 | uY = unique(Y); 13 | nclass = length(uY); 14 | Y0 = zeros(n,1); 15 | if nclass ~= max(Y) % relabel Y to 1..nclass when the number of distinct labels differs from max(Y) 16 | for i = 1:nclass 17 | Y0(find(Y == uY(i))) = i; 18 | end; 19 | Y = Y0; 20 | end; 21 | 22 | uY = unique(predY); 23 | nclass = length(uY); 24 | predY0 = zeros(n,1); 25 | if nclass ~= max(predY) % same relabelling for the predicted labels 26 | for i = 1:nclass 27 | predY0(find(predY == uY(i))) = i; 28 | end; 29 | predY = predY0; 30 | end; 31 | 32 | 33 | Lidx = unique(Y); classnum = length(Lidx); 34 | predLidx = unique(predY); pred_classnum = length(predLidx); 35 | 36 | % purity 37 
| correnum = 0; 38 | for ci = 1:pred_classnum 39 | incluster = Y(find(predY == predLidx(ci))); 40 | % cnub = unique(incluster); 41 | % inclunub = 0; 42 | % for cnubi = 1:length(cnub) 43 | % inclunub(cnubi) = length(find(incluster == cnub(cnubi))); 44 | % end; 45 | inclunub = hist(incluster, 1:max(incluster)); if isempty(inclunub) inclunub=0;end; 46 | correnum = correnum + max(inclunub); 47 | end; 48 | Purity = correnum/length(predY); 49 | 50 | %if pred_classnum 51 | res = bestMap(Y, predY); 52 | % accuracy 53 | ACC = length(find(Y == res))/length(Y); 54 | % NMI 55 | MIhat = MutualInfo(Y,res); 56 | 57 | 58 | 59 | % result = [ACC MIhat Purity]; 60 | 61 | 62 | 63 | 64 | 65 | %% 66 | function [newL2, c] = bestMap(L1,L2) 67 | %bestmap: permute the labels of L2 to match L1 as well as possible 68 | % [newL2] = bestMap(L1,L2); newL2 has the same size as L2(:); c is the assignment returned by hungarian 69 | 70 | %=========== 71 | L1 = L1(:); 72 | L2 = L2(:); 73 | if ~isequal(size(L1), size(L2)) % a vector-valued "size(L1) ~= size(L2)" is never all-true after (:), so the old check could not fire 74 | error('size(L1) must == size(L2)'); 75 | end 76 | L1 = L1 - min(L1) + 1; % min (L1) <- 1; 77 | L2 = L2 - min(L2) + 1; % min (L2) <- 1; 78 | %=========== make bipartition graph ============ 79 | nClass = max(max(L1), max(L2)); 80 | G = zeros(nClass); 81 | for i=1:nClass 82 | for j=1:nClass 83 | G(i,j) = length(find(L1 == i & L2 == j)); 84 | end 85 | end 86 | %=========== assign with hungarian method ====== 87 | [c,t] = hungarian(-G); 88 | newL2 = zeros(size(L2)); % one entry per sample instead of relying on implicit array growth 89 | for i=1:nClass 90 | newL2(L2 == i) = c(i); 91 | end 92 | 93 | 94 | 95 | 96 | 97 | %% 98 | function MIhat = MutualInfo(L1,L2) 99 | % mutual information of the two labelings, normalized by the larger label entropy 100 | 101 | %=========== 102 | L1 = L1(:); 103 | L2 = L2(:); 104 | if ~isequal(size(L1), size(L2)) % see bestMap: the element-wise comparison could never trigger 105 | error('size(L1) must == size(L2)'); 106 | end 107 | L1 = L1 - min(L1) + 1; % min (L1) <- 1; 108 | L2 = L2 - min(L2) + 1; % min (L2) <- 1; 109 | %=========== make bipartition graph ============ 110 | nClass = max(max(L1), max(L2)); 111 | G = zeros(nClass); 112 | for i=1:nClass 113 | for j=1:nClass 114 | G(i,j) = length(find(L1 == i & L2 == j))+eps; 115 | end 116 
| end 117 | sumG = sum(G(:)); 118 | %=========== calculate MIhat 119 | P1 = sum(G,2); P1 = P1/sumG; 120 | P2 = sum(G,1); P2 = P2/sumG; 121 | H1 = sum(-P1.*log2(P1)); 122 | H2 = sum(-P2.*log2(P2)); 123 | P12 = G/sumG; 124 | PPP = P12./repmat(P2,nClass,1)./repmat(P1,1,nClass); 125 | PPP(abs(PPP) < 1e-12) = 1; % log2(1) = 0, so near-zero ratios contribute nothing to MI 126 | MI = sum(P12(:) .* log2(PPP(:))); 127 | MIhat = MI / max(H1,H2); % normalize by the larger of the two label entropies 128 | %%%%%%%%%%%%% why complex ? %%%%%%%% 129 | MIhat = real(MIhat); 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | %% 139 | function [C,T]=hungarian(A) 140 | %HUNGARIAN Solve the Assignment problem using the Hungarian method. 141 | % 142 | %[C,T]=hungarian(A) 143 | %A - a square cost matrix. 144 | %C - the optimal assignment. 145 | %T - the cost of the optimal assignment. 146 | %s.t. T = trace(A(C,:)) is minimized over all possible assignments. 147 | 148 | % Adapted from the FORTRAN IV code in Carpaneto and Toth, "Algorithm 548: 149 | % Solution of the assignment problem [H]", ACM Transactions on 150 | % Mathematical Software, 6(1):104-111, 1980. 151 | 152 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 153 | % Department of Computing Science, Umeå University, 154 | % Sweden. 155 | % All standard disclaimers apply. 156 | 157 | % A substantial effort was put into this code. If you use it for a 158 | % publication or otherwise, please include an acknowledgement or at least 159 | % notify me by email. /Niclas 160 | 161 | [m,n]=size(A); 162 | 163 | if (m~=n) 164 | error('HUNGARIAN: Cost matrix must be square!'); 165 | end 166 | 167 | % Save original cost matrix. 168 | orig=A; 169 | 170 | % Reduce matrix. 171 | A=hminired(A); 172 | 173 | % Do an initial assignment. 174 | [A,C,U]=hminiass(A); 175 | 176 | % Repeat while we have unassigned rows. 177 | while (U(n+1)) 178 | % Start with no path, no unchecked zeros, and no unexplored rows. 179 | LR=zeros(1,n); 180 | LC=zeros(1,n); 181 | CH=zeros(1,n); 182 | RH=[zeros(1,n) -1]; 183 | 184 | % No labelled columns. 
185 | SLC=[]; 186 | 187 | % Start path in first unassigned row. 188 | r=U(n+1); 189 | % Mark row with end-of-path label. 190 | LR(r)=-1; 191 | % Insert row first in labelled row set. 192 | SLR=r; 193 | 194 | % Repeat until we manage to find an assignable zero. 195 | while (1) 196 | % If there are free zeros in row r 197 | if (A(r,n+1)~=0) 198 | % ...get column of first free zero (the list head in column n+1 stores it negated). 199 | l=-A(r,n+1); 200 | 201 | % If there are more free zeros in row r and row r is not 202 | % yet marked as unexplored.. 203 | if (A(r,l)~=0 & RH(r)==0) 204 | % Insert row r first in unexplored list. 205 | RH(r)=RH(n+1); 206 | RH(n+1)=r; 207 | 208 | % Mark in which column the next unexplored zero in this row 209 | % is. 210 | CH(r)=-A(r,l); 211 | end 212 | else 213 | % If all rows are explored.. 214 | if (RH(n+1)<=0) 215 | % Reduce matrix. 216 | [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR); 217 | end 218 | 219 | % Re-start with first unexplored row. 220 | r=RH(n+1); 221 | % Get column of next free zero in row r. 222 | l=CH(r); 223 | % Advance "column of next free zero". 224 | CH(r)=-A(r,l); 225 | % If this zero is last in the list.. 226 | if (A(r,l)==0) 227 | % ...remove row r from unexplored list. 228 | RH(n+1)=RH(r); 229 | RH(r)=0; 230 | end 231 | end 232 | 233 | % While the column l is labelled, i.e. in path. 234 | while (LC(l)~=0) 235 | % If row r is explored.. 236 | if (RH(r)==0) 237 | % If all rows are explored.. 238 | if (RH(n+1)<=0) 239 | % Reduce cost matrix. 240 | [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR); 241 | end 242 | 243 | % Re-start with first unexplored row. 244 | r=RH(n+1); 245 | end 246 | 247 | % Get column of next free zero in row r. 248 | l=CH(r); 249 | 250 | % Advance "column of next free zero". 251 | CH(r)=-A(r,l); 252 | 253 | % If this zero is last in list.. 254 | if(A(r,l)==0) 255 | % ...remove row r from unexplored list. 256 | RH(n+1)=RH(r); 257 | RH(r)=0; 258 | end 259 | end 260 | 261 | % If the column found is unassigned.. 
262 | if (C(l)==0) 263 | % Flip all zeros along the path in LR,LC. 264 | [A,C,U]=hmflip(A,C,LC,LR,U,l,r); 265 | % ...and exit to continue with next unassigned row. 266 | break; 267 | else 268 | % ...else add zero to path. 269 | 270 | % Label column l with row r. 271 | LC(l)=r; 272 | 273 | % Add l to the set of labelled columns. 274 | SLC=[SLC l]; 275 | 276 | % Continue with the row assigned to column l. 277 | r=C(l); 278 | 279 | % Label row r with column l. 280 | LR(r)=l; 281 | 282 | % Add r to the set of labelled rows. 283 | SLR=[SLR r]; 284 | end 285 | end 286 | end 287 | 288 | % Calculate the total cost. 289 | T=sum(orig(logical(sparse(C,1:size(orig,2),1)))); 290 | 291 | 292 | function A=hminired(A) 293 | %HMINIRED Initial reduction of cost matrix for the Hungarian method. 294 | % 295 | %B=hminired(A) 296 | %A - the unreduced cost matrix. 297 | %B - the reduced cost matrix with linked zeros in each row. 298 | 299 | % v1.0 96-06-13. Niclas Borlin, niclas@cs.umu.se. 300 | 301 | [m,n]=size(A); 302 | 303 | % Subtract column-minimum values from each column. 304 | colMin=min(A); 305 | A=A-colMin(ones(n,1),:); 306 | 307 | % Subtract row-minimum values from each row. 308 | rowMin=min(A')'; 309 | A=A-rowMin(:,ones(1,n)); 310 | 311 | % Get positions of all zeros. 312 | [i,j]=find(A==0); 313 | 314 | % Extend A to give room for row zero list header column. 315 | A(1,n+1)=0; 316 | for k=1:n 317 | % Get the columns of all zeros in this row. 318 | cols=j(k==i)'; 319 | % Insert pointers in matrix: the header (column n+1) and each zero store minus the column of the next zero; the last zero stores 0. 320 | A(k,[n+1 cols])=[-cols 0]; 321 | end 322 | 323 | 324 | function [A,C,U]=hminiass(A) 325 | %HMINIASS Initial assignment of the Hungarian method. 326 | % 327 | %[B,C,U]=hminiass(A) 328 | %A - the reduced cost matrix. 329 | %B - the reduced cost matrix, with assigned zeros removed from lists. 330 | %C - a vector. C(J)=I means row I is assigned to column J, 331 | % i.e. there is an assigned zero in position I,J. 332 | %U - a vector with a linked list of unassigned rows. 
333 | 334 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 335 | 336 | [n,np1]=size(A); 337 | 338 | % Initialize return vectors. 339 | C=zeros(1,n); 340 | U=zeros(1,n+1); 341 | 342 | % Initialize last/next zero "pointers". 343 | LZ=zeros(1,n); 344 | NZ=zeros(1,n); 345 | 346 | for i=1:n 347 | % Set j to first unassigned zero in row i. 348 | lj=n+1; 349 | j=-A(i,lj); 350 | 351 | % Repeat until we have no more zeros (j==0) or we find a zero 352 | % in an unassigned column (C(j)==0). 353 | 354 | while (C(j)~=0) 355 | % Advance lj and j in zero list. 356 | lj=j; 357 | j=-A(i,lj); 358 | 359 | % Stop if we hit end of list. 360 | if (j==0) 361 | break; 362 | end 363 | end 364 | 365 | if (j~=0) 366 | % We found a zero in an unassigned column. 367 | 368 | % Assign row i to column j. 369 | C(j)=i; 370 | 371 | % Remove A(i,j) from unassigned zero list. 372 | A(i,lj)=A(i,j); 373 | 374 | % Update next/last unassigned zero pointers. 375 | NZ(i)=-A(i,j); 376 | LZ(i)=lj; 377 | 378 | % Indicate A(i,j) is an assigned zero. 379 | A(i,j)=0; 380 | else 381 | % We found no zero in an unassigned column. 382 | 383 | % Check all zeros in this row. 384 | 385 | lj=n+1; 386 | j=-A(i,lj); 387 | 388 | % Check all zeros in this row for a suitable zero in another row. 389 | while (j~=0) 390 | % Check the row assigned to this column. 391 | r=C(j); 392 | 393 | % Pick up last/next pointers. 394 | lm=LZ(r); 395 | m=NZ(r); 396 | 397 | % Check all unchecked zeros in free list of this row. 398 | while (m~=0) 399 | % Stop if we find an unassigned column. 400 | if (C(m)==0) 401 | break; 402 | end 403 | 404 | % Advance one step in list. 405 | lm=m; 406 | m=-A(r,lm); 407 | end 408 | 409 | if (m==0) 410 | % We failed on row r. Continue with next zero on row i. 411 | lj=j; 412 | j=-A(i,lj); 413 | else 414 | % We found a zero in an unassigned column. 
415 | 416 | % Replace zero at (r,m) in unassigned list with zero at (r,j) 417 | A(r,lm)=-j; 418 | A(r,j)=A(r,m); 419 | 420 | % Update last/next pointers in row r. 421 | NZ(r)=-A(r,m); 422 | LZ(r)=j; 423 | 424 | % Mark A(r,m) as an assigned zero in the matrix . . . 425 | A(r,m)=0; 426 | 427 | % ...and in the assignment vector. 428 | C(m)=r; 429 | 430 | % Remove A(i,j) from unassigned list. 431 | A(i,lj)=A(i,j); 432 | 433 | % Update last/next pointers in row i. 434 | NZ(i)=-A(i,j); 435 | LZ(i)=lj; 436 | 437 | % Mark A(i,j) as an assigned zero in the matrix . . . 438 | A(i,j)=0; 439 | 440 | % ...and in the assignment vector. 441 | C(j)=i; 442 | 443 | % Stop search. 444 | break; 445 | end 446 | end 447 | end 448 | end 449 | 450 | % Create vector with list of unassigned rows. 451 | 452 | % Mark all rows that have an assignment. 453 | r=zeros(1,n); 454 | rows=C(C~=0); 455 | r(rows)=rows; 456 | empty=find(r==0); 457 | 458 | % Create vector with linked list of unassigned rows. 459 | U=zeros(1,n+1); 460 | U([n+1 empty])=[empty 0]; 461 | 462 | 463 | function [A,C,U]=hmflip(A,C,LC,LR,U,l,r) 464 | %HMFLIP Flip assignment state of all zeros along a path. 465 | % 466 | %[A,C,U]=hmflip(A,C,LC,LR,U,l,r) 467 | %Input: 468 | %A - the cost matrix. 469 | %C - the assignment vector. 470 | %LC - the column label vector. 471 | %LR - the row label vector. 472 | %U - the unassigned row list vector. 473 | %r,l - position of last zero in path. 474 | %Output: 475 | %A - updated cost matrix. 476 | %C - updated assignment vector. 477 | %U - updated unassigned row list vector. 478 | 479 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 480 | 481 | n=size(A,1); 482 | 483 | while (1) 484 | % Move assignment in column l to row r. 485 | C(l)=r; 486 | 487 | % Find zero to be removed from zero list.. 488 | 489 | % Find zero before this. 490 | m=find(A(r,:)==-l); 491 | 492 | % Link past this zero. 493 | A(r,m)=A(r,l); 494 | 495 | A(r,l)=0; 496 | 497 | % If this was the first zero of the path.. 
498 | if (LR(r)<0) 499 | % ...remove row from unassigned row list and return. 500 | U(n+1)=U(r); 501 | U(r)=0; 502 | return; 503 | else 504 | 505 | % Move back in this row along the path and get column of next zero. 506 | l=LR(r); 507 | 508 | % Insert zero at (r,l) first in zero list. 509 | A(r,l)=A(r,n+1); 510 | A(r,n+1)=-l; 511 | 512 | % Continue back along the column to get row of next zero in path. 513 | r=LC(l); 514 | end 515 | end 516 | 517 | 518 | function [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR) 519 | %HMREDUCE Reduce parts of cost matrix in the Hungarian method. 520 | % 521 | %[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR) 522 | %Input: 523 | %A - Cost matrix. 524 | %CH - vector of column of 'next zeros' in each row. 525 | %RH - vector with list of unexplored rows. 526 | %LC - column labels. 527 | %LR - row labels. 528 | %SLC - set of column labels. 529 | %SLR - set of row labels. 530 | % 531 | %Output: 532 | %A - Reduced cost matrix. 533 | %CH - Updated vector of 'next zeros' in each row. 534 | %RH - Updated vector of unexplored rows. 535 | 536 | % v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se. 537 | 538 | n=size(A,1); 539 | 540 | % Find which rows are covered, i.e. unlabelled. 541 | coveredRows=LR==0; 542 | 543 | % Find which columns are covered, i.e. labelled. 544 | coveredCols=LC~=0; 545 | 546 | r=find(~coveredRows); 547 | c=find(~coveredCols); 548 | 549 | % Get minimum of uncovered elements. 550 | m=min(min(A(r,c))); 551 | 552 | % Subtract minimum from all uncovered elements. 553 | A(r,c)=A(r,c)-m; 554 | 555 | % Check all uncovered columns.. 556 | for j=c 557 | % ...and uncovered rows in path order.. 558 | for i=SLR 559 | % If this is a (new) zero.. 560 | if (A(i,j)==0) 561 | % If the row is not in unexplored list.. 562 | if (RH(i)==0) 563 | % ...insert it first in unexplored list. 564 | RH(i)=RH(n+1); 565 | RH(n+1)=i; 566 | % Mark this zero as "next free" in this row. 567 | CH(i)=j; 568 | end 569 | % Find last unassigned zero on row I. 
570 | row=A(i,:); 571 | colsInList=-row(row<0); 572 | if (length(colsInList)==0) 573 | % No zeros in the list. 574 | l=n+1; 575 | else 576 | l=colsInList(row(colsInList)==0); 577 | end 578 | % Append this zero to end of list. 579 | A(i,l)=-j; 580 | end 581 | end 582 | end 583 | 584 | % Add minimum to all doubly covered elements. 585 | r=find(coveredRows); 586 | c=find(coveredCols); 587 | 588 | % Take care of the zeros we will remove. 589 | [i,j]=find(A(r,c)<=0); 590 | 591 | i=r(i); 592 | j=c(j); 593 | 594 | for k=1:length(i) 595 | % Find zero before this in this row. 596 | lj=find(A(i(k),:)==-j(k)); 597 | % Link past it. 598 | A(i(k),lj)=A(i(k),j(k)); 599 | % Mark it as assigned. 600 | A(i(k),j(k))=0; 601 | end 602 | 603 | A(r,c)=A(r,c)+m; 604 | -------------------------------------------------------------------------------- /tools/Contingency.m: -------------------------------------------------------------------------------- 1 | function Cont=Contingency(Mem1,Mem2) 2 | %CONTINGENCY Form contingency matrix for two vectors 3 | % C=Contingency(Mem1,Mem2) returns contingency matrix for two 4 | % column vectors Mem1, Mem2. These define which cluster each entity 5 | % has been assigned to. 6 | % Entries must be positive integers: they are used as row/column indices of C. 7 | % See also RANDINDEX. 8 | % 9 | 10 | %(C) David Corney (2000) D.Corney@cs.ucl.ac.uk 11 | 12 | if nargin < 2 || min(size(Mem1)) > 1 || min(size(Mem2)) > 1 % short-circuit so Mem2 is not inspected when it was not supplied 13 | error('Contingency: Requires two vector arguments') 14 | 15 | end 16 | 17 | Cont=zeros(max(Mem1),max(Mem2)); 18 | 19 | for i = 1:length(Mem1); 20 | Cont(Mem1(i),Mem2(i))=Cont(Mem1(i),Mem2(i))+1; 21 | end 22 | -------------------------------------------------------------------------------- /tools/EProjSimplex_new.m: -------------------------------------------------------------------------------- 1 | function [x ft] = EProjSimplex_new(v, k) 2 | 3 | % 4 | %% Problem 5 | % 6 | % min 1/2 || x - v||^2 7 | % s.t. 
x>=0, 1'x=k 8 | % 9 | % v: vector to project; k: required sum of x (default 1); x: the projection; ft: iteration counter (starts at 1). 10 | if nargin < 2 11 | k = 1; 12 | end; 13 | 14 | ft=1; 15 | n = length(v); 16 | 17 | v0 = v-mean(v) + k/n; % shift v so that its entries sum to k 18 | %vmax = max(v0); 19 | vmin = min(v0); 20 | if vmin < 0 % some entry is negative: find the shift lambda_m with Newton's method 21 | f = 1; 22 | lambda_m = 0; 23 | while abs(f) > 10^-10 24 | v1 = v0 - lambda_m; 25 | posidx = v1>0; 26 | npos = sum(posidx); 27 | g = -npos; % derivative of f with respect to lambda_m 28 | f = sum(v1(posidx)) - k; % constraint residual: sum of the positive part minus k 29 | lambda_m = lambda_m - f/g; % Newton step 30 | ft=ft+1; 31 | if ft > 100 % give up once the counter exceeds 100 32 | x = max(v1,0); 33 | break; 34 | end; 35 | end; 36 | x = max(v1,0); 37 | 38 | else 39 | x = v0; % already non-negative and sums to k 40 | end; -------------------------------------------------------------------------------- /tools/L2_distance_1.m: -------------------------------------------------------------------------------- 1 | % compute squared Euclidean distance 2 | % ||A-B||^2 = ||A||^2 + ||B||^2 - 2*A'*B 3 | function d = L2_distance_1(a,b) 4 | % a,b: two matrices. each column is a data point 5 | % d: distance matrix of a and b; d(i,j) is the squared distance between a(:,i) and b(:,j) 6 | 7 | 8 | 9 | if (size(a,1) == 1) % pad one-dimensional data with a zero row so that sum() below works column by column 10 | a = [a; zeros(1,size(a,2))]; 11 | b = [b; zeros(1,size(b,2))]; 12 | end 13 | 14 | aa=sum(a.*a); bb=sum(b.*b); ab=a'*b; 15 | d = repmat(aa',[1 size(bb,2)]) + repmat(bb,[size(aa,2) 1]) - 2*ab; 16 | 17 | d = real(d); 18 | d = max(d,0); % clamp negative entries to zero 19 | 20 | % % force 0 on the diagonal? 21 | % if (df==1) 22 | % d = d.*(1-eye(size(d))); 23 | % end 24 | -------------------------------------------------------------------------------- /tools/RandIndex.m: -------------------------------------------------------------------------------- 1 | function [AR,RI,MI,HI]=RandIndex(c1,c2) 2 | %RANDINDEX - calculates Rand Indices to compare two partitions 3 | % ARI=RANDINDEX(c1,c2), where c1,c2 are vectors listing the 4 | % class membership, returns the "Hubert & Arabie adjusted Rand index". 5 | % [AR,RI,MI,HI]=RANDINDEX(c1,c2) returns the adjusted Rand index, 6 | % the unadjusted Rand index, "Mirkin's" index and "Hubert's" index. 7 | % 8 | % See L. Hubert and P. 
Arabie (1985) "Comparing Partitions" Journal of 9 | % Classification 2:193-218 10 | 11 | %(C) David Corney (2000) D.Corney@cs.ucl.ac.uk 12 | 13 | if nargin < 2 || min(size(c1)) > 1 || min(size(c2)) > 1 % short-circuit so c2 is not inspected when it was not supplied 14 | error('RandIndex: Requires two vector arguments') 15 | 16 | end 17 | 18 | C=Contingency(c1,c2); %form contingency matrix 19 | 20 | n=sum(sum(C)); 21 | nis=sum(sum(C,2).^2); %sum of squares of sums of rows 22 | njs=sum(sum(C,1).^2); %sum of squares of sums of columns 23 | 24 | t1=nchoosek(n,2); %total number of pairs of entities 25 | t2=sum(sum(C.^2)); %sum over rows & columns of nij^2 26 | t3=.5*(nis+njs); 27 | 28 | %Expected index (for adjustment) 29 | nc=(n*(n^2+1)-(n+1)*nis-(n+1)*njs+2*(nis*njs)/n)/(2*(n-1)); 30 | 31 | A=t1+t2-t3; %no. agreements 32 | D= -t2+t3; %no. disagreements 33 | 34 | if t1==nc 35 | AR=0; %avoid division by zero; if k=1, define Rand = 0 36 | else 37 | AR=(A-nc)/(t1-nc); %adjusted Rand - Hubert & Arabie 1985 38 | end 39 | 40 | RI=A/t1; %Rand 1971 %Probability of agreement 41 | MI=D/t1; %Mirkin 1970 %p(disagreement) 42 | HI=(A-D)/t1; %Hubert 1977 %p(agree)-p(disagree) -------------------------------------------------------------------------------- /tools/Updata_Sv.m: -------------------------------------------------------------------------------- 1 | function A = Updata_Sv(X, c, k, islocal) 2 | % X = gpuArray(X); 3 | NITER = 30; 4 | num = size(X,2); 5 | if nargin < 4 6 | islocal = 0; 7 | end; 8 | if nargin < 3 9 | k = 15; 10 | end; 11 | 12 | distX = L2_distance_1(X,X); 13 | [distX1, idx] = sort(distX,2); 14 | A = zeros(num); 15 | rr = zeros(num,1); 16 | for i = 1:num 17 | di = distX1(i,2:k+2); 18 | rr(i) = 0.5*(k*di(k+1)-sum(di(1:k))); 19 | id = idx(i,2:k+2); 20 | A(i,id) = (di(k+1)-di)/(k*di(k+1)-sum(di(1:k))+eps); 21 | end; 22 | lambda = 1; 23 | A0 = (A+A')/2; 24 | 25 | D0 = diag(sum(A0)); 26 | L0 = D0 - A0; 27 | [F, ~, evs] = eig1(L0, c, 0); 28 | % if sum(evs(1:c+1)) < 0.00000000001 29 | % error('The original graph has more than 
%d connected components', c); 30 | % end; 31 | for iter = 1:NITER 32 | distf = L2_distance_1(F',F'); 33 | % [distf1, ~] = sort(distf,2); %% 34 | A = zeros(num); 35 | for i=1:num 36 | if islocal == 1 37 | idxa0 = idx(i,2:k+1); 38 | else 39 | idxa0 = 1:num; 40 | end; 41 | dfi = distf(i,idxa0); 42 | ad = -dfi/2/lambda; %% 43 | A(i,idxa0) = EProjSimplex_new(ad); 44 | end; 45 | A = (A+A')/2; 46 | D = diag(sum(A)); 47 | L = D-A; 48 | F_old = F; 49 | [F, ~, ev]=eig1(L, c, 0); 50 | evs(:,iter+1) = ev; 51 | 52 | fn1 = sum(ev(1:c)); 53 | fn2 = sum(ev(1:c+1)); 54 | if fn1 > 0.00000000001 55 | lambda = lambda/2; 56 | elseif fn2 < 0.00000000001 57 | lambda = lambda*2; F = F_old; 58 | else 59 | break; 60 | end 61 | end -------------------------------------------------------------------------------- /tools/bestMap.m: -------------------------------------------------------------------------------- 1 | function [newL2] = bestMap(L1,L2) 2 | %bestmap: permute the labels of L2 to match L1 as well as possible 3 | % [newL2] = bestMap(L1,L2); newL2 has the size of L2(:) and uses the label values of L1 4 | 5 | %=========== 6 | L1 = L1(:); 7 | L2 = L2(:); 8 | if ~isequal(size(L1), size(L2)) % a vector-valued "size(L1) ~= size(L2)" is never all-true after (:), so the old check could not fire 9 | error('size(L1) must == size(L2)'); 10 | end 11 | 12 | Label1 = unique(L1); 13 | nClass1 = length(Label1); 14 | Label2 = unique(L2); 15 | nClass2 = length(Label2); 16 | 17 | nClass = max(nClass1,nClass2); 18 | G = zeros(nClass); % square co-occurrence table, zero-padded when the label counts differ 19 | for i=1:nClass1 20 | for j=1:nClass2 21 | G(i,j) = length(find(L1 == Label1(i) & L2 == Label2(j))); 22 | end 23 | end 24 | [c,t] = hungarian(-G); % c(j) = row (index into Label1, or a padding row) matched to column j 25 | newL2 = zeros(size(L2)); 26 | for i=1:nClass2 27 | if c(i) <= nClass1, newL2(L2 == Label2(i)) = Label1(c(i)); else, newL2(L2 == Label2(i)) = max(Label1) + c(i) - nClass1; end % a cluster matched to a padding row gets a fresh label not present in L1 instead of indexing past the end of Label1 28 | end 29 | 30 | 31 | return; 32 | 33 | %=======backup old=========== 34 | 35 | L1 = L1 - min(L1) + 1; % min (L1) <- 1; 36 | L2 = L2 - min(L2) + 1; % min (L2) <- 1; 37 | %=========== make bipartition graph ============ 38 | nClass = max(max(L1), max(L2)); 39 | G = zeros(nClass); 40 | for i=1:nClass 41 | for j=1:nClass 42 | G(i,j) = length(find(L1 == i & L2 == j)); 43 | end 44 | end 45 | %=========== 
assign with hungarian method ====== 46 | [c,t] = hungarian(-G); 47 | newL2 = zeros(nClass,1); 48 | for i=1:nClass 49 | newL2(L2 == i) = c(i); 50 | end -------------------------------------------------------------------------------- /tools/compute_f.m: -------------------------------------------------------------------------------- 1 | function [f,p,r] = compute_f(T,H) 2 | % Pairwise F-score f, precision p and recall r of the clustering H against the ground truth T. 3 | T = T(:); H = H(:); % compare as column vectors so mixed row/column inputs cannot broadcast into a matrix 4 | if length(T) ~= length(H), 5 | error('compute_f: T and H must have the same length (%d vs %d).', length(T), length(H)); 6 | end; 7 | 8 | N = length(T); 9 | numT = 0; 10 | numH = 0; 11 | numI = 0; 12 | for n=1:N, 13 | Tn = (T(n+1:end))==T(n); 14 | Hn = (H(n+1:end))==H(n); 15 | numT = numT + sum(Tn); % pairs sharing a label in T 16 | numH = numH + sum(Hn); % pairs sharing a label in H 17 | numI = numI + sum(Tn .* Hn); % pairs sharing a label in both 18 | end; 19 | p = 1; 20 | r = 1; 21 | f = 1; 22 | if numH > 0, 23 | p = numI / numH; 24 | end; 25 | if numT > 0, 26 | r = numI / numT; 27 | end; 28 | if (p+r) == 0, 29 | f = 0; 30 | else 31 | f = 2 * p * r / (p + r); 32 | end; 33 | -------------------------------------------------------------------------------- /tools/compute_nmi.m: -------------------------------------------------------------------------------- 1 | function [A nmi avgent] = compute_nmi (T, H) 2 | % A(i,j): number of points of class j (labels T) in cluster i (labels H); nmi: normalized mutual information; avgent: average cluster entropy. 3 | N = length(T); 4 | classes = unique(T); 5 | clusters = unique(H); 6 | num_class = length(classes); 7 | num_clust = length(clusters); 8 | D = zeros(1,num_class); B = zeros(1,num_clust); miarr = zeros(num_clust,num_class); % preallocate instead of growing inside the loops 9 | %%compute number of points in each class 10 | for j=1:num_class 11 | index_class = (T(:)==classes(j)); 12 | D(j) = sum(index_class); 13 | end 14 | 15 | %%mutual information 16 | mi = 0; 17 | A = zeros(num_clust, num_class); 18 | avgent = 0; 19 | for i=1:num_clust 20 | %number of points in cluster 'i' 21 | index_clust = (H(:)==clusters(i)); 22 | B(i) = sum(index_clust); 23 | for j=1:num_class 24 | index_class = (T(:)==classes(j)); 25 | %%compute number of points in class 'j' that end up in cluster 'i' 26 | A(i,j) = sum(index_class.*index_clust); 27 | if (A(i,j) ~= 0) 28 | miarr(i,j) = A(i,j)/N * log2 (N*A(i,j)/(B(i)*D(j))); 29 | %%average entropy calculation 30 | avgent = avgent - (B(i)/N) * 
(A(i,j)/B(i)) * log2 (A(i,j)/B(i)); 31 | else 32 | miarr(i,j) = 0; 33 | end 34 | mi = mi + miarr(i,j); 35 | 36 | 37 | 38 | end 39 | end 40 | 41 | %%class entropy 42 | class_ent = 0; 43 | for i=1:num_class 44 | class_ent = class_ent + D(i)/N * log2(N/D(i)); 45 | end 46 | 47 | %%clustering entropy 48 | clust_ent = 0; 49 | for i=1:num_clust 50 | clust_ent = clust_ent + B(i)/N * log2(N/B(i)); 51 | end 52 | 53 | %%normalized mutual information 54 | nmi = 2*mi / (clust_ent + class_ent); % NOTE(review): evaluates to NaN (0/0) when T and H each contain a single label - confirm whether callers need a defined value -------------------------------------------------------------------------------- /tools/eig1.m: -------------------------------------------------------------------------------- 1 | function [eigvec, eigval, eigval_full] = eig1(A, c, isMax, isSym) 2 | % Returns the c eigenvectors/eigenvalues of A with the largest (isMax ~= 0, default) or smallest (isMax == 0) eigenvalues; eigval_full holds all eigenvalues in that sorted order. Defaults: c = size(A,1), isMax = 1, isSym = 1. 3 | if nargin < 2 4 | c = size(A,1); 5 | isMax = 1; 6 | isSym = 1; 7 | elseif c > size(A,1) 8 | c = size(A,1); 9 | end; 10 | 11 | if nargin < 3 12 | isMax = 1; 13 | isSym = 1; 14 | end; 15 | 16 | if nargin < 4 17 | isSym = 1; 18 | end; 19 | 20 | if isSym == 1 21 | A = max(A,A'); % symmetrize by taking the element-wise maximum of A and its transpose 22 | end; 23 | % A = gpuArray(A); 24 | [v, d] = eig(A); 25 | % v = gather(v); d = gather(d); 26 | d = diag(d); 27 | %d = real(d); 28 | if isMax == 0 29 | [~, idx] = sort(d); 30 | else 31 | [~, idx] = sort(d,'descend'); 32 | end; 33 | idx1 = idx(1:c); 34 | eigval = d(idx1); 35 | eigvec = v(:,idx1); 36 | eigval_full = d(idx); -------------------------------------------------------------------------------- /tools/hungarian.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kunzhan/MCGC/d86699c6330be8a23593f4a240e87155ecadc8a5/tools/hungarian.m --------------------------------------------------------------------------------