├── FCM_kailugaji.m ├── FSC_kailugaji.m ├── Fuzzy Clustering Algorithms ├── FCM.jpg ├── FSC.jpg ├── MEC.jpg └── readme.txt ├── Initialization & Normalization ├── init_methods.m ├── litekmeans.m └── normlization.m ├── LICENSE ├── MEC_kailugaji.m ├── Performance indexes ├── RandIndex.m ├── label_map.m ├── munkres.m ├── nmi.m └── performance_index.m ├── README.md ├── demo_fuzzy.m └── iris.data /FCM_kailugaji.m: -------------------------------------------------------------------------------- 1 | function [label,iter_FCM, para_miu, NegativeLogLikelihood, responsivity]=FCM_kailugaji(data, K, label_old, m) 2 | % Input: 3 | % K: number of cluster 4 | % data: dataset, N*D 5 | % label_old: initializing label. N*1 6 | % Output: 7 | % label: results of cluster. N*1 8 | % iter_FCM: iterations 9 | % Written by kailugaji. (wangrongrong1996@126.com) 10 | format long 11 | %% initializing parameters 12 | eps=1e-6; % stopping criterion for iteration 13 | max_iter=100; % maximum number of iterations 14 | fitness=zeros(max_iter,1); 15 | [data_num,data_dim]=size(data); 16 | count=zeros(data_num,1); 17 | responsivity=zeros(data_num,K); 18 | para_miu=zeros(K, data_dim); 19 | R_up=zeros(data_num,K); 20 | %% initializing the cluster center 21 | for k=1:K 22 | X_k=data(label_old==k, :); 23 | para_miu(k, :)=mean(X_k); % the center of each cluster 24 | end 25 | %% Fuzzy c-means algorithm 26 | for t=1:max_iter 27 | % (X-para_miu)^2=X^2+para_miu^2-2*para_miu*X'. data_num*K 28 | distant=(sum(data.*data,2))*ones(1,K)+ones(data_num,1)*(sum(para_miu.*para_miu,2))'-2*data*para_miu'; 29 | % update membership. data_num*K 30 | for i=1:data_num 31 | count(i)=sum(distant(i,:)==0); 32 | if count(i)>0 33 | for k=1:K 34 | if distant(i,k)==0 35 | responsivity(i,k)=1./count(i); 36 | else 37 | responsivity(i,k)=0; 38 | end 39 | end 40 | else 41 | R_up(i,:)=distant(i,:).^(-1/(m-1)); 42 | responsivity(i,:)= R_up(i,:)./sum( R_up(i,:),2); % membership 43 | end 44 | end 45 | % update center. 
K*data_dim 46 | miu_up=(responsivity'.^(m))*data; 47 | para_miu=miu_up./((sum(responsivity.^(m)))'*ones(1,data_dim)); 48 | % object function 49 | fitness(t)=sum(sum(distant.*(responsivity.^(m)))); 50 | if t>1 51 | if abs(fitness(t)-fitness(t-1))1 52 | if abs(fitness(t)-fitness(t-1)) 1 117 | error('litekmeans:AmbiguousDistance', ... 118 | 'Ambiguous ''Distance'' parameter value: %s.', distance); 119 | elseif isempty(j) 120 | error('litekmeans:UnknownDistance', ... 121 | 'Unknown ''Distance'' parameter value: %s.', distance); 122 | end 123 | distance = distNames{j}; 124 | else 125 | error('litekmeans:InvalidDistance', ... 126 | 'The ''Distance'' parameter value must be a string.'); 127 | end 128 | 129 | 130 | center = []; 131 | if ischar(start) 132 | startNames = {'sample','cluster'}; 133 | j = find(strncmpi(start,startNames,length(start))); 134 | if length(j) > 1 135 | error(message('litekmeans:AmbiguousStart', start)); 136 | elseif isempty(j) 137 | error(message('litekmeans:UnknownStart', start)); 138 | elseif isempty(k) 139 | error('litekmeans:MissingK', ... 140 | 'You must specify the number of clusters, K.'); 141 | end 142 | if j == 2 143 | if floor(.1*n) < 5*k 144 | j = 1; 145 | end 146 | end 147 | start = startNames{j}; 148 | elseif isnumeric(start) 149 | if size(start,2) == p 150 | center = start; 151 | elseif (size(start,2) == 1 || size(start,1) == 1) 152 | center = X(start,:); 153 | else 154 | error('litekmeans:MisshapedStart', ... 155 | 'The ''Start'' matrix must have the same number of columns as X.'); 156 | end 157 | if isempty(k) 158 | k = size(center,1); 159 | elseif (k ~= size(center,1)) 160 | error('litekmeans:MisshapedStart', ... 161 | 'The ''Start'' matrix must have K rows.'); 162 | end 163 | start = 'numeric'; 164 | else 165 | error('litekmeans:InvalidStart', ... 
166 | 'The ''Start'' parameter value must be a string or a numeric matrix or array.'); 167 | end 168 | 169 | % The maximum iteration number is default 100 170 | if isempty(maxit) 171 | maxit = 100; 172 | end 173 | 174 | % The maximum iteration number for preliminary clustering phase on random 175 | % 10% subsamples is default 10 176 | if isempty(clustermaxit) 177 | clustermaxit = 10; 178 | end 179 | 180 | 181 | % Assume one replicate 182 | if isempty(reps) || ~isempty(center) 183 | reps = 1; 184 | end 185 | 186 | if ~(isscalar(k) && isnumeric(k) && isreal(k) && k > 0 && (round(k)==k)) 187 | error('litekmeans:InvalidK', ... 188 | 'X must be a positive integer value.'); 189 | elseif n < k 190 | error('litekmeans:TooManyClusters', ... 191 | 'X must have more rows than the number of clusters.'); 192 | end 193 | 194 | 195 | bestlabel = []; 196 | sumD = zeros(1,k); 197 | bCon = false; 198 | 199 | for t=1:reps 200 | switch start 201 | case 'sample' 202 | center = X(randsample(n,k),:); 203 | case 'cluster' 204 | Xsubset = X(randsample(n,floor(.1*n)),:); 205 | [dump, center] = litekmeans(Xsubset, k, varargin{:}, 'start','sample', 'replicates',1 ,'MaxIter',clustermaxit); 206 | case 'numeric' 207 | end 208 | 209 | last = 0;label=1; 210 | it=0; 211 | 212 | switch distance 213 | case 'sqeuclidean' 214 | while any(label ~= last) && it1 245 | if it>=maxit 246 | aa = full(sum(X.*X,2)); 247 | bb = full(sum(center.*center,2)); 248 | ab = full(X*center'); 249 | D = bsxfun(@plus,aa,bb') - 2*ab; 250 | D(D<0) = 0; 251 | else 252 | aa = full(sum(X.*X,2)); 253 | D = aa(:,ones(1,k)) + D; 254 | D(D<0) = 0; 255 | end 256 | D = sqrt(D); 257 | for j = 1:k 258 | sumD(j) = sum(D(label==j,j)); 259 | end 260 | bestsumD = sumD; 261 | bestD = D; 262 | end 263 | else 264 | if it>=maxit 265 | aa = full(sum(X.*X,2)); 266 | bb = full(sum(center.*center,2)); 267 | ab = full(X*center'); 268 | D = bsxfun(@plus,aa,bb') - 2*ab; 269 | D(D<0) = 0; 270 | else 271 | aa = full(sum(X.*X,2)); 272 | D = 
aa(:,ones(1,k)) + D; 273 | D(D<0) = 0; 274 | end 275 | D = sqrt(D); 276 | for j = 1:k 277 | sumD(j) = sum(D(label==j,j)); 278 | end 279 | if sum(sumD) < sum(bestsumD) 280 | bestlabel = label; 281 | bestcenter = center; 282 | bestsumD = sumD; 283 | bestD = D; 284 | end 285 | end 286 | case 'cosine' 287 | while any(label ~= last) && it1 312 | if any(label ~= last) 313 | W=full(X*center'); 314 | end 315 | D = 1-W; 316 | for j = 1:k 317 | sumD(j) = sum(D(label==j,j)); 318 | end 319 | bestsumD = sumD; 320 | bestD = D; 321 | end 322 | else 323 | if any(label ~= last) 324 | W=full(X*center'); 325 | end 326 | D = 1-W; 327 | for j = 1:k 328 | sumD(j) = sum(D(label==j,j)); 329 | end 330 | if sum(sumD) < sum(bestsumD) 331 | bestlabel = label; 332 | bestcenter = center; 333 | bestsumD = sumD; 334 | bestD = D; 335 | end 336 | end 337 | end 338 | end 339 | 340 | label = bestlabel; 341 | center = bestcenter; 342 | if reps>1 343 | sumD = bestsumD; 344 | D = bestD; 345 | elseif nargout > 3 346 | switch distance 347 | case 'sqeuclidean' 348 | if it>=maxit 349 | aa = full(sum(X.*X,2)); 350 | bb = full(sum(center.*center,2)); 351 | ab = full(X*center'); 352 | D = bsxfun(@plus,aa,bb') - 2*ab; 353 | D(D<0) = 0; 354 | else 355 | aa = full(sum(X.*X,2)); 356 | D = aa(:,ones(1,k)) + D; 357 | D(D<0) = 0; 358 | end 359 | D = sqrt(D); 360 | case 'cosine' 361 | if it>=maxit 362 | W=full(X*center'); 363 | end 364 | D = 1-W; 365 | end 366 | for j = 1:k 367 | sumD(j) = sum(D(label==j,j)); 368 | end 369 | end 370 | 371 | 372 | 373 | 374 | function [eid,emsg,varargout]=getargs(pnames,dflts,varargin) 375 | %GETARGS Process parameter name/value pairs 376 | % [EID,EMSG,A,B,...]=GETARGS(PNAMES,DFLTS,'NAME1',VAL1,'NAME2',VAL2,...) 377 | % accepts a cell array PNAMES of valid parameter names, a cell array 378 | % DFLTS of default values for the parameters named in PNAMES, and 379 | % additional parameter name/value pairs. Returns parameter values A,B,... 380 | % in the same order as the names in PNAMES. 
Outputs corresponding to 381 | % entries in PNAMES that are not specified in the name/value pairs are 382 | % set to the corresponding value from DFLTS. If nargout is equal to 383 | % length(PNAMES)+2, then unrecognized name/value pairs are an error. If 384 | % nargout is equal to length(PNAMES)+3, then all unrecognized name/value 385 | % pairs are returned in a single cell array following any other outputs. 386 | % 387 | % EID and EMSG are empty if the arguments are valid. If an error occurs, 388 | % EMSG is the text of an error message and EID is the final component 389 | % of an error message id. GETARGS does not actually throw any errors, 390 | % but rather returns EID and EMSG so that the caller may throw the error. 391 | % Outputs will be partially processed after an error occurs. 392 | % 393 | % This utility can be used for processing name/value pair arguments. 394 | % 395 | % Example: 396 | % pnames = {'color' 'linestyle', 'linewidth'} 397 | % dflts = { 'r' '_' '1'} 398 | % varargin = {'linewidth' 2 'nonesuch' [1 2 3] 'linestyle' ':'} 399 | % [eid,emsg,c,ls,lw] = getargs(pnames,dflts,varargin{:}) % error 400 | % [eid,emsg,c,ls,lw,ur] = getargs(pnames,dflts,varargin{:}) % ok 401 | 402 | % We always create (nparams+2) outputs: 403 | % one each for emsg and eid 404 | % nparams varargs for values corresponding to names in pnames 405 | % If they ask for one more (nargout == nparams+3), it's for unrecognized 406 | % names/values 407 | 408 | % Original Copyright 1993-2008 The MathWorks, Inc. 
409 | % Modified by Deng Cai (dengcai@gmail.com) 2011.11.27 410 | 411 | 412 | 413 | 414 | % Initialize some variables 415 | emsg = ''; 416 | eid = ''; 417 | nparams = length(pnames); 418 | varargout = dflts; 419 | unrecog = {}; 420 | nargs = length(varargin); 421 | 422 | % Must have name/value pairs 423 | if mod(nargs,2)~=0 424 | eid = 'WrongNumberArgs'; 425 | emsg = 'Wrong number of arguments.'; 426 | else 427 | % Process name/value pairs 428 | for j=1:2:nargs 429 | pname = varargin{j}; 430 | if ~ischar(pname) 431 | eid = 'BadParamName'; 432 | emsg = 'Parameter name must be text.'; 433 | break; 434 | end 435 | i = strcmpi(pname,pnames); 436 | i = find(i); 437 | if isempty(i) 438 | % if they've asked to get back unrecognized names/values, add this 439 | % one to the list 440 | if nargout > nparams+2 441 | unrecog((end+1):(end+2)) = {varargin{j} varargin{j+1}}; 442 | % otherwise, it's an error 443 | else 444 | eid = 'BadParamName'; 445 | emsg = sprintf('Invalid parameter name: %s.',pname); 446 | break; 447 | end 448 | elseif length(i)>1 449 | eid = 'BadParamName'; 450 | emsg = sprintf('Ambiguous parameter name: %s.',pname); 451 | break; 452 | else 453 | varargout{i} = varargin{j+1}; 454 | end 455 | end 456 | end 457 | 458 | varargout{nparams+1} = unrecog; 459 | -------------------------------------------------------------------------------- /Initialization & Normalization/normlization.m: -------------------------------------------------------------------------------- 1 | function data = normlization(data, choose) 2 | % Normlization methods 3 | % Written by kailugaji. 
(wangrongrong1996@126.com) 4 | if choose==0 5 | % no normlization 6 | data = data; 7 | elseif choose==1 8 | % Z-score 9 | data = bsxfun(@minus, data, mean(data)); 10 | data = bsxfun(@rdivide, data, std(data)); 11 | elseif choose==2 12 | % max-min 13 | [data_num,~]=size(data); 14 | data=(data-ones(data_num,1)*min(data))./(ones(data_num,1)*(max(data)-min(data))); 15 | end 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 kailugaji 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MEC_kailugaji.m: -------------------------------------------------------------------------------- 1 | function [label,iter_FCM, para_miu, NegativeLogLikelihood, responsivity]=MEC_kailugaji(data, K, label_old, gama) 2 | % Input: 3 | % K: number of cluster 4 | % data: dataset, N*D 5 | % label_old: initializing label. N*1 6 | % Output: 7 | % label: results of cluster. N*1 8 | % iter_FCM: iterations 9 | % Written by kailugaji. (wangrongrong1996@126.com) 10 | format long 11 | %% initializing parameters 12 | esp=1e-6; % stopping criterion for iteration 13 | max_iter=100; % maximum number of iterations 14 | fitness=zeros(max_iter,1); 15 | [data_num,data_dim]=size(data); 16 | distant=zeros(data_num, K); 17 | responsivity=zeros(data_num,K); 18 | para_miu=zeros(K, data_dim); 19 | %% initializing the cluster center 20 | for k=1:K 21 | X_k=data(label_old==k, :); 22 | para_miu(k, :)=mean(X_k); % the center of each cluster 23 | end 24 | %% Maximum entropy clustering algorithm 25 | for t=1:max_iter 26 | % (X-para_miu)^2=X^2+para_miu^2-2*para_miu*X'. data_num*K 27 | for k=1:K 28 | distant(:,k)=sum((data-repmat(para_miu(k, :), data_num, 1)).^2,2); %N*1 29 | end 30 | % update membership. data_num*K 31 | R_up=exp(-distant./gama); 32 | responsivity= R_up./repmat(sum(R_up,2), 1, K); 33 | % update center. 
K*data_dim 34 | miu_up=(responsivity')*data; 35 | para_miu=miu_up./((sum(responsivity))'*ones(1,data_dim)); 36 | % object function 37 | fitness(t)=sum(sum(responsivity.*distant))+gama.*sum(sum((responsivity.*log(responsivity+eps)))); 38 | if t>1 39 | if abs(fitness(t)-fitness(t-1)) 1 || min(size(c2)) > 1 14 | error('RandIndex: Requires two vector arguments') 15 | return 16 | end 17 | 18 | C=Contingency(c1,c2); %form contingency matrix 19 | 20 | n=sum(sum(C)); 21 | nis=sum(sum(C,2).^2); %sum of squares of sums of rows 22 | njs=sum(sum(C,1).^2); %sum of squares of sums of columns 23 | 24 | t1=nchoosek(n,2); %total number of pairs of entities 25 | t2=sum(sum(C.^2)); %sum over rows & columnns of nij^2 26 | t3=.5*(nis+njs); 27 | 28 | %Expected index (for adjustment) 29 | nc=(n*(n^2+1)-(n+1)*nis-(n+1)*njs+2*(nis*njs)/n)/(2*(n-1)); 30 | 31 | A=t1+t2-t3; %no. agreements 32 | D= -t2+t3; %no. disagreements 33 | 34 | if t1==nc 35 | AR=0; %avoid division by zero; if k=1, define Rand = 0 36 | else 37 | AR=(A-nc)/(t1-nc); %adjusted Rand - Hubert & Arabie 1985 38 | end 39 | 40 | RI=A/t1; %Rand 1971 %Probability of agreement 41 | MI=D/t1; %Mirkin 1970 %p(disagreement) 42 | HI=(A-D)/t1; %Hubert 1977 %p(agree)-p(disagree) 43 | 44 | function Cont=Contingency(Mem1,Mem2) 45 | 46 | if nargin < 2 || min(size(Mem1)) > 1 || min(size(Mem2)) > 1 47 | error('Contingency: Requires two vector arguments') 48 | return 49 | end 50 | 51 | Cont=zeros(max(Mem1),max(Mem2)); 52 | 53 | for i = 1:length(Mem1) 54 | Cont(Mem1(i),Mem2(i))=Cont(Mem1(i),Mem2(i))+1; 55 | end 56 | -------------------------------------------------------------------------------- /Performance indexes/label_map.m: -------------------------------------------------------------------------------- 1 | function [accuracy, new_label] = label_map( gnd, label ) 2 | K = length(unique(gnd)); 3 | cost_mat = zeros(K,K); 4 | for i=1:K 5 | idx = find(label==i); 6 | for j=1:K 7 | cost_mat(i,j) = length(find(gnd(idx)~=j)); 8 | end 9 | end 10 | 
[assignment,cost] = munkres(cost_mat); 11 | [assignedrows,dum]=find(assignment'); 12 | new_label = label; 13 | for i=1:K 14 | idx = find(label==i); 15 | new_label(idx) = assignedrows(i); 16 | end 17 | N=length(gnd); 18 | accuracy=length(find(new_label-gnd == 0))/N; 19 | -------------------------------------------------------------------------------- /Performance indexes/munkres.m: -------------------------------------------------------------------------------- 1 | function [assignment,cost] = munkres(costMat) 2 | % MUNKRES Munkres Assign Algorithm 3 | % 4 | % [ASSIGN,COST] = munkres(COSTMAT) returns the optimal assignment in ASSIGN 5 | % with the minimum COST based on the assignment problem represented by the 6 | % COSTMAT, where the (i,j)th element represents the cost to assign the jth 7 | % job to the ith worker. 8 | % 9 | 10 | % This is vectorized implementation of the algorithm. It is the fastest 11 | % among all Matlab implementations of the algorithm. 12 | 13 | % Examples 14 | % Example 1: a 5 x 5 example 15 | %{ 16 | [assignment,cost] = munkres(magic(5)); 17 | [assignedrows,dum]=find(assignment); 18 | disp(assignedrows'); % 3 2 1 5 4 19 | disp(cost); %15 20 | %} 21 | % Example 2: 400 x 400 random data 22 | %{ 23 | n=5; 24 | A=rand(n); 25 | tic 26 | [a,b]=munkres(A); 27 | toc 28 | %} 29 | 30 | % Reference: 31 | % "Munkres' Assignment Algorithm, Modified for Rectangular Matrices", 32 | % http://csclab.murraystate.edu/bob.pilgrim/445/munkres.html 33 | 34 | % version 1.0 by Yi Cao at Cranfield University on 17th June 2008 35 | 36 | assignment = false(size(costMat)); 37 | cost = 0; 38 | 39 | costMat(costMat~=costMat)=Inf; 40 | validMat = costMat