├── AR_Face_img.mat
├── COIL20.mat
├── Demo_Clustering.m
├── Demo_Supervised_Feature_Selection.m
├── Demo_Unsupervised_Feature_Selection.m
├── Demo_Unsupervised_Representation_Learning.m
├── LICENCE
├── algorithm
│   ├── FeatureSelection
│   │   └── unsupervised
│   │       ├── 2019-LRLMR
│   │       │   └── LRLMR.m
│   │       ├── 2019-URAFS
│   │       │   ├── URAFS.m
│   │       │   └── URAFS_SolveProb14.m
│   │       ├── 2021-AGUFS
│   │       │   ├── AGUFS.m
│   │       │   └── AGUFS_SolveProb14.m
│   │       ├── 2021-DSLRL
│   │       │   └── DSLRL.m
│   │       ├── 2022-DLUFS
│   │       │   └── DLUFS.m
│   │       └── 2022-SLMEA
│   │           └── SLMEA.m
│   ├── RepresentationLearning
│   │   ├── supervised
│   │   │   ├── 2017-MRSL
│   │   │   │   ├── MRSL.m
│   │   │   │   └── MRSL_getR.m
│   │   │   ├── 2019-RSLDA
│   │   │   │   └── RSLDA.m
│   │   │   ├── 2020-LRDAGP
│   │   │   │   ├── LRDAGP.m
│   │   │   │   ├── LRDAGP_solveAlg1.m
│   │   │   │   └── LRDAGP_solveAlg2.m
│   │   │   ├── 2020-RDA_FSIS
│   │   │   │   └── RDA_FSIS.m
│   │   │   ├── 2021-DSDPL
│   │   │   │   └── DSDPL.m
│   │   │   └── 2021-SN_TSL
│   │   │       └── SN_TSL.m
│   │   └── unsupervised
│   │       └── 2020-JLRSL
│   │           └── JLRSL.m
│   └── clustering
│       └── 2015-rLPP
│           └── rLPP.m
├── readme.md
└── utils
    ├── EProjSimplex_new.m
    ├── EuDist2.m
    ├── GPI.m
    ├── IterativeMultiplicativeUpdate.m
    ├── L2Norm.m
    ├── MyClusteringMeasure.m
    ├── SVT.m
    ├── SolveL21Problem.m
    ├── betweenScatter.m
    ├── centeringMatrix.m
    ├── classifyKNN.m
    ├── computeL.m
    ├── defaultOptions.m
    ├── getClusteringResults.m
    ├── getFeatureSelectionResults.m
    ├── hotmatrix.m
    ├── lapgraph.m
    ├── litekmeans.m
    ├── mySVD.m
    ├── shrink.m
    ├── similarMatrix_CAN.m
    ├── splitData.m
    ├── updateL21.m
    └── withinScatter.m

--------------------------------------------------------------------------------
/AR_Face_img.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzf495/Reimplementation-of-Attractive-Feature-Selection-and-Clustering-Methods/70cdeb8f79210b9315721750e7dacb1d4fc63b25/AR_Face_img.mat
--------------------------------------------------------------------------------
/COIL20.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzf495/Reimplementation-of-Attractive-Feature-Selection-and-Clustering-Methods/70cdeb8f79210b9315721750e7dacb1d4fc63b25/COIL20.mat
--------------------------------------------------------------------------------
/Demo_Clustering.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 | path='./COIL20.mat';
8 | load(path,'X','Y');
9 | X=X'; %% The input dimension is m*n
10 | %% Select an algorithm
11 | %%% === Clustering ===
12 | algorithm=@rLPP;
13 |
14 | %% Set the hyper-parameters
15 | %%% Notice: you should modify `options` to tune the hyper-parameters
16 | options=struct();
17 | %% Run the algorithm
18 | algorithm(X,Y,options);
--------------------------------------------------------------------------------
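All demos pass an `options` struct whose missing fields are filled in by `defaultOptions` (see utils), so tuning a hyper-parameter just means setting the field before the call. A minimal sketch, assuming rLPP accepts fields such as `T` and `dim` (rLPP.m is not shown in this excerpt, so check its header for the actual field names):

% Hypothetical tuning of rLPP via the options struct
options=struct();
options.T=20;    % assumed iteration-count field; overrides the default
options.dim=60;  % assumed reduced-dimension field
algorithm=@rLPP;
algorithm(X,Y,options);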
/Demo_Supervised_Feature_Selection.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 |
8 | %% Load COIL20
9 | path='./COIL20.mat';
10 | load(path,'X','Y');
11 | X=X'; %% The input dimension is m*n
12 |
13 |
14 | %%% Load AR_Face_img
15 | % path='./AR_Face_img.mat';
16 | % clear AllSet;
17 | % load(path,'AllSet');
18 | % XY=AllSet;
19 | % X=XY.X;
20 | % Y=XY.y;
21 |
22 |
23 | %% Data process
24 | %%% try `zscore` or `normr`
25 | % X=L2Norm(X')';
26 | % X=double(zscore(X',1))';
27 | % X=normr(X')';
28 |
29 |
30 | %% Split dataset
31 | %%% select 'number' samples from each class as a training set, and use
32 | %%% the rest as a test set
33 | number=10;
34 | [X1,Y1,X2,Y2] = splitData(X,Y,number);
35 |
36 |
37 | %% Select an algorithm
38 | % algorithm=@MRSL;
39 | % algorithm=@DSDPL; X=double(zscore(X',1))';
40 | % algorithm=@RSLDA;
41 | % algorithm=@LRDAGP;
42 | % algorithm=@RDA_FSIS;
43 | algorithm=@SN_TSL;
44 | %% Set the hyper-parameters
45 | %%% Notice: you should modify `options` to tune the hyper-parameters
46 | options=struct();
47 |
48 |
49 | %% Run the algorithm
50 | algorithm(X1,Y1,X2,Y2,options);
--------------------------------------------------------------------------------
/Demo_Unsupervised_Feature_Selection.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 | path='./COIL20.mat';
8 | load(path,'X','Y');
9 | X=X'; %% The input dimension is m*n
10 | %% Select an algorithm (uncomment exactly one)
11 | %%% === Feature Selection ===
12 | algorithm=@LRLMR;
13 | % algorithm=@AGUFS;
14 | % algorithm=@DSLRL;
15 | % algorithm=@DLUFS; X=double(zscore(X',1))';
16 | % algorithm=@SLMEA;
17 |
18 | %% Set the hyper-parameters
19 | %%% Notice: you should modify `options` to tune the hyper-parameters
20 | options=struct();
21 | %% Run the algorithm
22 | algorithm(X,Y,options);
--------------------------------------------------------------------------------
/Demo_Unsupervised_Representation_Learning.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 | path='./COIL20.mat';
8 | load(path,'X','Y');
9 | X=X'; %% The input dimension is m*n
10 |
11 | %% Split dataset
12 | %%% select 'number' samples from each class as a training set, and use
13 | %%% the rest as a test set
14 | number=10;
15 | [X1,Y1,X2,Y2] = splitData(X,Y,number);
16 |
17 | %% Select an algorithm
18 | %%% === Unsupervised Representation Learning ===
19 | algorithm=@JLRSL;
20 |
21 | %% Set the hyper-parameters
22 | %%% Notice: you should modify `options` to tune the hyper-parameters
23 | options=struct();
24 | %% Run the algorithm
25 | algorithm(X1,Y1,X2,Y2,options);
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 zzf495
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2019-LRLMR/LRLMR.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W]=LRLMR(X,Y,options) 2 | %% Implementation of LRLMR 3 | %%% Authors: Tang et al. 4 | %%% Titl: 2019-Unsupervised feature selection via latent representation learning and manifold regularization 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations of V 10 | %%% t The iterations of W 11 | %%% dim The dimension selected 12 | %%% alpha The weight of L2,1 norm 13 | %%% beta The weight of A-VV' 14 | %%% gamma The weight of manifold regularization 15 | %%% k The KNN number 16 | %% output: 17 | %%% results The results (list) [acc,acc2,NMI,purity] 18 | %%% results_iter The iteration information of 'results' 19 | %%% W The learned feature selection matrix 20 | %% Version 21 | %%% Implementation 2022-05-19 22 | options=defaultOptions(options,... 23 | 'T',10,... 24 | 't',10,... 25 | 'dim',80,... 26 | 'alpha',1,... 27 | 'beta',1e-4,... 28 | 'gamma',1e-4,... 29 | 'k',10); 30 | %% parameters 31 | T=options.T; 32 | t=options.t; 33 | dim=options.dim; 34 | alpha=options.alpha; 35 | beta=options.beta; 36 | gamma=options.gamma; 37 | k=options.k; 38 | %% Initialization 39 | results_iter=[]; 40 | myeps=1e-8; 41 | C=length(unique(Y)); 42 | [m,n]=size(X); 43 | XX=X*X'; 44 | % Init L by Eq.(6) 45 | clear manifold; 46 | manifold.k = k; 47 | manifold.Metric = 'Euclidean'; 48 | manifold.WeightMode = 'HeatKernel'; 49 | manifold.NeighborMode = 'KNN'; 50 | L=computeL(X,manifold); 51 | L=L./norm(L,'fro'); 52 | XLX=X*L*X'; 53 | % Init A by Eq.(6) 54 | clear manifold; 55 | manifold.k = 0; 56 | manifold.Metric = 'Euclidean'; 57 | manifold.WeightMode = 'HeatKernel'; 58 | manifold.NeighborMode = 'KNN'; 59 | A=lapgraph(X',manifold); 60 | % Init V by random 61 | V=rand(n,C); 62 | % Init G (Lambda in paper) 63 | G=eye(m); 64 | for i=1:T 65 | XV=X*V; 66 | for j=1:t 67 | % Update W by Eq.(11) 68 | W=(XX+alpha*G+gamma*XLX)\(XV); 69 | % Update G (Lambda) by Eq.(8) 70 | G=updateL21(W); 71 | end 72 | % Update V 73 | left=(X'*W)+2*beta*(A*V); 74 | right=V+2*beta*(V*V')*V; 75 | res=left./right; 76 | V=IterativeMultiplicativeUpdate(V,res); 77 | % scores 78 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C); 79 | for index=1:3 80 | results_iter(index,i)=results(index); 81 | end 82 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 83 | results(1),results(2),results(3)); 84 | end 85 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2019-URAFS/URAFS.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W]=URAFS(X,Y,options) 2 | %% Implementation of URAFS 3 | %%% Authors: Li et al. 
4 | %%% Titl: 2019-Generalized Uncorrelated Regression with Adaptive Graph for Unsupervised Feature Selection 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations of V 10 | %%% t The iterations of GPI and Algorithm 1 11 | %%% dim The dimension reduced 12 | %%% alpha The weight of manfiold regularization 13 | %%% beta The weight of Gaussian kernel (S) 14 | %%% lambda The weight of L2,1 of W 15 | %% output: 16 | %%% results The results (list) [acc,acc2,NMI,purity] 17 | %%% results_iter The iteration information of 'results' 18 | %%% W The learned feature selection matrix 19 | %% Version 20 | %%% Implementation 2022-05-19 21 | options=defaultOptions(options,... 22 | 'T',10,... %% The iterations 23 | 't',10,... %% The iterations of GPI and Algorithm 1 24 | 'dim',60,... %% The dimension reduced 25 | 'alpha',1e3,... %% The weight of NMF w.r.t features |X'-X'WH| 26 | 'beta',1e3,... %% The weight of Gaussian kernel (S) 27 | 'lambda',1e3); %% The weight of entropy 28 | %% parameters 29 | T=options.T; 30 | t=options.t; 31 | dim=options.dim; 32 | alpha=options.alpha; 33 | beta=options.beta; 34 | lambda=options.lambda; 35 | %% Initialization 36 | results_iter=[]; 37 | C=length(unique(Y)); 38 | [~,n]=size(X); 39 | % Init F 40 | [~,F]=litekmeans(X,C);F=F'; % n*C 41 | [Uf,~,~]=mySVD(F); 42 | F=Uf; % F'F=I 43 | % Init H 44 | H=centeringMatrix(n); 45 | % Init St 46 | St=X*H*X'; 47 | for i=1:T 48 | % Update S by Eq.(31) 49 | dist=EuDist2(F,F,0); 50 | expDist=exp(-dist/(2*beta)); 51 | sumExp=sum(expDist,1); 52 | S=expDist./sumExp; 53 | S=(S+S')/2; 54 | % Update W by Algorithm 1 55 | W=URAFS_SolveProb14(X,H,St,F,lambda,t); 56 | % Update L by (7) 57 | P=diag(sparse(sum(S))); 58 | Ls=P-S; 59 | A=H+2*alpha*Ls; 60 | CA=H*X'*W; 61 | % Update F by Algorithm 2 (GPI) 62 | opt.T=t; 63 | F=GPI(A,CA,opt); 64 | %% Classification 65 | % Select top d ranked features (descending order) as the results 66 | % scores 67 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C); 68 | for index=1:3 69 | results_iter(index,i)=results(index); 70 | end 71 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 72 | results(1),results(2),results(3)); 73 | end 74 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2019-URAFS/URAFS_SolveProb14.m: -------------------------------------------------------------------------------- 1 | function [W] = URAFS_SolveProb14(X,H,S,F,lambda,T) 2 | [m,~]=size(X); 3 | % Initialize D by eye 4 | D=eye(m,m); 5 | for i=1:T 6 | % Compute Q,B by Eq.(20) 7 | temp=S+lambda*D; 8 | [Ut,Sigma,Vt]=mySVD(temp); 9 | Sigma(Sigma<0)=0; 10 | squreSigma=Sigma.^0.5; 11 | inverseSqrtSigma=diag(1./(diag(squreSigma))); 12 | inverseSqrtSigma(isinf(inverseSqrtSigma))=0; 13 | SlambdaD=Ut*(inverseSqrtSigma)*Vt'; 14 | B= (SlambdaD)*X*H*F; 15 | % Update Q 16 | [Ub,~,Vb]=mySVD(B); 17 | Q=Ub*Vb'; 18 | % Update W 19 | % W=S+lambda*D; 20 | W=SlambdaD*Q; 21 | % Update D 22 | D=updateL21(W); 23 | end 24 | end 25 | 26 | -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2021-AGUFS/AGUFS.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W]=AGUFS(X,Y,options) 2 | %% Implementation of AGUFS 3 | %%% Authors: Huang et al. 
4 | %%% Titl: 2021-Adaptive graph-based generalized regression model for unsupervised feature selection 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations 10 | %%% t The iterations of GPI and Algorithm 1 11 | %%% dim The dimension selected 12 | %%% alpha The weight of manifold regularization 13 | %%% k The number of KNN 14 | %%% lambda The weight of L2,1-norm w.r.t W 15 | %% output: 16 | %%% results The results (list) [acc,acc2,NMI,purity] 17 | %%% results_iter The iteration information of 'results' 18 | %%% W The learned feature selection matrix 19 | %% Version 20 | %%% Implementation 2022-05-23 21 | options=defaultOptions(options,... 22 | 'T',10,... %% The iterations 23 | 't',10,... %% The iterations of GPI and Algorithm 1 24 | 'dim',60,... %% The dimension selected 25 | 'alpha',1e3,... %% The weight of manifold regularization 26 | 'k',10,... %% The number of KNN 27 | 'lambda',1e3); %% The weight of L2,1-norm w.r.t W 28 | %% parameters 29 | T=options.T; 30 | t=options.t; 31 | dim=options.dim; 32 | alpha=options.alpha; 33 | lambda=options.lambda; 34 | k=options.k; 35 | %% Initialization 36 | results_iter=[]; 37 | C=length(unique(Y)); 38 | [~,n]=size(X); 39 | % Init F 40 | [~,F]=litekmeans(X,C);F=F'; % n*C 41 | [Uf,~,~]=mySVD(F); 42 | F=Uf; % F'F=I 43 | % Init H 44 | H=centeringMatrix(n); 45 | % Init St 46 | XHX=X*H*X'; 47 | % Init S by Eq.(22) 48 | distX = EuDist2(X',X'); 49 | [S,rr]=similarMatrix_CAN(distX,k,-1); 50 | for i=1:T 51 | % compute L 52 | DS=diag(sparse(sum(S))); 53 | L=DS-S; 54 | XLX=X*L*X'; 55 | % Update W by Algorithm 1 56 | W=AGUFS_SolveProb14(X,H,XHX,XLX,F,alpha,lambda,t); 57 | % Update L by (7) 58 | P=diag(sparse(sum(S))); 59 | Ls=P-S; 60 | A=H+0.5*alpha*Ls; 61 | CA=H*X'*W; 62 | % Update F by Algorithm 2 (GPI) 63 | opt.T=t; 64 | F=GPI(A,CA,opt); 65 | % Update S by Eq.(22) 66 | distX = EuDist2(F,F); 67 | [S,~]=similarMatrix_CAN(distX,k,rr); 68 | S=real(S); 69 | %% Classification 70 | % Select top d ranked features (descending order) as the results 71 | % scores 72 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C); 73 | for index=1:3 74 | results_iter(index,i)=results(index); 75 | end 76 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 77 | results(1),results(2),results(3)); 78 | end 79 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2021-AGUFS/AGUFS_SolveProb14.m: -------------------------------------------------------------------------------- 1 | function [W] = AGUFS_SolveProb14(X,H,XHX,XLX,F,alpha,lambda,T) 2 | [m,~]=size(X); 3 | % Initialize D by eye 4 | D=eye(m,m); 5 | for i=1:T 6 | % Compute Q,B by Eq.(20) 7 | temp=XHX+alpha*XLX+lambda*D; 8 | [Ut,Sigma,Vt]=mySVD(temp); 9 | Sigma(Sigma<0)=0; 10 | squreSigma=Sigma.^0.5; 11 | inverseSqrtSigma=diag(1./(diag(squreSigma))); 12 | inverseSqrtSigma(isinf(inverseSqrtSigma))=0; 13 | SlambdaD=Ut*(inverseSqrtSigma)*Vt'; 14 | B= (SlambdaD)*X*H*F; 15 | % Update A 16 | [Ub,~,Vb]=mySVD(B); 17 | A=Ub*Vb'; 18 | % Update W 19 | W=SlambdaD*A; 20 | % Update D 21 | D=updateL21(W); 22 | end 23 | end 24 | 25 | -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2021-DSLRL/DSLRL.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W] = DSLRL(X,Y,options) 2 | %% Implementation of DSLRL 3 | %%% Authors: Shang et al. 
4 | %%% Titl: 2021-Dual space latent representation learning for unsupervised feature selection 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations of V 10 | %%% t The iterations of W 11 | %%% d The dimension reduced 12 | %%% alpha The weight of L2-1 for W 13 | %%% beta The weight of A-VV' 14 | %%% gamma The weight of manfiold regularization 15 | %%% lambda The weight of WW'=I 16 | %% output: 17 | %%% results The results (list) 18 | %%% results_iter The iteration information of 'results' 19 | %%% W The learned feature selection matrix 20 | %% Version 21 | %%% Implementation 2022-05-19 22 | options=defaultOptions(options,... 23 | 'T',10,... %% The iterations 24 | 'dim',80,... %% The dimension reduced 25 | 'alpha',1e3,... %% The weight of L2-1 for W 26 | 'beta',1e2,... %% The weight of A-VV' 27 | 'gamma',1e-3,...%% The weight of manfiold regularization 28 | 'lambda',1e-3); %% The weight of WW'=I 29 | %% parameters 30 | T=options.T; 31 | dim=options.dim; 32 | alpha=options.alpha; 33 | beta=options.beta; 34 | gamma=options.gamma; 35 | lambda=options.lambda; 36 | %% Initialization 37 | X=normr(X')'; 38 | % X=L2Norm(X')'; 39 | results_iter=[]; 40 | C=length(unique(Y)); 41 | [m,n]=size(X); 42 | eta=0.1; 43 | % Init A by Eq.(7) 44 | clear manifold; 45 | manifold.k = 0; 46 | manifold.Metric = 'Euclidean'; 47 | manifold.WeightMode = 'HeatKernel'; 48 | manifold.NeighborMode = 'KNN'; 49 | A=lapgraph(X',manifold); 50 | % Init B by Eq.(8) 51 | clear manifold; 52 | manifold.k = 0; 53 | manifold.Metric = 'Euclidean'; 54 | manifold.WeightMode = 'HeatKernel'; 55 | manifold.NeighborMode = 'KNN'; 56 | B=lapgraph(X,manifold); 57 | % Init V 58 | [~,V]=litekmeans(X,C);V=V'; % n*C 59 | % Init H 60 | H=eye(m); 61 | % Init W 62 | W=(X*X'+eta*eye(m))\(X*V); 63 | W=max(W,1e-8); 64 | X=X';% input X: n*m 65 | for i=1:T 66 | % Update W by Eq.(20) 67 | left=X'*V+2*beta*B*W+2*lambda*W; 68 | right=X'*X*W+alpha*H*W+2*(gamma+lambda)*W*W'*W; 69 | res=left./right; 70 | W=IterativeMultiplicativeUpdate(W,res); 71 | % Update H by Eq.(15) 72 | H=updateL21(W); 73 | % Update V by Eq.(23) 74 | left=X*W+2*beta*A*V; 75 | right=V+2*beta*V*V'*V; 76 | res=left./right; 77 | V=IterativeMultiplicativeUpdate(V,res); 78 | % scores 79 | [~,results] = getFeatureSelectionResults(X',Y,W,dim,C); 80 | for index=1:3 81 | results_iter(index,i)=results(index); 82 | end 83 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 84 | results(1),results(2),results(3)); 85 | end 86 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2022-DLUFS/DLUFS.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,Z] = DLUFS(X,Y,options) 2 | %% Notice 3 | %%% Official codes (python implementation) are available at https://github.com/mohsengh/DLUFS/ 4 | %% Data process: zscore 5 | %% Implementation of DLUFS 6 | %%% Authors: Ghassemi Parsa et al. 
7 | %%% Title: 2022-Low-rank dictionary learning for unsupervised feature selection
8 | %% input:
9 | %%% X: The samples, m*n
10 | %%% Y: The labels of samples, n*1
11 | %% options
12 | %%% T The iterations
13 | %%% dim The dimension selected
14 | %%% alpha The weight of manifold regularization
15 | %%% sigma The weight of Gaussian kernel
16 | %%% k The number of KNN
17 | %%% lambda The weight of L2,1-norm of Z
18 | %% output:
19 | %%% results The results (list)
20 | %%% results_iter The iteration information of 'results'
21 | %%% Z The learned feature selection matrix
22 | %% Version
23 | %%% Implementation 2022-05-23
24 | %% Parameter setting
25 | options=defaultOptions(options,...
26 | 'T',10,... %% The iterations
27 | 'dim',300,... %% The dimension selected
28 | 'alpha',1e-3,... %% The weight of manifold regularization
29 | 'sigma',1e2,... %% The weight of Gaussian kernel
30 | 'k',10,... %% The number of KNN
31 | 'lambda',1e-3); %% The weight of L2,1-norm of Z
32 | %% Read parameters
33 | T=options.T;
34 | dim=options.dim;
35 | alpha=options.alpha;
36 | lambda=options.lambda;
37 | sigma=options.sigma;
38 | k=options.k;
39 | eta=1e-16;
40 | %% Initialization
41 | C=length(unique(Y));
42 | m=size(X,1);
43 | results_iter=[];
44 | % Compute L
45 | manifold.NeighborMode = 'KNN';
46 | manifold.k = k;
47 | manifold.t =sigma;
48 | manifold.WeightMode = 'Heatkernel';
49 | manifold.Metric='Euclidean';
50 | L=computeL(X,manifold);
51 | XX=X'*X;
52 | Z=X;
53 | for i=1:T
54 | % Update B by Eq.(16)
55 | Sw=(Z*Z'+eta*eye(m));
56 | Sb=Z*XX*Z';
57 | res=Sw\Sb;
58 | [Ub,~,Vb]=mySVD(res,dim);
59 | B=Ub*Vb';
60 | % Update A by Eq.(10)
61 | A=(X*Z'*B')/(B*Sw*B'+eta*eye(m));
62 | % Update D by Eq.(19)
63 | D=updateL21(Z);
64 | % Update Z by Eq.(21)
65 | AB=A*B;
66 | E=AB'*AB+lambda*D;
67 | F=alpha*L;
68 | G=AB'*X;
69 | Z=sylvester(E,F,G); % fix: assign the solution of E*Z+Z*F=G back to Z (the original call discarded the result)
70 | % Scores
71 | [~,results] = getFeatureSelectionResults(X,Y,Z,dim,C);
72 | for index=1:3
73 | results_iter(index,i)=results(index);
74 | end
75 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
76 | results(1),results(2),results(3));
77 | end
78 | end
79 |
80 |
--------------------------------------------------------------------------------
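The Z-step above relies on MATLAB's built-in `sylvester`, which solves E*Z + Z*F = G for Z; the assignment fix can be sanity-checked numerically. A minimal sketch on random matrices (not repository data):

% Verify that sylvester(E,F,G) returns Z with E*Z + Z*F = G
E = rand(5);  E = E'*E + eye(5);   % symmetric positive definite, like AB'*AB+lambda*D
F = rand(4);  F = F'*F;            % plays the role of alpha*L
G = rand(5,4);
Z = sylvester(E, F, G);
fprintf('Sylvester residual: %.2e\n', norm(E*Z + Z*F - G, 'fro')); % ~1e-14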
/algorithm/FeatureSelection/unsupervised/2022-SLMEA/SLMEA.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,W]=SLMEA(X,Y,options)
2 | %% Implementation of SLMEA
3 | %%% Authors: Shang et al.
4 | %%% Title: 2022-Sparse and low-dimensional representation with maximum entropy adaptive graph for feature selection
5 | %% input:
6 | %%% X: The samples, m*n
7 | %%% Y: The labels of samples, n*1
8 | %% options
9 | %%% T The iterations
10 | %%% dim The dimension selected
11 | %%% alpha The weight of NMF w.r.t features |X'-X'WH|
12 | %%% beta The weight of L(2,1/2)-(1/2) norm
13 | %%% gamma The weight of manifold regularization
14 | %%% lambda The weight of entropy
15 | %% output:
16 | %%% results The results (list) [acc,NMI,purity]
17 | %%% results_iter The iteration information of 'results'
18 | %%% W The learned feature selection matrix
19 | %% Version
20 | %%% Implementation 2022-05-19
21 | options=defaultOptions(options,...
22 | 'T',10,... %% The iterations
23 | 'dim',80,... %% The dimension selected
24 | 'alpha',1e-3,... %% The weight of NMF w.r.t features |X'-X'WH|
25 | 'beta',0.01,... %% The weight of L(2,1/2)-(1/2) norm
26 | 'gamma',100,... %% The weight of manifold regularization
27 | 'lambda',1); %% The weight of entropy
28 | %% parameters
29 | T=options.T;
30 | dim=options.dim;
31 | alpha=options.alpha;
32 | beta=options.beta;
33 | gamma=options.gamma;
34 | lambda=options.lambda;
35 | %% Initialization
36 | eta=0.1;
37 | X=normr(X')';
38 | results_iter=[];
39 | myeps=1e-8;
40 | C=length(unique(Y));
41 | [m,n]=size(X);
42 | XX=X*X';
43 | % Init H
44 | [~,H]=litekmeans(X',C); % C*m
45 | [~,F]=litekmeans(X,C);F=F'; % n*C
46 | W=(H'*H+eta*eye(m))\H'; % m*C
47 | W=max(W,myeps);
48 | Gn=centeringMatrix(n);
49 | for i=1:T
50 | % Update S^H by Eq.(38)
51 | dist=EuDist2(H',H',0)+myeps;
52 | expDist=exp(dist/(2*lambda));
53 | sumExp=sum(expDist,1);
54 | SH=expDist./sumExp;
55 | % Update S^F by Eq.(40)
56 | dist=EuDist2(F,F,0);
57 | expDist=exp(dist/(2*lambda));
58 | sumExp=sum(expDist,1);
59 | SF=expDist./sumExp;
60 | % Update H by Eq.(24)
61 | DH=diag(sparse(sum(SH)));
62 | left=alpha*W'*XX+gamma*H*SH;
63 | right=alpha*W'*XX*W*H+gamma*H*DH;
64 | res=left./right;
65 | H=IterativeMultiplicativeUpdate(H,res);
66 | % Update F by GPI
67 | DF=diag(sparse(sum(SF)));
68 | LF=DF-SF;
69 | A1=Gn+gamma*LF; % support matrix
70 | A2=Gn*X'*W; % support matrix
71 | %% Method 2
72 | try
73 | R = GPI(A1,A2,[]);
74 | [Ur,~,Vr]=mySVD(R);
75 | F=Ur*Vr';
76 | catch ME
77 | warning('An error occurs when running GPI');
78 | break;
79 | end
80 |
81 | % Update W by Eq.(21)
82 | %%% compute U
83 | U=(4*diag(1./sum(power(W.*W+myeps,3/2),2)));
84 | %%% compute W
85 | left=alpha*XX*H'+X*Gn*F;
86 | right=alpha*XX*W*(H*H')+beta*U*W+X*Gn*X'*W;
87 | res=left./right;
88 | W=IterativeMultiplicativeUpdate(W,res);
89 | % Scores
90 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C);
91 | for index=1:3
92 | results_iter(index,i)=results(index);
93 | end
94 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
95 | results(1),results(2),results(3));
96 | end
97 | end
--------------------------------------------------------------------------------
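SLMEA (like URAFS and AGUFS above) updates F through GPI, the utility that solves min tr(W'AW-2W'B) s.t. W'W=I. A quick sanity check on random input, a sketch only:

% GPI returns an (approximately) orthonormal W: W'*W = I
A = rand(20); A = (A+A')/2;   % symmetric, as in the quadratic term
B = rand(20,5);
W = GPI(A, B, struct());
fprintf('Orthogonality error: %.2e\n', norm(W'*W - eye(5), 'fro'));
fprintf('Objective value: %.4f\n', trace(W'*A*W - 2*W'*B));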
/algorithm/RepresentationLearning/supervised/2017-MRSL/MRSL.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,W]=MRSL(trainX,trainY,testX,testY,options)
2 | %% Official codes: https://github.com/DarrenZZhang/MSRL
3 | %% Implementation of MRSL (Semi-supervised version)
4 | %%% Authors: Zhang et al.
5 | %%% Title: 2017-Marginal Representation Learning With Graph Structure Self-Adaptation
6 | %% input:
7 | %%% trainX: The training samples, m*n1
8 | %%% trainY: The labels of training samples, n1*1
9 | %%% testX: The test samples, m*n2
10 | %%% testY: The labels of test samples, n2*1
11 | %% options
12 | %%% T: The iterations
13 | %%% s: The latent dimension of projection matrix
14 | %%% mu: The regularization used to initialize the projection
15 | %%% matrix W
16 | %%% lambda: The weight of manifold regularization
17 | %%% beta: The weight of norm \|W\|_F^2
18 | %%% gamma: The weight of reconstruction
19 | %% output:
20 | %%% results The results (list) [acc,acc2,NMI,purity]
21 | %%% results_iter The iteration information of 'results'
22 | %%% W The learned projection matrix
23 | %% Version
24 | %%% Implementation 2022-05-18
25 | options=defaultOptions(options,...
26 | 's',10,... %%% The latent dimension of projection matrix
27 | 'T',10,... %%% The iterations
28 | 'mu',1e-4,... %%% The regularization used to initialize W
29 | 'lambda',1,... %%% The weight of manifold regularization
30 | 'beta',1,... %%% The weight of norm \|W\|_F^2
31 | 'gamma',1,... %%% The weight of reconstruction
32 | 'k',10); %%% The KNN numbers
33 | %% parameters
34 | T=options.T;
35 | mu=options.mu;
36 | beta=options.beta;
37 | lambda=options.lambda;
38 | gamma=options.gamma;
39 | k=options.k;
40 | s=options.s;
41 | %% Initialization
42 | results_iter=[];
43 | trainX=normr(trainX')';
44 | testX=normr(testX')';
45 |
46 | C=length(unique(trainY));
47 | [m,~]=size(trainX);
48 | n2=size(testX,2);
49 | X=[trainX,testX];
50 | %% Set W
51 | hotY1=hotmatrix(trainY,C);
52 | W=((trainX*trainX')+mu*eye(m))\(trainX*hotY1);
53 | %% Set P
54 | distX = EuDist2(X',X');
55 | [P,rr]=similarMatrix_CAN(distX,k,-1);
56 | A=eye(m,s);
57 | R=[hotY1;eye(n2,C)]';
58 | for i=1:T
59 | % Update B by Eq.(19)
60 | B=A'*W;
61 | % Update W by Eq.(21)
62 | %%% compute L
63 | Dp=diag(sparse(sum(P)));
64 | L=Dp-P;
65 | L=L./norm(L,'fro');
66 | %%% compute W
67 | G=X*X'+lambda*X*L*X'+(beta+gamma)*eye(m);
68 | W=(G-gamma*(A*A'))\(X*R');
69 | % Update A by Eq.(22)
70 | [Ua,~,Va]=mySVD(W*B',s);
71 | A=Ua*Va';
72 | % Classification
73 | [~,Ytpseudo]=max(W'*testX,[],1);Ytpseudo=Ytpseudo';
74 | YY=[trainY;Ytpseudo];
75 | % Update R by Algorithm 1
76 | F=W'*X;
77 | [R] = MRSL_getR(F,YY);
78 | % Update P by Eq.(36)
79 | distF= EuDist2(F',F');
80 | [P,~]=similarMatrix_CAN(distF,k,rr);
81 | results=MyClusteringMeasure(testY,Ytpseudo,1);%[ACC ACC2 MIhat Purity]';
82 | for index=1:3
83 | results_iter(index,i)=results(index);
84 | end
85 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
86 | results(1),results(2),results(3));
87 | end
88 | end
89 |
--------------------------------------------------------------------------------
/algorithm/RepresentationLearning/supervised/2017-MRSL/MRSL_getR.m:
--------------------------------------------------------------------------------
1 | function [R] = MRSL_getR(F,pseudoLabel)
2 | %% input:
3 | %%% F: The regression matrix with dimensions C*n
4 | %%% pseudoLabel: The pseudo labels of samples, n*1
5 | %% output:
6 | %%% R: The regression matrix solved, C*n
7 | [C,n]=size(F);
8 | % [~,pseudoLabel]=max(F,[],1);
9 | R=zeros(C,n);
10 | for idx=1:n
11 | m=pseudoLabel(idx);
12 | xi=0;t=0;
13 | Zj=F(:,idx)+1-repmat(F(m,idx),C,1);
14 | for c=1:C
15 | if m~=c
16 | zj=Zj(c);
17 | phiXi=2*xi+sum(min(xi-zj,0));
18 | if phiXi>0
19 | xi=xi+zj; % fix: accumulate the scalar zj; the original added the whole vector Zj, which breaks the scalar assignment to R(m,idx) below
20 | t=t+1;
21 | end
22 | end
23 | end
24 | xi=xi/(1+t);
25 | R(:,idx)=F(:,idx)+min(xi-Zj,0);
26 | R(m,idx)=F(m,idx)+xi;
27 | end
28 | end
29 |
--------------------------------------------------------------------------------
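RSLDA below updates its sparse error term with shrink(...); shrink.m is listed under utils but its source does not appear in this excerpt. A minimal sketch of the element-wise soft-thresholding operator it presumably implements (an assumption, not the repository file):

function [X] = shrink(X,e)
% Element-wise soft thresholding: argmin_Z e*||Z||_1 + 0.5*||Z-X||_F^2
X = sign(X).*max(abs(X)-e,0);
end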
/algorithm/RepresentationLearning/supervised/2019-RSLDA/RSLDA.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,Q]=RSLDA(trainX,trainY,testX,testY,options)
2 | %% Implementation of RSLDA
3 | %%% Authors: Wen et al.
4 | %%% Title: 2019-Robust Sparse Linear Discriminant Analysis
5 | %% input:
6 | %%% trainX: The training samples, m*n1
7 | %%% trainY: The labels of training samples, n1*1
8 | %%% testX: The test samples, m*n2
9 | %%% testY: The labels of test samples, n2*1
10 | %% options
11 | %%% T The iterations
12 | %%% dim The dimensions
13 | %%% mu The weight of inter-class term in LDA (1e-4 in paper)
14 | %%% lambda1 The weight of L2,1 norm
15 | %%% lambda2 The weight of L1 norm in E
16 | %%% betaMax The maximum value of beta
17 | %%% beta The weight of reconstruction (ADMM)
18 | %%% rho The increase rate
19 | %%% epsilon The regularization terms
20 | %% output:
21 | %%% results The results (list) [acc,NMI,purity]
22 | %%% results_iter The iteration information of 'results'
23 | %%% Q The learned projection matrix
24 | %% Version
25 | %%% Implementation 2022-05-18
26 | options=defaultOptions(options,...
27 | 'T',10,... %%% The iterations
28 | 'dim',10,... %%% The dimensions
29 | 'mu',1e-4,... %%% The weight of inter-class term in LDA (1e-4 in paper)
30 | 'beta',0.1,... %%% The weight of reconstruction (0.1 in paper)
31 | 'lambda1',1,... %%% The weight of L2,1 norm
32 | 'lambda2',1e-3,... %%% The weight of L1 norm in E
33 | 'betaMax',1e5,... %%% The maximum value of beta
34 | 'rho',1.01,... %%% The increase rate
35 | 'epsilon',1); %%% The regularization terms
36 | %% parameters
37 | T=options.T;
38 | dim=options.dim;
39 | beta=options.beta;
40 | mu=options.mu;
41 | lambda1=options.lambda1;
42 | lambda2=options.lambda2;
43 | rho=options.rho;
44 | betaMax=options.betaMax;
45 | epsilon=options.epsilon;
46 |
47 | %% Initialization
48 | results_iter=[];
49 | [m,n1]=size(trainX);
50 | X=[trainX,testX];
51 | D=eye(m,m);
52 | XX=trainX*trainX';
53 | E=zeros(m,n1);
54 | Sw=withinScatter(trainX,trainY);
55 | Sb=betweenScatter(trainX,trainY);
56 |
57 | Lagrangian=zeros(m,n1);
58 | % Initialize P
59 | left=Sw-mu*Sb;
60 | left=1/n1*left;
61 | % left=left./norm(left,'fro');
62 | [P,~]=eigs(left+epsilon*eye(m),eye(m),dim,'sm');
63 | for i=1:T
64 | % Solve Q by Eq.(15)
65 | M=trainX-E+Lagrangian/beta;
66 | Q=(2*(left)+lambda1*D+beta*XX)\(beta*trainX*M'*P);
67 | % Solve P by Eq.(16)
68 | % [Up,~,Vp]=svd(M*trainX'*Q,'econ');
69 | [Up,~,Vp]=mySVD(M*trainX'*Q,dim);
70 | P=Up*Vp';
71 | % Solve E by Eq.(19)
72 | e=lambda2/beta;
73 | E=shrink(trainX-P*Q'*trainX+Lagrangian/beta,e);
74 | % Update Lagrangian multiplier
75 | Lagrangian=Lagrangian+beta*(trainX-P*Q'*trainX-E);
76 | beta=min(rho*beta,betaMax);
77 | % Update D
78 | D=2*updateL21(Q);
79 | % Classification
80 | Z=Q'*X;
81 | Z=L2Norm(Z')';
82 | Zs=Z(:,1:n1);
83 | Zt=Z(:,n1+1:end);
84 | Ytpseudo=classifyKNN(Zs,trainY,Zt,1);
85 | results=MyClusteringMeasure(testY,Ytpseudo,1);%[ACC ACC2 MIhat Purity]';
86 | for index=1:3
87 | results_iter(index,i)=results(index);
88 | end
89 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
90 | results(1),results(2),results(3));
91 | end
92 | end
--------------------------------------------------------------------------------
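RSLDA and LRDAGP both build their LDA scatters through withinScatter and betweenScatter; only betweenScatter.m appears later in this excerpt. A minimal sketch of the within-class scatter that withinScatter presumably computes (an assumption, mirroring betweenScatter's interface):

function [Sw] = withinScatter(X,Y)
% Within-class scatter: Sw = sum_c sum_{x in class c} (x-u_c)(x-u_c)'
% input  X: m*n, Y: n*1;  output  Sw: m*m
C=length(unique(Y));
Sw=0;
for c=1:C
    Xc=X(:,Y==c);
    F=Xc-mean(Xc,2);
    Sw=Sw+F*F';
end
end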
/algorithm/RepresentationLearning/supervised/2020-LRDAGP/LRDAGP.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,P]=LRDAGP(trainX,trainY,testX,testY,options)
2 | %% Implementation of LRDAGP
3 | %%% Authors Du et al.
4 | %%% Title 2020-Low-Rank Discriminative Adaptive Graph Preserving Subspace Learning
5 | %% input:
6 | %%% trainX The training samples, m*n1
7 | %%% trainY The labels of training samples, n1*1
8 | %%% testX The test samples, m*n2
9 | %%% testY The labels of test samples, n2*1
10 | %% options
11 | %%% T The total iteration times
12 | %%% t The iteration times
13 | %%% dim The dimension reduced
14 | %%% k The number of KNN
15 | %%% alpha The relative weight of Sb, i.e., Sw-alpha*Sb
16 | %%% beta The weight of nuclear norm w.r.t J (Z)
17 | %%% theta The weight of L2,1 norm w.r.t E
18 | %%% lambda The weight of scatters w.r.t P
19 | %%% mu The lagrange coefficient
20 | %%% muMax The maximum value of `mu`
21 | %%% rho The increase rate of `mu`
22 | %% output:
23 | %%% results The results (list) [acc,NMI,purity]
24 | %%% results_iter The iteration information of 'results'
25 | %%% P The learned projection matrix
26 | %% Version
27 | %%% Implementation 2022-05-28
28 | options=defaultOptions(options,...
29 | 'T',10,... %%% The total iteration times
30 | 'dim',100,... %%% The dimension reduced
31 | 'k',10,... %%% The number of KNN
32 | 't',10,... %%% The iteration times
33 | 'alpha',0.1,... %%% The relative weight of Sb, i.e., Sw-alpha*Sb
34 | 'beta',1,... %%% The weight of nuclear norm w.r.t J (Z)
35 | 'theta',1,... %%% The weight of L2,1 norm w.r.t E
36 | 'lambda',0.1,... %%% The weight of scatters w.r.t P
37 | 'mu',0.1,... %%% The lagrange coefficient
38 | 'muMax',1e3,... %%% The maximum value of `mu`
39 | 'rho',1.01); %%% The increase rate of `mu`
40 | %% parameters
41 | T=options.T;
42 | k=options.k;
43 | myeps=1e-4;
44 | %% Initialization
45 | results_iter=[];
46 | [m,n]=size(trainX);
47 | % Init G & V
48 | G=1/n*withinScatter(trainX,trainY);
49 | V=1/n*betweenScatter(trainX,trainY);
50 | dist=EuDist2(trainX',trainX');
51 | [S,rr]=similarMatrix_CAN(dist,k,-1);
52 | [P,~]=eigs(options.lambda*G-options.lambda*options.alpha*V+myeps*eye(m),eye(m),options.dim,'sm');
53 | for i=1:T
54 | % Solve Z and E by Algorithm 1
55 | [Z,~] = LRDAGP_solveAlg1(trainX,P,options);
56 | % Solve P by Algorithm 2
57 | S=(S+S')/2;
58 | D=diag(sparse(sum(S)));
59 | L=D-S;
60 | L=L./norm(L,'fro');
61 | XLX=trainX*L*trainX';
62 | P = LRDAGP_solveAlg2(trainX,P,Z,XLX,G,V,options);
63 | % Update S by CAN
64 | Zs=real(P'*trainX);
65 | dist=EuDist2(Zs');
66 | S=similarMatrix_CAN(dist,k,rr);
67 | % Classification
68 | Zt=real(P'*testX);
69 | Ytpseudo=classifyKNN(Zs,trainY,Zt,1);
70 | results=MyClusteringMeasure(testY,Ytpseudo,1);%[ACC MIhat Purity]';
71 | for index=1:3
72 | results_iter(index,i)=results(index);
73 | end
74 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
75 | results(1),results(2),results(3));
76 |
77 | end
78 | end
--------------------------------------------------------------------------------
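LRDAGP_solveAlg1 below updates its low-rank block J with SVT, the singular value thresholding operator; SVT.m is listed under utils but not shown in this excerpt. A minimal sketch of the standard operator it presumably implements (an assumption):

function [X] = SVT(A,tau)
% Singular value thresholding: proximal operator of tau*||.||_* at A
[U,S,V] = svd(A,'econ');
s = max(diag(S)-tau,0);   % shrink the singular values
X = U*diag(s)*V';
end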
/algorithm/RepresentationLearning/supervised/2020-LRDAGP/LRDAGP_solveAlg1.m:
--------------------------------------------------------------------------------
1 | function [Z,E] = LRDAGP_solveAlg1(trainX,P,options)
2 | %% Target
3 | %%% Solve Z and E
4 | %% input
5 | %%% trainX The training samples, m*n
6 | %%% P The learned projection matrix, m*d
7 | %%% options
8 | %%%%% mu The lagrange coefficient
9 | %%%%% muMax The maximum value of `mu`
10 | %%%%% rho The increase rate of `mu`
11 | %%%%% t The iteration times
12 | %%%%% beta The weight of nuclear norm w.r.t J (Z)
13 | %%%%% theta The weight of L2,1 norm w.r.t E
14 | %% Parameters
15 | mu=options.mu;
16 | muMax=options.muMax;
17 | rho=options.rho;
18 | T=options.t;
19 | beta=options.beta;
20 | theta=options.theta;
21 | epsilon=1e-8;
22 | %% Init
23 | [m,n]=size(trainX);
24 | Z=0;E=0;
25 | Y1=0;Y2=0;
26 | for i=1:T
27 | % Update J by Eq.(19)
28 | J=SVT(Z+Y2/mu,beta/mu);
29 | % Update Z by Eq.(20)
30 | Z=(trainX'*(P*P')*trainX+eye(n))\(trainX'*P*(P'*trainX-E+Y1/mu)-Y2/mu+J);
31 | % Update E by Eq.(21)
32 | E=SolveL21Problem(P'*trainX-P'*trainX*Z+Y1/mu,theta/mu);
33 | % Update Lagrange multipliers
34 | cd1=P'*trainX-P'*trainX*Z-E;
35 | cd2=Z-J;
36 | Y1=Y1+mu*(cd1);
37 | Y2=Y2+mu*(cd2);
38 | mu=min(rho*mu,muMax);
39 | if norm(cd1,'inf')<epsilon && norm(cd2,'inf')<epsilon % reconstructed stopping rule; this line was garbled in the source dump
40 | break;
41 | end
42 | end
43 | end
--------------------------------------------------------------------------------
[... the sections for LRDAGP_solveAlg2.m, RDA_FSIS.m, DSDPL.m, SN_TSL.m, JLRSL.m, rLPP.m, and the beginning of readme.md are missing from this excerpt ...]
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
15 | **To run supervised method**:
16 |
17 | > 'Demo_Supervised_Feature_Selection.m' gives a simple example for supervised methods.
18 | >
19 | > To run the codes, the sizes of the inputs are trainX (m\*n1), trainY (n1\*1), testX (m\*n2), and testY (n2\*1), where *m* is the dimension, and *n1* and *n2* present the numbers of the training and test samples, respectively (testY is only used to calculate the clustering results, and is not involved in training).
20 |
21 | **To run unsupervised method**:
22 |
23 | > 'Demo_Unsupervised_Feature_Selection.m' and 'Demo_Unsupervised_Representation_Learning.m' give simple examples for unsupervised methods.
24 |
25 | > To run the codes, the sizes of the inputs are X (m\*n) and Y (n\*1), where *m* is the dimension, and *n* presents the number of the training samples (Y is only used to calculate the clustering results, and is not involved in training).
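A minimal end-to-end sketch of the calling convention described above, mirroring Demo_Supervised_Feature_Selection.m (the `options` fields are algorithm-specific and optional):

```matlab
addpath('./utils/'); addpath(genpath('./algorithm/'));
load('./COIL20.mat','X','Y');       % X: n*m, Y: n*1
X = X';                             % make the input m*n
[X1,Y1,X2,Y2] = splitData(X,Y,10);  % 10 training samples per class
options = struct();                 % empty => defaults via defaultOptions
[results,results_iter,Q] = RSLDA(X1,Y1,X2,Y2,options);
```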
26 |
27 | ## The codes included in the repository
28 |
29 | ### 1. Feature Selection
30 |
31 | #### 1.1 Supervised Methods
32 |
33 | > The codes will be available soon.
34 |
35 | ---
36 |
37 | #### 1.2 Unsupervised Methods
38 |
39 | - 2019-LRLMR [[1]](https://www.sciencedirect.com/science/article/pii/S0893608019301212): Unsupervised feature selection via latent representation learning and manifold regularization.
40 |
41 | > We reproduce the codes following the descriptions in the paper. The official codes are available at http://tangchang.net/
42 |
43 | - 2019-URAFS [[2]](https://ieeexplore.ieee.org/abstract/document/8474999): Generalized Uncorrelated Regression with Adaptive Graph for Unsupervised Feature Selection.
44 |
45 | - 2021-AGUFS [[3]](https://www.sciencedirect.com/science/article/pii/S0950705121004196): Adaptive graph-based generalized regression model for unsupervised feature selection.
46 |
47 | - 2021-DSLRL [[4]](https://www.sciencedirect.com/science/article/pii/S0031320321000601): Dual space latent representation learning for unsupervised feature selection.
48 |
49 | - 2022-DLUFS [[5]](https://www.sciencedirect.com/science/article/pii/S0957417422005437): Low-rank dictionary learning for unsupervised feature selection.
50 |
51 | > The official codes (python implementation) are available at https://github.com/mohsengh/DLUFS/
52 |
53 | - 2022-SLMEA [[6]](https://www.sciencedirect.com/science/article/pii/S0925231222001916): Sparse and low-dimensional representation with maximum entropy adaptive graph for feature selection.
54 |
55 | ---
56 |
57 | ### 2. Clustering
58 |
59 | - 2015-rLPP [[13]](https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/viewPaper/9921): Learning Robust Locality Preserving Projection via p-Order Minimization.
60 |
61 | ### 3. Representation & Subspace Learning
62 |
63 | #### 3.1 Supervised Methods
64 |
65 | - 2017-MRSL [[9]](https://ieeexplore.ieee.org/abstract/document/8128909/): Marginal Representation Learning With Graph Structure Self-Adaptation.
66 |
67 | > Official codes are available at https://github.com/DarrenZZhang/MSRL.
68 |
69 | - 2019-RSLDA [[7]](https://ieeexplore.ieee.org/abstract/document/8272002): Robust Sparse Linear Discriminant Analysis.
70 |
71 | - 2020-LRDAGP [[10]](https://link.springer.com/article/10.1007/s11063-020-10340-6): Low-Rank Discriminative Adaptive Graph Preserving Subspace Learning.
72 |
73 | - 2020-RDA_FSIS [[12]](https://www.sciencedirect.com/science/article/abs/pii/S0893608020301386): Linear embedding by joint Robust Discriminant Analysis and Inter-class Sparsity.
74 |
75 | - 2021-SN-TSL [[11]](https://www.sciencedirect.com/science/article/abs/pii/S016516842100027X): Sparse non-negative transition subspace learning for image classification.
76 |
77 | - 2021-DSDPL [[8]](https://www.sciencedirect.com/science/article/pii/S0031320320303848): Dual subspace discriminative projection learning.
78 |
79 | #### 3.2 Unsupervised Methods
80 |
81 | - 2020-JLRSL [[14]](https://www.sciencedirect.com/science/article/pii/S0950705120301428): Joint low-rank representation and spectral regression for robust subspace learning.
82 |
83 |
84 |
85 | # Reference
86 |
87 | [1] Tang, Chang, et al. "Unsupervised feature selection via latent representation learning and manifold regularization." *Neural Networks* 117 (2019): 163-178.
88 |
89 | [2] X. Li, H. Zhang, R. Zhang, Y. Liu and F. Nie, "Generalized Uncorrelated Regression with Adaptive Graph for Unsupervised Feature Selection," in IEEE Transactions on Neural Networks and Learning Systems, vol. 30, no. 5, pp. 1587-1595, May 2019, doi: 10.1109/TNNLS.2018.2868847.
90 |
91 | [3] Huang, Yanyong, et al. "Adaptive graph-based generalized regression model for unsupervised feature selection." *Knowledge-Based Systems* 227 (2021), doi: 10.1016/j.knosys.2021.107156.
92 |
93 | [4] Shang, Ronghua, et al. "Dual space latent representation learning for unsupervised feature selection." *Pattern Recognition* 114 (2021), doi: 10.1016/j.patcog.2021.107873.
94 |
95 | [5] Parsa, Mohsen Ghassemi, Hadi Zare, and Mehdi Ghatee. "Low-rank dictionary learning for unsupervised feature selection." *Expert Systems with Applications* 202 (2022), doi: 10.1016/j.eswa.2022.117149.
96 |
97 | [6] Shang, Ronghua, et al. "Sparse and low-dimensional representation with maximum entropy adaptive graph for feature selection." *Neurocomputing* 485 (2022): 57-73.
98 |
99 | [7] J. Wen et al., "Robust Sparse Linear Discriminant Analysis," in IEEE Transactions on Circuits and Systems for Video Technology, vol. 29, no. 2, pp. 390-403, Feb. 2019, doi: 10.1109/TCSVT.2018.2799214.
100 |
101 | [8] Belous, Gregg, Andrew Busch, and Yongsheng Gao. "Dual subspace discriminative projection learning." *Pattern Recognition* 111 (2021), doi: 10.1016/j.patcog.2020.107581. 
102 | 103 | [9] Zhang, Zheng, et al. "Marginal representation learning with graph structure self-adaptation." *IEEE Transactions on Neural Networks and Learning Systems* 29.10 (2017): 4645-4659. 104 | 105 | [10] Du, Haishun, et al. "Low-rank discriminative adaptive graph preserving subspace learning." *Neural Processing Letters* 52.3 (2020): 2127-2149. 106 | 107 | [11] Chen, Zhe, et al. "Sparse non-negative transition subspace learning for image classification." *Signal Processing* 183 (2021), doi: 10.1016/j.sigpro.2021.107988. 108 | 109 | [12] Dornaika, Fadi, and A. Khoder. "Linear embedding by joint robust discriminant analysis and inter-class sparsity." *Neural Networks* 127 (2020): 141-159. 110 | 111 | [13] Wang, Hua, Feiping Nie, and Heng Huang. "Learning robust locality preserving projection via p-order minimization." In *Twenty-Ninth AAAI Conference on Artificial Intelligence*. 2015. 112 | 113 | [14] Peng, Yong, Leijie Zhang, Wanzeng Kong, Feiwei Qin, and Jianhai Zhang. "Joint low-rank representation and spectral regression for robust subspace learning." *Knowledge-Based Systems* 195 (2020), doi: 10.1016/j.knosys.2020.105723. -------------------------------------------------------------------------------- /utils/EProjSimplex_new.m: -------------------------------------------------------------------------------- 1 | function [x ft] = EProjSimplex_new(v, k) 2 | 3 | % 4 | %% Problem 5 | % 6 | % min 1/2 || x - v||^2 7 | % s.t. x>=0, 1'x=1 8 | % 9 | 10 | if nargin < 2 11 | k = 1; 12 | end; 13 | 14 | ft=1; 15 | n = length(v); 16 | 17 | v0 = v-mean(v) + k/n; 18 | %vmax = max(v0); 19 | vmin = min(v0); 20 | if vmin < 0 21 | f = 1; 22 | lambda_m = 0; 23 | while abs(f) > 10^-10 24 | v1 = v0 - lambda_m; 25 | posidx = v1>0; 26 | npos = sum(posidx); 27 | g = -npos; 28 | f = sum(v1(posidx)) - k; 29 | lambda_m = lambda_m - f/g; 30 | ft=ft+1; 31 | if ft > 100 32 | x = max(v1,0); 33 | break; 34 | end; 35 | end; 36 | x = max(v1,0); 37 | 38 | else 39 | x = v0; 40 | end; -------------------------------------------------------------------------------- /utils/EuDist2.m: -------------------------------------------------------------------------------- 1 | function D = EuDist2(fea_a,fea_b,bSqrt) 2 | %EUDIST2 Efficiently Compute the Euclidean Distance Matrix by Exploring the 3 | %Matlab matrix operations. 
4 | %
5 | % D = EuDist(fea_a,fea_b)
6 | % fea_a: nSample_a * nFeature
7 | % fea_b: nSample_b * nFeature
8 | % D: nSample_a * nSample_a
9 | % or nSample_a * nSample_b
10 | %
11 | % Examples:
12 | %
13 | % a = rand(500,10);
14 | % b = rand(1000,10);
15 | %
16 | % A = EuDist2(a); % A: 500*500
17 | % D = EuDist2(a,b); % D: 500*1000
18 | %
19 | % version 2.1 --November/2011
20 | % version 2.0 --May/2009
21 | % version 1.0 --November/2005
22 | %
23 | % Written by Deng Cai (dengcai AT gmail.com)
24 |
25 |
26 | if ~exist('bSqrt','var')
27 | bSqrt = 1;
28 | end
29 |
30 | if (~exist('fea_b','var')) || isempty(fea_b)
31 | aa = sum(fea_a.*fea_a,2);
32 | ab = fea_a*fea_a';
33 |
34 | if issparse(aa)
35 | aa = full(aa);
36 | end
37 |
38 | D = bsxfun(@plus,aa,aa') - 2*ab;
39 | D(D<0) = 0;
40 | if bSqrt
41 | D = sqrt(D);
42 | end
43 | D = max(D,D');
44 | else
45 | aa = sum(fea_a.*fea_a,2);
46 | bb = sum(fea_b.*fea_b,2);
47 | ab = fea_a*fea_b';
48 |
49 | if issparse(aa)
50 | aa = full(aa);
51 | bb = full(bb);
52 | end
53 |
54 | D = bsxfun(@plus,aa,bb') - 2*ab;
55 | D(D<0) = 0;
56 | if bSqrt
57 | D = sqrt(D);
58 | end
59 | end
60 |
--------------------------------------------------------------------------------
/utils/GPI.m:
--------------------------------------------------------------------------------
1 | function [W] = GPI(A,B,options)
2 | %% Solve:
3 | %%% min tr(W'AW-2W'B) s.t. W'W=I
4 | %%% => max tr(W' hatA W+2W'B) s.t. W'W=I, hatA=alpha*I-A
5 | %% input:
6 | %%% W learned matrix
7 | %%% A W'AW (m*m)
8 | %%% B 2 W'B (m*k)
9 | %% options:
10 | %%% T: iteration (default:1e3)
11 | %%% maxIter:maxRandom (default:1e3)
12 | %%% precision: the convergence precision (default:1e-4)
13 | if nargin==2
14 | options=struct();
15 | end
16 | if ~isfield(options,'T')
17 | options.T=1e3;
18 | end
19 | if ~isfield(options,'precision')
20 | options.precision=1e-4;
21 | end
22 | T=options.T;
23 | precision=options.precision;
24 | n=size(A,1);
25 | %% mu : the largest eigenvalue of A
26 | [U,V] = eig(A);
27 | [~, index] = sort(diag(V),'ascend');
28 | mu=diag(V);mu=max(mu);
29 | Atau=mu*eye(n)-A;
30 | W = U(:, index(1:size(B,2)));
31 | % try chol(Atau);
32 | % % fprintf('Matrix is symmetric positive definite.\n');
33 | % catch ME
34 | % warning('[GPI] Atau=mu*eye(n)-A is not a positive definite matrix!');
35 | % end
36 | lastVal=Inf;
37 | for i=1:T
38 | beforeW=W;
39 | M=2*Atau*W+2*B;
40 | [U,~,V]=svd(M,'econ');
41 | W=U*V';
42 | val=norm(beforeW-W,'inf');
43 | if abs(lastVal-val)<precision && i>=3 % reconstructed: the comparison was garbled in the source dump; `precision` is otherwise unused
44 | break;
45 | else
46 | lastVal=val;
47 | end
48 | if i==T
49 | warning('[GPI] No convergence (iteration > maximum T).');
50 | end
51 | end
52 | end
53 |
--------------------------------------------------------------------------------
/utils/IterativeMultiplicativeUpdate.m:
--------------------------------------------------------------------------------
1 | function result=IterativeMultiplicativeUpdate(X,gradient)
2 | %% input:
3 | %%% X : The matrix waiting for update (m*n)
4 | %%% gradient: The gradient of X (m*n)
5 | %% Output:
6 | %%% result: The matrix updated (m*n)
7 | myeps=1e-8;
8 | gradient=gradient+((abs(gradient)<myeps).*myeps); % reconstructed (assumption): guard vanishing entries; the tail of this file was lost in the dump
9 | result=X.*gradient; % reconstructed multiplicative update (assumption)
10 | end
--------------------------------------------------------------------------------
[... the sections for /utils/L2Norm.m, /utils/MyClusteringMeasure.m, and /utils/SVT.m are missing from this excerpt ...]
--------------------------------------------------------------------------------
/utils/SolveL21Problem.m:
--------------------------------------------------------------------------------
1 | function [E] = SolveL21Problem(Q,alpha)
2 | %% Formula
3 | %%% Column-wise shrinkage for the L2,1-norm proximal problem
4 | %%% (lines 4-8 reconstructed; the dump lost this span):
5 | %%% E(:,i)=\begin{cases}
6 | %%% \frac{\|q_i\|-\alpha}{\|q_i\|}q_i,
7 | %%% &\text{if}\
8 | %%% \|q_i\|>\alpha;
9 | %%% \\ 0,
10 | %%% &\text{otherwise}
11 | %%% \end{cases}
12 | %% input
13 | %%% Q: A tractable matrix with m*n
14 | %%% alpha: The hyperparameter
15 | %% output
16 | %%% E: The pursued matrix
17 | [m,n]=size(Q);
18 | E=zeros(m,n);
19 | sumSqrtQ=sqrt(sum(Q.*Q,1));
20 | flag=sumSqrtQ>alpha;
21 | if sum(flag)>0
22 | score=sumSqrtQ(flag);
23 | score=(score-alpha)./(score);
24 | score=repmat(score,m,1); 
25 | E(:,flag)=score.*Q(:,flag); 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /utils/betweenScatter.m: -------------------------------------------------------------------------------- 1 | function [Sb] = betweenScatter(X,Y) 2 | %% there are two methods construct the betweenScatter 3 | %%% 1, Sb=\sum_{c=1}^C n_c (x - u_c)(x - u_c)' (DIJDA) 4 | %%% 2, Sb=\sum_{c=1}^C n_c (u_c - mean(X,2))(u_c - mean(X,2))' (JGSA) 5 | % input 6 | % X: m*n 7 | % Y: n*1 8 | % output 9 | % Sb: m*m 10 | C=length(find(unique(Y))); 11 | n=length(Y); 12 | Sb=0; 13 | Fc=mean(X,2); 14 | for i=1:C 15 | Xc=X(:,Y==i); 16 | F=Xc-Fc; 17 | nc=size(Xc,2); 18 | Sb=Sb+nc*(F*F'); 19 | end 20 | % Sb=Sb./n; 21 | end -------------------------------------------------------------------------------- /utils/centeringMatrix.m: -------------------------------------------------------------------------------- 1 | function [H] = centeringMatrix(n) 2 | H=eye(n)-1/n*ones(n,n); 3 | end 4 | 5 | -------------------------------------------------------------------------------- /utils/classifyKNN.m: -------------------------------------------------------------------------------- 1 | function [Y_pse] = classifyKNN(Xs,Ys,Xt,k) 2 | knn_model = fitcknn(Xs',Ys,'NumNeighbors',k); 3 | Y_pse = knn_model.predict(Xt'); 4 | end 5 | 6 | -------------------------------------------------------------------------------- /utils/computeL.m: -------------------------------------------------------------------------------- 1 | function [L,D,W] = computeL(X,manifold) 2 | %% input 3 | %%% X: fea*n 4 | %%% manifold: the construct options of graph 5 | %% Construct graph Laplacian 6 | if ~isfield(manifold,'normr') 7 | manifold.normr=1; 8 | end 9 | n=size(X,2); 10 | W = lapgraph(X',manifold); 11 | D=diag(sparse(sum(W))); 12 | if manifold.normr==1 13 | Dw = diag(sparse(sqrt(1 ./ sum(W)))); 14 | L = eye(n) - Dw * W * Dw; 15 | else 16 | L = D-W; 17 | end 18 | 19 | end 20 | %% Cosine 21 | % manifold.k = k; 22 | % manifold.Metric = 'Cosine'; 23 | % manifold.WeightMode = 'Cosine'; 24 | % manifold.NeighborMode = 'KNN'; 25 | %% Eudist 26 | % manifold.k = k; 27 | % manifold.Metric = 'Euclidean'; 28 | % manifold.WeightMode = 'HeatKernel'; 29 | % manifold.NeighborMode = 'KNN'; -------------------------------------------------------------------------------- /utils/defaultOptions.m: -------------------------------------------------------------------------------- 1 | function [options] = defaultOptions(varargin) 2 | options=varargin{1}; 3 | n=nargin-1; 4 | if mod(n,2) ~=0 5 | error('Please enter coupled parameters\n'); 6 | return ; 7 | end 8 | n=n/2; 9 | for i=1:n 10 | pos=1+2*i-1; 11 | key=varargin{pos}; 12 | if ~isfield(options,key) 13 | val=varargin{pos+1}; 14 | options=setfield(options,key,val); 15 | end 16 | end 17 | end 18 | 19 | 20 | -------------------------------------------------------------------------------- /utils/getClusteringResults.m: -------------------------------------------------------------------------------- 1 | function [WX,resultsFinal] = getClusteringResults(X,Y,W,C,options) 2 | %% Formula 3 | %%% select top-k highest scores of features in W'X 4 | %% Input: 5 | %%% X The feature, m*n 6 | %%% Y The labels, n*1 7 | %%% W The feature selection matrix, m*d 8 | %%% C The clustering number (default `length(unique(Y))`) 9 | %% Output: 10 | %%% WX The projection subspace, d*n 11 | %%% results The clustering results [acc,NMI,purity] 12 | if nargin<=4 13 | options=struct(); 14 | end 15 | if nargin<=3 16 | 
C=length(unique(Y)); 17 | end 18 | options=defaultOptions(options,... 19 | 'T',10,... %% The repeat times of kmeans 20 | 'MaxIter',100,... %% Options of 'litekmeans' 21 | 'Replicates',10,...%% Options of 'litekmeans' 22 | 'supervisedFlag',0); %% Options of 'MyClusteringMeasure' 23 | WX=W'*X; 24 | resultsAll=[]; 25 | for i=1:options.T 26 | Ypseudo=litekmeans(WX',C,'MaxIter',options.MaxIter,'Replicates',options.Replicates); 27 | results=MyClusteringMeasure(Y,Ypseudo);%[ACC ACC2 MIhat Purity]'; 28 | resultsAll=[resultsAll,results]; 29 | end 30 | resultsFinal=mean(resultsAll,2); 31 | end 32 | 33 | -------------------------------------------------------------------------------- /utils/getFeatureSelectionResults.m: -------------------------------------------------------------------------------- 1 | function [X_new,resultsFinal] = getFeatureSelectionResults(X,Y,W,dim,C,options) 2 | %% Formula 3 | %%% select top-k highest scores of features in X'W 4 | %% Input: 5 | %%% X The feature, m*n 6 | %%% Y The labels, n*1 7 | %%% W The feature selection matrix, m*m 8 | %%% dim The dimension reduced 9 | %% Output: 10 | %%% newX The selected feautre sample, k*n 11 | %%% results The clustering results [acc,NMI,purity] 12 | if nargin<=5 13 | options=struct(); 14 | end 15 | if nargin<=4 16 | C=length(unique(Y)); 17 | end 18 | options=defaultOptions(options,... 19 | 'T',10,... %% The repeat times of kmeans 20 | 'MaxIter',100,... %% Options of 'litekmeans' 21 | 'Replicates',10,...%% Options of 'litekmeans' 22 | 'supervisedFlag',0); %% Options of 'MyClusteringMeasure' 23 | score=sum((W.*W),2); 24 | [~,index]=sort(score,'descend'); 25 | X_new = X(index(1:dim),:); 26 | resultsAll=[]; 27 | for i=1:options.T 28 | Ypseudo=litekmeans(X_new',C,'MaxIter',options.MaxIter,'Replicates',options.Replicates); 29 | results=MyClusteringMeasure(Y,Ypseudo,options.supervisedFlag);%[ACC ACC2 MIhat Purity]'; 30 | resultsAll=[resultsAll,results]; 31 | end 32 | resultsFinal=mean(resultsAll,2); 33 | end 34 | 35 | -------------------------------------------------------------------------------- /utils/hotmatrix.m: -------------------------------------------------------------------------------- 1 | function [matrix] = hotmatrix(labels,C,weight) 2 | %% input: 3 | %%% labels: n*1, the labels of samples 4 | %%% C: integer, the number of the classes 5 | %%% weight: integer, if weight==1, then the value is 1/length(classes) 6 | %% output: 7 | %%% matrix: n*C, the output hotmatrix 8 | if nargin==2 9 | weight=0; % weight =0,then Y={0,1} , weight = 1, then Y={0,1/n^c} 10 | end 11 | n=length(labels); 12 | matrix=zeros(n,C); 13 | weightY=zeros(C,1); 14 | for i=1:C 15 | if weight==0 16 | weightY(i)=1; 17 | else 18 | weightY(i)=1/length(find(labels==i)); 19 | end 20 | end 21 | for i=1:n 22 | if(labels(i)>0 &&labels(i)<=C) 23 | matrix(i,labels(i))=weightY(labels(i)); 24 | end 25 | end 26 | % other implementation 27 | % full(sparse(1:ns,Ys,1)); 28 | end 29 | 30 | -------------------------------------------------------------------------------- /utils/lapgraph.m: -------------------------------------------------------------------------------- 1 | function [W, elapse] = lapgraph(fea,options) 2 | % Usage: 3 | % W = graph(fea,options) 4 | % 5 | % fea: Rows of vectors of data points. Each row is x_i 6 | % options: Struct value in Matlab. The fields in options that can be set: 7 | % Metric - Choices are: 8 | % 'Euclidean' - Will use the Euclidean distance of two data 9 | % points to evaluate the "closeness" between 10 | % them. 
[Default One] 11 | % 'Cosine' - Will use the cosine value of two vectors 12 | % to evaluate the "closeness" between them. 13 | % A popular similarity measure used in 14 | % Information Retrieval. 15 | % 16 | % NeighborMode - Indicates how to construct the graph. Choices 17 | % are: [Default 'KNN'] 18 | % 'KNN' - k = 0 19 | % Complete graph 20 | % k > 0 21 | % Put an edge between two nodes if and 22 | % only if they are among the k nearst 23 | % neighbors of each other. You are 24 | % required to provide the parameter k in 25 | % the options. Default k=5. 26 | % 'Supervised' - k = 0 27 | % Put an edge between two nodes if and 28 | % only if they belong to same class. 29 | % k > 0 30 | % Put an edge between two nodes if 31 | % they belong to same class and they 32 | % are among the k nearst neighbors of 33 | % each other. 34 | % Default: k=0 35 | % You are required to provide the label 36 | % information gnd in the options. 37 | % 38 | % WeightMode - Indicates how to assign weights for each edge 39 | % in the graph. Choices are: 40 | % 'Binary' - 0-1 weighting. Every edge receiveds weight 41 | % of 1. [Default One] 42 | % 'HeatKernel' - If nodes i and j are connected, put weight 43 | % W_ij = exp(-norm(x_i - x_j)/2t^2). This 44 | % weight mode can only be used under 45 | % 'Euclidean' metric and you are required to 46 | % provide the parameter t. 47 | % 'Cosine' - If nodes i and j are connected, put weight 48 | % cosine(x_i,x_j). Can only be used under 49 | % 'Cosine' metric. 50 | % 51 | % k - The parameter needed under 'KNN' NeighborMode. 52 | % Default will be 5. 53 | % gnd - The parameter needed under 'Supervised' 54 | % NeighborMode. Colunm vector of the label 55 | % information for each data point. 56 | % bLDA - 0 or 1. Only effective under 'Supervised' 57 | % NeighborMode. If 1, the graph will be constructed 58 | % to make LPP exactly same as LDA. Default will be 59 | % 0. 60 | % t - The parameter needed under 'HeatKernel' 61 | % WeightMode. Default will be 1 62 | % bNormalized - 0 or 1. Only effective under 'Cosine' metric. 63 | % Indicates whether the fea are already be 64 | % normalized to 1. Default will be 0 65 | % bSelfConnected - 0 or 1. Indicates whether W(i,i) == 1. Default 1 66 | % if 'Supervised' NeighborMode & bLDA == 1, 67 | % bSelfConnected will always be 1. Default 1. 68 | % 69 | % 70 | % Examples: 71 | % 72 | % fea = rand(50,15); 73 | % options = []; 74 | % options.Metric = 'Euclidean'; 75 | % options.NeighborMode = 'KNN'; 76 | % options.k = 5; 77 | % options.WeightMode = 'HeatKernel'; 78 | % options.t = 1; 79 | % W = constructW(fea,options); 80 | % 81 | % 82 | % fea = rand(50,15); 83 | % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4]; 84 | % options = []; 85 | % options.Metric = 'Euclidean'; 86 | % options.NeighborMode = 'Supervised'; 87 | % options.gnd = gnd; 88 | % options.WeightMode = 'HeatKernel'; 89 | % options.t = 1; 90 | % W = constructW(fea,options); 91 | % 92 | % 93 | % fea = rand(50,15); 94 | % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4]; 95 | % options = []; 96 | % options.Metric = 'Euclidean'; 97 | % options.NeighborMode = 'Supervised'; 98 | % options.gnd = gnd; 99 | % options.bLDA = 1; 100 | % W = constructW(fea,options); 101 | % 102 | % 103 | % For more details about the different ways to construct the W, please 104 | % refer: 105 | % Deng Cai, Xiaofei He and Jiawei Han, "Document Clustering Using 106 | % Locality Preserving Indexing" IEEE TKDE, Dec. 2005. 
107 | % 108 | % 109 | % Written by Deng Cai (dengcai2 AT cs.uiuc.edu), April/2004, Feb/2006, 110 | % May/2007 111 | % 112 | 113 | if (~exist('options','var')) 114 | options = []; 115 | else 116 | if ~isstruct(options) 117 | error('parameter error!'); 118 | end 119 | end 120 | 121 | %================================================= 122 | if ~isfield(options,'Metric') 123 | options.Metric = 'Cosine'; 124 | end 125 | 126 | switch lower(options.Metric) 127 | case {lower('Euclidean')} 128 | case {lower('Cosine')} 129 | if ~isfield(options,'bNormalized') 130 | options.bNormalized = 0; 131 | end 132 | otherwise 133 | error('Metric does not exist!'); 134 | end 135 | 136 | %================================================= 137 | if ~isfield(options,'NeighborMode') 138 | options.NeighborMode = 'KNN'; 139 | end 140 | 141 | switch lower(options.NeighborMode) 142 | case {lower('KNN')} %For simplicity, we include the data point itself in the kNN 143 | if ~isfield(options,'k') 144 | options.k = 5; 145 | end 146 | case {lower('Supervised')} 147 | if ~isfield(options,'bLDA') 148 | options.bLDA = 0; 149 | end 150 | if options.bLDA 151 | options.bSelfConnected = 1; 152 | end 153 | if ~isfield(options,'k') 154 | options.k = 0; 155 | end 156 | if ~isfield(options,'gnd') 157 | error('Label(gnd) should be provided under ''Supervised'' NeighborMode!'); 158 | end 159 | if ~isempty(fea) && length(options.gnd) ~= size(fea,1) 160 | error('gnd doesn''t match with fea!'); 161 | end 162 | otherwise 163 | error('NeighborMode does not exist!'); 164 | end 165 | 166 | %================================================= 167 | 168 | if ~isfield(options,'WeightMode') 169 | options.WeightMode = 'Binary'; 170 | end 171 | 172 | bBinary = 0; 173 | switch lower(options.WeightMode) 174 | case {lower('Binary')} 175 | bBinary = 1; 176 | case {lower('HeatKernel')} 177 | if ~strcmpi(options.Metric,'Euclidean') 178 | warning('''HeatKernel'' WeightMode should be used under ''Euclidean'' Metric!'); 179 | options.Metric = 'Euclidean'; 180 | end 181 | if ~isfield(options,'t') 182 | options.t = 1; 183 | end 184 | case {lower('Cosine')} 185 | if ~strcmpi(options.Metric,'Cosine') 186 | warning('''Cosine'' WeightMode should be used under ''Cosine'' Metric!'); 187 | options.Metric = 'Cosine'; 188 | end 189 | if ~isfield(options,'bNormalized') 190 | options.bNormalized = 0; 191 | end 192 | otherwise 193 | error('WeightMode does not exist!'); 194 | end 195 | 196 | %================================================= 197 | 198 | if ~isfield(options,'bSelfConnected') 199 | options.bSelfConnected = 1; 200 | end 201 | 202 | %================================================= 203 | tmp_T = cputime; 204 | 205 | if isfield(options,'gnd') 206 | nSmp = length(options.gnd); 207 | else 208 | nSmp = size(fea,1); 209 | end 210 | maxM = 62500000; %500M 211 | BlockSize = floor(maxM/(nSmp*3)); 212 | 213 | 214 | if strcmpi(options.NeighborMode,'Supervised') 215 | Label = unique(options.gnd); 216 | nLabel = length(Label); 217 | if options.bLDA 218 | G = zeros(nSmp,nSmp); 219 | for idx=1:nLabel 220 | classIdx = options.gnd==Label(idx); 221 | G(classIdx,classIdx) = 1/sum(classIdx); 222 | end 223 | W = sparse(G); 224 | elapse = cputime - tmp_T; 225 | return; 226 | end 227 | 228 | switch lower(options.WeightMode) 229 | case {lower('Binary')} 230 | if options.k > 0 231 | G = zeros(nSmp*(options.k+1),3); 232 | idNow = 0; 233 | for i=1:nLabel 234 | classIdx = find(options.gnd==Label(i)); 235 | D = EuDist2(fea(classIdx,:),[],0); 236 | [dump idx] = sort(D,2); % sort each row 
237 | clear D dump; 238 | idx = idx(:,1:options.k+1); 239 | 240 | nSmpClass = length(classIdx)*(options.k+1); 241 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 242 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 243 | G(idNow+1:nSmpClass+idNow,3) = 1; 244 | idNow = idNow+nSmpClass; 245 | clear idx 246 | end 247 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 248 | G = max(G,G'); 249 | else 250 | G = zeros(nSmp,nSmp); 251 | for i=1:nLabel 252 | classIdx = find(options.gnd==Label(i)); 253 | G(classIdx,classIdx) = 1; 254 | end 255 | end 256 | 257 | if ~options.bSelfConnected 258 | for i=1:size(G,1) 259 | G(i,i) = 0; 260 | end 261 | end 262 | 263 | W = sparse(G); 264 | case {lower('HeatKernel')} 265 | if options.k > 0 266 | G = zeros(nSmp*(options.k+1),3); 267 | idNow = 0; 268 | for i=1:nLabel 269 | classIdx = find(options.gnd==Label(i)); 270 | D = EuDist2(fea(classIdx,:),[],0); 271 | [dump idx] = sort(D,2); % sort each row 272 | clear D; 273 | idx = idx(:,1:options.k+1); 274 | dump = dump(:,1:options.k+1); 275 | dump = exp(-dump/(2*options.t^2)); 276 | 277 | nSmpClass = length(classIdx)*(options.k+1); 278 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 279 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 280 | G(idNow+1:nSmpClass+idNow,3) = dump(:); 281 | idNow = idNow+nSmpClass; 282 | clear dump idx 283 | end 284 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 285 | else 286 | G = zeros(nSmp,nSmp); 287 | for i=1:nLabel 288 | classIdx = find(options.gnd==Label(i)); 289 | D = EuDist2(fea(classIdx,:),[],0); 290 | D = exp(-D/(2*options.t^2)); 291 | G(classIdx,classIdx) = D; 292 | end 293 | end 294 | 295 | if ~options.bSelfConnected 296 | for i=1:size(G,1) 297 | G(i,i) = 0; 298 | end 299 | end 300 | 301 | W = sparse(max(G,G')); 302 | case {lower('Cosine')} 303 | if ~options.bNormalized 304 | [nSmp, nFea] = size(fea); 305 | if issparse(fea) 306 | fea2 = fea'; 307 | feaNorm = sum(fea2.^2,1).^.5; 308 | for i = 1:nSmp 309 | fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); 310 | end 311 | fea = fea2'; 312 | clear fea2; 313 | else 314 | feaNorm = sum(fea.^2,2).^.5; 315 | for i = 1:nSmp 316 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 317 | end 318 | end 319 | 320 | end 321 | 322 | if options.k > 0 323 | G = zeros(nSmp*(options.k+1),3); 324 | idNow = 0; 325 | for i=1:nLabel 326 | classIdx = find(options.gnd==Label(i)); 327 | D = fea(classIdx,:)*fea(classIdx,:)'; 328 | [dump idx] = sort(-D,2); % sort each row 329 | clear D; 330 | idx = idx(:,1:options.k+1); 331 | dump = -dump(:,1:options.k+1); 332 | 333 | nSmpClass = length(classIdx)*(options.k+1); 334 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 335 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 336 | G(idNow+1:nSmpClass+idNow,3) = dump(:); 337 | idNow = idNow+nSmpClass; 338 | clear dump idx 339 | end 340 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 341 | else 342 | G = zeros(nSmp,nSmp); 343 | for i=1:nLabel 344 | classIdx = find(options.gnd==Label(i)); 345 | G(classIdx,classIdx) = fea(classIdx,:)*fea(classIdx,:)'; 346 | end 347 | end 348 | 349 | if ~options.bSelfConnected 350 | for i=1:size(G,1) 351 | G(i,i) = 0; 352 | end 353 | end 354 | 355 | W = sparse(max(G,G')); 356 | otherwise 357 | error('WeightMode does not exist!'); 358 | end 359 | elapse = cputime - tmp_T; 360 | return; 361 | end 362 | 363 | 364 | if strcmpi(options.NeighborMode,'KNN') && (options.k > 0) 365 | if strcmpi(options.Metric,'Euclidean') 366 | G = zeros(nSmp*(options.k+1),3); 367 | for i = 1:ceil(nSmp/BlockSize) 
368 | if i == ceil(nSmp/BlockSize) 369 | smpIdx = (i-1)*BlockSize+1:nSmp; 370 | dist = EuDist2(fea(smpIdx,:),fea,0); 371 | dist = full(dist); 372 | [dump idx] = sort(dist,2); % sort each row 373 | idx = idx(:,1:options.k+1); 374 | dump = dump(:,1:options.k+1); 375 | if ~bBinary 376 | dump = exp(-dump/(2*options.t^2)); 377 | end 378 | 379 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 380 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); 381 | if ~bBinary 382 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); 383 | else 384 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = 1; 385 | end 386 | else 387 | smpIdx = (i-1)*BlockSize+1:i*BlockSize; 388 | dist = EuDist2(fea(smpIdx,:),fea,0); 389 | dist = full(dist); 390 | [dump idx] = sort(dist,2); % sort each row 391 | idx = idx(:,1:options.k+1); 392 | dump = dump(:,1:options.k+1); 393 | if ~bBinary 394 | dump = exp(-dump/(2*options.t^2)); 395 | end 396 | 397 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 398 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),2) = idx(:); 399 | if ~bBinary 400 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = dump(:); 401 | else 402 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = 1; 403 | end 404 | end 405 | end 406 | 407 | W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 408 | else 409 | if ~options.bNormalized 410 | [nSmp, nFea] = size(fea); 411 | if issparse(fea) 412 | fea2 = fea'; 413 | clear fea; 414 | for i = 1:nSmp 415 | fea2(:,i) = fea2(:,i) ./ max(1e-10,sum(fea2(:,i).^2,1).^.5); 416 | end 417 | fea = fea2'; 418 | clear fea2; 419 | else 420 | feaNorm = sum(fea.^2,2).^.5; 421 | for i = 1:nSmp 422 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 423 | end 424 | end 425 | end 426 | 427 | G = zeros(nSmp*(options.k+1),3); 428 | for i = 1:ceil(nSmp/BlockSize) 429 | if i == ceil(nSmp/BlockSize) 430 | smpIdx = (i-1)*BlockSize+1:nSmp; 431 | dist = fea(smpIdx,:)*fea'; 432 | dist = full(dist); 433 | [dump idx] = sort(-dist,2); % sort each row 434 | idx = idx(:,1:options.k+1); 435 | dump = -dump(:,1:options.k+1); 436 | 437 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 438 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); 439 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); 440 | else 441 | smpIdx = (i-1)*BlockSize+1:i*BlockSize; 442 | dist = fea(smpIdx,:)*fea'; 443 | dist = full(dist); 444 | [dump idx] = sort(-dist,2); % sort each row 445 | idx = idx(:,1:options.k+1); 446 | dump = -dump(:,1:options.k+1); 447 | 448 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 449 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),2) = idx(:); 450 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = dump(:); 451 | end 452 | end 453 | 454 | W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 455 | end 456 | 457 | if strcmpi(options.WeightMode,'Binary') 458 | W(find(W)) = 1; 459 | end 460 | 461 | if isfield(options,'bSemiSupervised') && options.bSemiSupervised 462 | tmpgnd = options.gnd(options.semiSplit); 463 | 464 | Label = unique(tmpgnd); 465 | nLabel = length(Label); 466 | G = zeros(sum(options.semiSplit),sum(options.semiSplit)); 467 | for idx=1:nLabel 468 | classIdx = tmpgnd==Label(idx); 469 | G(classIdx,classIdx) = 1; 470 | end 471 | Wsup = sparse(G); 472 | if 
~isfield(options,'SameCategoryWeight') 473 | options.SameCategoryWeight = 1; 474 | end 475 | W(options.semiSplit,options.semiSplit) = (Wsup>0)*options.SameCategoryWeight; 476 | end 477 | 478 | if ~options.bSelfConnected 479 | for i=1:size(W,1) 480 | W(i,i) = 0; 481 | end 482 | end 483 | 484 | W = max(W,W'); 485 | 486 | elapse = cputime - tmp_T; 487 | return; 488 | end 489 | 490 | 491 | % strcmpi(options.NeighborMode,'KNN') & (options.k == 0) 492 | % Complete Graph 493 | 494 | if strcmpi(options.Metric,'Euclidean') 495 | W = EuDist2(fea,[],0); 496 | W = exp(-W/(2*options.t^2)); 497 | else 498 | if ~options.bNormalized 499 | % feaNorm = sum(fea.^2,2).^.5; 500 | % fea = fea ./ repmat(max(1e-10,feaNorm),1,size(fea,2)); 501 | [nSmp, nFea] = size(fea); 502 | if issparse(fea) 503 | fea2 = fea'; 504 | feaNorm = sum(fea2.^2,1).^.5; 505 | for i = 1:nSmp 506 | fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); 507 | end 508 | fea = fea2'; 509 | clear fea2; 510 | else 511 | feaNorm = sum(fea.^2,2).^.5; 512 | for i = 1:nSmp 513 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 514 | end 515 | end 516 | end 517 | 518 | % W = full(fea*fea'); 519 | W = fea*fea'; 520 | end 521 | 522 | if ~options.bSelfConnected 523 | for i=1:size(W,1) 524 | W(i,i) = 0; 525 | end 526 | end 527 | 528 | W = max(W,W'); 529 | 530 | 531 | 532 | elapse = cputime - tmp_T; 533 | 534 | 535 | function D = EuDist2(fea_a,fea_b,bSqrt) 536 | % Euclidean Distance matrix 537 | % D = EuDist2(fea_a,fea_b) 538 | % fea_a: nSample_a * nFeature 539 | % fea_b: nSample_b * nFeature 540 | % D: nSample_a * nSample_a 541 | % or nSample_a * nSample_b 542 | 543 | 544 | if ~exist('bSqrt','var') 545 | bSqrt = 1; 546 | end 547 | 548 | 549 | if (~exist('fea_b','var')) || isempty(fea_b) 550 | [nSmp, nFea] = size(fea_a); 551 | 552 | aa = sum(fea_a.*fea_a,2); 553 | ab = fea_a*fea_a'; 554 | 555 | aa = full(aa); 556 | ab = full(ab); 557 | 558 | if bSqrt 559 | D = sqrt(repmat(aa, 1, nSmp) + repmat(aa', nSmp, 1) - 2*ab); 560 | D = real(D); 561 | else 562 | D = repmat(aa, 1, nSmp) + repmat(aa', nSmp, 1) - 2*ab; 563 | end 564 | 565 | D = max(D,D'); 566 | D = D - diag(diag(D)); 567 | D = abs(D); 568 | else 569 | [nSmp_a, nFea] = size(fea_a); 570 | [nSmp_b, nFea] = size(fea_b); 571 | 572 | aa = sum(fea_a.*fea_a,2); 573 | bb = sum(fea_b.*fea_b,2); 574 | ab = fea_a*fea_b'; 575 | 576 | aa = full(aa); 577 | bb = full(bb); 578 | ab = full(ab); 579 | 580 | if bSqrt 581 | D = sqrt(repmat(aa, 1, nSmp_b) + repmat(bb', nSmp_a, 1) - 2*ab); 582 | D = real(D); 583 | else 584 | D = repmat(aa, 1, nSmp_b) + repmat(bb', nSmp_a, 1) - 2*ab; 585 | end 586 | 587 | D = abs(D); 588 | end 589 | 590 | -------------------------------------------------------------------------------- /utils/litekmeans.m: -------------------------------------------------------------------------------- 1 | function [label, center, bCon, sumD, D] = litekmeans(X, k, varargin) 2 | % LITEKMEANS K-means clustering, accelerated by matlab matrix operations. 3 | % 4 | % label = LITEKMEANS(X, K) partitions the points in the N-by-P data matrix 5 | % X into K clusters. This partition minimizes the sum, over all 6 | % clusters, of the within-cluster sums of point-to-cluster-centroid 7 | % distances. Rows of X correspond to points, columns correspond to 8 | % variables. LITEKMEANS returns an N-by-1 vector label containing the 9 | % cluster indices of each point. 10 | % 11 | % [label, center] = LITEKMEANS(X, K) returns the K cluster centroid 12 | % locations in the K-by-P matrix center.
13 | % 14 | % [label, center, bCon] = LITEKMEANS(X, K) returns the bool value bCon to 15 | % indicate whether the iteration has converged. 16 | % 17 | % [label, center, bCon, SUMD] = LITEKMEANS(X, K) returns the 18 | % within-cluster sums of point-to-centroid distances in the 1-by-K vector 19 | % sumD. 20 | % 21 | % [label, center, bCon, SUMD, D] = LITEKMEANS(X, K) returns 22 | % distances from each point to every centroid in the N-by-K matrix D. 23 | % 24 | % [ ... ] = LITEKMEANS(..., 'PARAM1',val1, 'PARAM2',val2, ...) specifies 25 | % optional parameter name/value pairs to control the iterative algorithm 26 | % used by LITEKMEANS. Parameters are: 27 | % 28 | % 'Distance' - Distance measure, in P-dimensional space, that LITEKMEANS 29 | % should minimize with respect to. Choices are: 30 | % {'sqEuclidean'} - Squared Euclidean distance (the default) 31 | % 'cosine' - One minus the cosine of the included angle 32 | % between points (treated as vectors). Each 33 | % row of X SHOULD be normalized to unit length. If 34 | % the initial center matrix is provided, it 35 | % SHOULD also be normalized. 36 | % 37 | % 'Start' - Method used to choose initial cluster centroid positions, 38 | % sometimes known as "seeds". Choices are: 39 | % {'sample'} - Select K observations from X at random (the default) 40 | % 'cluster' - Perform preliminary clustering phase on random 10% 41 | % subsample of X. This preliminary phase is itself 42 | % initialized using 'sample'. An additional parameter 43 | % clusterMaxIter can be used to control the maximum 44 | % number of iterations in each preliminary clustering 45 | % problem. 46 | % matrix - A K-by-P matrix of starting locations; or a K-by-1 47 | % index vector indicating which K points in X 48 | % should be used as the initial center. In this case, 49 | % you can pass in [] for K, and LITEKMEANS infers K from 50 | % the first dimension of the matrix. 51 | % 52 | % 'MaxIter' - Maximum number of iterations allowed. Default is 100. 53 | % 54 | % 'Replicates' - Number of times to repeat the clustering, each with a 55 | % new set of initial centroids. Default is 1. If the 56 | % initial centroids are provided, the number of replicates 57 | % will automatically be set to 1. 58 | % 59 | % 'clusterMaxIter' - Only useful when 'Start' is 'cluster'. Maximum number 60 | % of iterations of the preliminary clustering phase. 61 | % Default is 10. 62 | % 63 | % 64 | % Examples: 65 | % 66 | % fea = rand(500,10); 67 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50); 68 | % 69 | % fea = rand(500,10); 70 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Replicates', 10); 71 | % 72 | % fea = rand(500,10); 73 | % [label, center, bCon, sumD, D] = litekmeans(fea, 5, 'MaxIter', 50); 74 | % TSD = sum(sumD); 75 | % 76 | % fea = rand(500,10); 77 | % initcenter = rand(5,10); 78 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Start', initcenter); 79 | % 80 | % fea = rand(500,10); 81 | % idx=randperm(500); 82 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Start', idx(1:5)); 83 | % 84 | % 85 | % See also KMEANS 86 | % 87 | % [Cite] Deng Cai, "Litekmeans: the fastest matlab implementation of 88 | % kmeans," Available at: 89 | % http://www.zjucadcg.cn/dengcai/Data/Clustering.html, 2011.
90 | % 91 | % version 2.0 --December/2011 92 | % version 1.0 --November/2011 93 | % 94 | % Written by Deng Cai (dengcai AT gmail.com) 95 | 96 | 97 | if nargin < 2 98 | error('litekmeans:TooFewInputs','At least two input arguments required.'); 99 | end 100 | 101 | [n, p] = size(X); 102 | 103 | 104 | pnames = { 'distance' 'start' 'maxiter' 'replicates' 'onlinephase' 'clustermaxiter'}; 105 | dflts = {'sqeuclidean' 'sample' [] [] 'off' [] }; 106 | [eid,errmsg,distance,start,maxit,reps,~,clustermaxit] = getargs(pnames, dflts, varargin{:}); 107 | if ~isempty(eid) 108 | error(sprintf('litekmeans:%s',eid),errmsg); 109 | end 110 | 111 | if ischar(distance) 112 | distNames = {'sqeuclidean','cosine'}; 113 | j = strcmpi(distance, distNames); 114 | j = find(j); 115 | if length(j) > 1 116 | error('litekmeans:AmbiguousDistance', ... 117 | 'Ambiguous ''Distance'' parameter value: %s.', distance); 118 | elseif isempty(j) 119 | error('litekmeans:UnknownDistance', ... 120 | 'Unknown ''Distance'' parameter value: %s.', distance); 121 | end 122 | distance = distNames{j}; 123 | else 124 | error('litekmeans:InvalidDistance', ... 125 | 'The ''Distance'' parameter value must be a string.'); 126 | end 127 | 128 | 129 | center = []; 130 | if ischar(start) 131 | startNames = {'sample','cluster'}; 132 | j = find(strncmpi(start,startNames,length(start))); 133 | if length(j) > 1 134 | error(message('litekmeans:AmbiguousStart', start)); 135 | elseif isempty(j) 136 | error(message('litekmeans:UnknownStart', start)); 137 | elseif isempty(k) 138 | error('litekmeans:MissingK', ... 139 | 'You must specify the number of clusters, K.'); 140 | end 141 | if j == 2 142 | if floor(.1*n) < 5*k 143 | j = 1; 144 | end 145 | end 146 | start = startNames{j}; 147 | elseif isnumeric(start) 148 | if size(start,2) == p 149 | center = start; 150 | elseif (size(start,2) == 1 || size(start,1) == 1) 151 | center = X(start,:); 152 | else 153 | error('litekmeans:MisshapedStart', ... 154 | 'The ''Start'' matrix must have the same number of columns as X.'); 155 | end 156 | if isempty(k) 157 | k = size(center,1); 158 | elseif (k ~= size(center,1)) 159 | error('litekmeans:MisshapedStart', ... 160 | 'The ''Start'' matrix must have K rows.'); 161 | end 162 | start = 'numeric'; 163 | else 164 | error('litekmeans:InvalidStart', ... 165 | 'The ''Start'' parameter value must be a string or a numeric matrix or array.'); 166 | end 167 | 168 | % The default maximum number of iterations is 100 169 | if isempty(maxit) 170 | maxit = 100; 171 | end 172 | 173 | % The default maximum number of iterations for the preliminary clustering 174 | % phase on a random 10% subsample is 10 175 | if isempty(clustermaxit) 176 | clustermaxit = 10; 177 | end 178 | 179 | 180 | % Assume one replicate 181 | if isempty(reps) || ~isempty(center) 182 | reps = 1; 183 | end 184 | 185 | if ~(isscalar(k) && isnumeric(k) && isreal(k) && k > 0 && (round(k)==k)) 186 | error('litekmeans:InvalidK', ... 187 | 'K must be a positive integer value.'); 188 | elseif n < k 189 | error('litekmeans:TooManyClusters', ...
'X must have more rows than the number of clusters.'); 191 | end 192 | 193 | 194 | bestlabel = []; 195 | sumD = zeros(1,k); 196 | bCon = false; 197 | 198 | for t=1:reps 199 | switch start 200 | case 'sample' 201 | center = X(randsample(n,k),:); 202 | case 'cluster' 203 | Xsubset = X(randsample(n,floor(.1*n)),:); 204 | [~, center] = litekmeans(Xsubset, k, varargin{:}, 'start','sample', 'replicates',1 ,'MaxIter',clustermaxit); 205 | case 'numeric' 206 | end 207 | 208 | last = 0;label=1; 209 | it=0; 210 | 211 | switch distance 212 | case 'sqeuclidean' 213 | while any(label ~= last) && it<maxit 214 | last = label; 215 | 216 | bb = full(sum(center.*center,2)'); 217 | ab = full(X*center'); 218 | D = bb(ones(1,n),:) - 2*ab; 219 | 220 | [val,label] = min(D,[],2); % assign samples to the nearest centers 221 | ll = unique(label); 222 | if length(ll) < k 223 | % one of the clusters is empty: reassign the points farthest from their centers 224 | missCluster = 1:k; 225 | missCluster(ll) = []; 226 | missNum = length(missCluster); 227 | aa = sum(X.*X,2); 228 | val = aa + val; 229 | [dump,idx] = sort(val,1,'descend'); 230 | label(idx(1:missNum)) = missCluster; 231 | end 232 | E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix 233 | center = full((E*spdiags(1./sum(E,1)',0,k,k))'*X); % compute center of each cluster 234 | it=it+1; 235 | end 236 | if it<maxit 237 | bCon = true; % converged before reaching MaxIter 238 | end 239 | 240 | if isempty(bestlabel) 241 | bestlabel = label; 242 | bestcenter = center; 243 | if reps>1 244 | if it>=maxit 245 | aa = full(sum(X.*X,2)); 246 | bb = full(sum(center.*center,2)); 247 | ab = full(X*center'); 248 | D = bsxfun(@plus,aa,bb') - 2*ab; 249 | D(D<0) = 0; 250 | else 251 | aa = full(sum(X.*X,2)); 252 | D = aa(:,ones(1,k)) + D; 253 | D(D<0) = 0; 254 | end 255 | D = sqrt(D); 256 | for j = 1:k 257 | sumD(j) = sum(D(label==j,j)); 258 | end 259 | bestsumD = sumD; 260 | bestD = D; 261 | end 262 | else 263 | if it>=maxit 264 | aa = full(sum(X.*X,2)); 265 | bb = full(sum(center.*center,2)); 266 | ab = full(X*center'); 267 | D = bsxfun(@plus,aa,bb') - 2*ab; 268 | D(D<0) = 0; 269 | else 270 | aa = full(sum(X.*X,2)); 271 | D = aa(:,ones(1,k)) + D; 272 | D(D<0) = 0; 273 | end 274 | D = sqrt(D); 275 | for j = 1:k 276 | sumD(j) = sum(D(label==j,j)); 277 | end 278 | if sum(sumD) < sum(bestsumD) 279 | bestlabel = label; 280 | bestcenter = center; 281 | bestsumD = sumD; 282 | bestD = D; 283 | end 284 | end 285 | case 'cosine' 286 | while any(label ~= last) && it<maxit 287 | last = label; 288 | W=full(X*center'); 289 | [val,label] = max(W,[],2); % assign samples to the nearest centers 290 | ll = unique(label); 291 | if length(ll) < k % one of the clusters is empty: reassign the worst-fitting points 292 | missCluster = 1:k; 293 | missCluster(ll) = []; 294 | missNum = length(missCluster); 295 | [dump,idx] = sort(val); 296 | label(idx(1:missNum)) = missCluster; 297 | end 298 | E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix 299 | center = full((E*spdiags(1./sum(E,1)',0,k,k))'*X); % compute center of each cluster 300 | centernorm = sqrt(sum(center.^2, 2)); % renormalize centers to unit length for cosine distance 301 | center = center ./ centernorm(:,ones(1,p)); 302 | it=it+1; 303 | end 304 | if it<maxit 305 | bCon = true; 306 | end 307 | if isempty(bestlabel) 308 | bestlabel = label; 309 | bestcenter = center; 310 | if reps>1 311 | if any(label ~= last) 312 | W=full(X*center'); 313 | end 314 | D = 1-W; 315 | for j = 1:k 316 | sumD(j) = sum(D(label==j,j)); 317 | end 318 | bestsumD = sumD; 319 | bestD = D; 320 | end 321 | else 322 | if any(label ~= last) 323 | W=full(X*center'); 324 | end 325 | D = 1-W; 326 | for j = 1:k 327 | sumD(j) = sum(D(label==j,j)); 328 | end 329 | if sum(sumD) < sum(bestsumD) 330 | bestlabel = label; 331 | bestcenter = center; 332 | bestsumD = sumD; 333 | bestD = D; 334 | end 335 | end 336 | end 337 | end 338 | 339 | label = bestlabel; 340 | center = bestcenter; 341 | if reps>1 342 | sumD = bestsumD; 343 | D = bestD; 344 | elseif nargout > 3 345 | switch distance 346 | case 'sqeuclidean' 347 | if it>=maxit 348 | aa = full(sum(X.*X,2)); 349 | bb = full(sum(center.*center,2)); 350 | ab = full(X*center'); 351 | D = bsxfun(@plus,aa,bb') - 2*ab; 352 | D(D<0) = 0; 353 | else 354 | aa = full(sum(X.*X,2)); 355 | D = aa(:,ones(1,k)) + D; 356 | D(D<0) = 0; 357 | end 358 | D = sqrt(D); 359 | case 'cosine' 360 | if it>=maxit 361 | W=full(X*center'); 362 | end 363 | D = 1-W; 364 | end 365 | for j = 1:k 366 | sumD(j) = sum(D(label==j,j)); 367 | end 368 | end 369 | 370 | 371 | 372 | 373 | function [eid,emsg,varargout]=getargs(pnames,dflts,varargin) 374 | %GETARGS Process parameter name/value pairs 375 | % [EID,EMSG,A,B,...]=GETARGS(PNAMES,DFLTS,'NAME1',VAL1,'NAME2',VAL2,...) 376 | % accepts a cell array PNAMES of valid parameter names, a cell array 377 | % DFLTS of default values for the parameters named in PNAMES, and 378 | % additional parameter name/value pairs. Returns parameter values A,B,... 379 | % in the same order as the names in PNAMES. Outputs corresponding to 380 | % entries in PNAMES that are not specified in the name/value pairs are 381 | % set to the corresponding value from DFLTS. If nargout is equal to 382 | % length(PNAMES)+1, then unrecognized name/value pairs are an error.
If 383 | % nargout is equal to length(PNAMES)+2, then all unrecognized name/value 384 | % pairs are returned in a single cell array following any other outputs. 385 | % 386 | % EID and EMSG are empty if the arguments are valid. If an error occurs, 387 | % EMSG is the text of an error message and EID is the final component 388 | % of an error message id. GETARGS does not actually throw any errors, 389 | % but rather returns EID and EMSG so that the caller may throw the error. 390 | % Outputs will be partially processed after an error occurs. 391 | % 392 | % This utility can be used for processing name/value pair arguments. 393 | % 394 | % Example: 395 | % pnames = {'color' 'linestyle' 'linewidth'} 396 | % dflts = { 'r' '_' '1'} 397 | % varargin = {'linew' 2 'nonesuch' [1 2 3] 'linestyle' ':'} 398 | % [eid,emsg,c,ls,lw] = getargs(pnames,dflts,varargin{:}) % error 399 | % [eid,emsg,c,ls,lw,ur] = getargs(pnames,dflts,varargin{:}) % ok 400 | 401 | % We always create (nparams+2) outputs: 402 | % one each for emsg and eid 403 | % nparams varargs for values corresponding to names in pnames 404 | % If they ask for one more (nargout == nparams+3), it's for unrecognized 405 | % names/values 406 | 407 | % Original Copyright 1993-2008 The MathWorks, Inc. 408 | % Modified by Deng Cai (dengcai@gmail.com) 2011.11.27 409 | 410 | 411 | 412 | 413 | % Initialize some variables 414 | emsg = ''; 415 | eid = ''; 416 | nparams = length(pnames); 417 | varargout = dflts; 418 | unrecog = {}; 419 | nargs = length(varargin); 420 | 421 | % Must have name/value pairs 422 | if mod(nargs,2)~=0 423 | eid = 'WrongNumberArgs'; 424 | emsg = 'Wrong number of arguments.'; 425 | else 426 | % Process name/value pairs 427 | for j=1:2:nargs 428 | pname = varargin{j}; 429 | if ~ischar(pname) 430 | eid = 'BadParamName'; 431 | emsg = 'Parameter name must be text.'; 432 | break; 433 | end 434 | i = strcmpi(pname,pnames); 435 | i = find(i); 436 | if isempty(i) 437 | % if they've asked to get back unrecognized names/values, add this 438 | % one to the list 439 | if nargout > nparams+2 440 | unrecog((end+1):(end+2)) = {varargin{j} varargin{j+1}}; 441 | % otherwise, it's an error 442 | else 443 | eid = 'BadParamName'; 444 | emsg = sprintf('Invalid parameter name: %s.',pname); 445 | break; 446 | end 447 | elseif length(i)>1 448 | eid = 'BadParamName'; 449 | emsg = sprintf('Ambiguous parameter name: %s.',pname); 450 | break; 451 | else 452 | varargout{i} = varargin{j+1}; 453 | end 454 | end 455 | end 456 | 457 | varargout{nparams+1} = unrecog; 458 | -------------------------------------------------------------------------------- /utils/mySVD.m: -------------------------------------------------------------------------------- 1 | function [U, S, V] = mySVD(X,ReducedDim) 2 | %mySVD Accelerated singular value decomposition. 3 | % [U,S,V] = mySVD(X) produces a diagonal matrix S of the same 4 | % dimension as the rank of X and with nonnegative diagonal elements in 5 | % decreasing order, and unitary matrices U and V so that 6 | % X = U*S*V'. 7 | % 8 | % [U,S,V] = mySVD(X,ReducedDim) produces a diagonal matrix S of 9 | % dimension ReducedDim and with nonnegative diagonal elements in 10 | % decreasing order, and unitary matrices U and V so that 11 | % Xhat = U*S*V' is the best approximation (with respect to the F norm) of X 12 | % among all the matrices with rank no larger than ReducedDim.
13 | % 14 | % Based on the size of X, mySVD computes the eigenvectors of X*X^T or X^T*X 15 | % first, and then converts them to the eigenvectors of the other. 16 | % 17 | % See also SVD. (A reconstruction check appears at the end of this dump.) 18 | % 19 | % version 2.0 --Feb/2009 20 | % version 1.0 --April/2004 21 | % 22 | % Written by Deng Cai (dengcai AT gmail.com) 23 | % 24 | 25 | MAX_MATRIX_SIZE = 1600; % You can change this number according to your machine's computational power 26 | EIGVECTOR_RATIO = 0.1; % You can change this number according to your machine's computational power 27 | 28 | 29 | if ~exist('ReducedDim','var') 30 | ReducedDim = 0; 31 | end 32 | 33 | [nSmp, mFea] = size(X); 34 | if mFea/nSmp > 1.0713 35 | ddata = X*X'; 36 | ddata = max(ddata,ddata'); 37 | 38 | dimMatrix = size(ddata,1); 39 | if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO) 40 | option = struct('disp',0); 41 | [U, eigvalue] = eigs(ddata,ReducedDim,'la',option); 42 | eigvalue = diag(eigvalue); 43 | else 44 | if issparse(ddata) 45 | ddata = full(ddata); 46 | end 47 | 48 | [U, eigvalue] = eig(ddata); 49 | eigvalue = diag(eigvalue); 50 | [dump, index] = sort(-eigvalue); 51 | eigvalue = eigvalue(index); 52 | U = U(:, index); 53 | end 54 | clear ddata; 55 | 56 | maxEigValue = max(abs(eigvalue)); 57 | eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10); 58 | eigvalue(eigIdx) = []; 59 | U(:,eigIdx) = []; 60 | 61 | if (ReducedDim > 0) && (ReducedDim < length(eigvalue)) 62 | eigvalue = eigvalue(1:ReducedDim); 63 | U = U(:,1:ReducedDim); 64 | end 65 | 66 | eigvalue_Half = eigvalue.^.5; 67 | S = spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half)); 68 | 69 | if nargout >= 3 70 | eigvalue_MinusHalf = eigvalue_Half.^-1; 71 | V = X'*(U.*repmat(eigvalue_MinusHalf',size(U,1),1)); 72 | end 73 | else 74 | ddata = X'*X; 75 | ddata = max(ddata,ddata'); 76 | 77 | dimMatrix = size(ddata,1); 78 | if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO) 79 | option = struct('disp',0); 80 | [V, eigvalue] = eigs(ddata,ReducedDim,'la',option); 81 | eigvalue = diag(eigvalue); 82 | else 83 | if issparse(ddata) 84 | ddata = full(ddata); 85 | end 86 | 87 | [V, eigvalue] = eig(ddata); 88 | eigvalue = diag(eigvalue); 89 | 90 | [dump, index] = sort(-eigvalue); 91 | eigvalue = eigvalue(index); 92 | V = V(:, index); 93 | end 94 | clear ddata; 95 | 96 | maxEigValue = max(abs(eigvalue)); 97 | eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10); 98 | eigvalue(eigIdx) = []; 99 | V(:,eigIdx) = []; 100 | 101 | if (ReducedDim > 0) && (ReducedDim < length(eigvalue)) 102 | eigvalue = eigvalue(1:ReducedDim); 103 | V = V(:,1:ReducedDim); 104 | end 105 | 106 | eigvalue_Half = eigvalue.^.5; 107 | S = spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half)); 108 | 109 | eigvalue_MinusHalf = eigvalue_Half.^-1; 110 | U = X*(V.*repmat(eigvalue_MinusHalf',size(V,1),1)); 111 | end 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /utils/shrink.m: -------------------------------------------------------------------------------- 1 | function res = shrink(x,a) % element-wise soft-thresholding: argmin_z 0.5*(z-x)^2 + a*|z| (a demo appears at the end of this dump) 2 | res=sign(x).*( max(abs(x)-a,0)); 3 | end -------------------------------------------------------------------------------- /utils/similarMatrix_CAN.m: -------------------------------------------------------------------------------- 1 | function [S,gamma] = similarMatrix_CAN(distP2D,k,rr) 2 | %% input: 3 | %%% distP2D: the distance from data to prototype, n2*n1 4 | %%% k: the number of
neighbors 5 | %%% rr: the regularization parameter gamma; pass -1 (or omit it) to 6 | %%% estimate gamma adaptively from the k-nearest-neighbor distances 7 | %% output: 8 | %%% S: the similarity matrix, n2*n1 (a usage sketch appears at the end of this dump) 9 | [n2,n1]=size(distP2D); 10 | % distP2D=EuDist2(data',prototype); % n2*n1 11 | if nargin==2 12 | rr=-1; 13 | end 14 | if rr == -1 15 | [d, idx] = sort(distP2D,2,'ascend'); 16 | 17 | gamma=1/n2*sum( k/2* d(:,k+1) - 1/2* sum(d(:,1:k),2) ); % adaptive gamma, averaged over all samples 18 | S = zeros(n2,n1); 19 | for i = 1:n2 20 | idxa0 = idx(i,1:k); 21 | S(i,idxa0)=EProjSimplex_new((d(i,k+1)-d(i,1:k))./(2*gamma)); 22 | end 23 | else 24 | gamma=rr; 25 | [dx, idx] = sort(distP2D,2,'ascend'); 26 | S = zeros(n2,n1); 27 | for i = 1:n2 28 | idxa0 = idx(i,1:k); 29 | S(i,idxa0)=EProjSimplex_new(-1*dx(i,1:k)./(2*gamma)); 30 | end 31 | end 32 | 33 | end 34 | 35 | -------------------------------------------------------------------------------- /utils/splitData.m: -------------------------------------------------------------------------------- 1 | function [X1,Y1,X2,Y2] = splitData(varargin) 2 | % input: 3 | % X:fea*n 4 | % Y:n*1 5 | % num: a ratio in (0,1) or an integer number of samples per class 6 | % output 7 | % X1: fea*num 8 | % Y1: num*1 9 | % X2: fea*(n-num) 10 | % Y2: (n-num)*1 11 | X1=[];X2=[];Y1=[];Y2=[]; 12 | if nargin==3 13 | X=varargin{1}; 14 | Y=varargin{2}; 15 | num=varargin{3}; 16 | else 17 | error("splitData: At least 3 parameters are required\n"); 18 | end 19 | if num==0 20 | error("splitData: Parameter{3} must be greater than zero\n"); 21 | end 22 | if size(Y,2)>1 23 | Y=Y'; 24 | end 25 | n=size(X,2); 26 | C=length(find(unique(Y))); 27 | if 0<num && num<1 % num is a ratio: draw floor(num*len) samples per class 28 | ratio=num; 29 | else 30 | ratio=0; % num is an absolute number of samples per class 31 | end 32 | for c=1:C 33 | pos=find(Y==c); 34 | len=length(pos); 35 | randomIndex=randperm(len); 36 | if ratio>0, selectedNumber=floor(ratio*len); else, selectedNumber=num; end 37 | if selectedNumber>len 38 | warning('selectedNumber (%d)> len (%d)!\n',selectedNumber,len); 39 | selectedNumber=len; 40 | end 41 | index=pos(randomIndex(1:selectedNumber)); 42 | index2=pos(randomIndex(selectedNumber+1:end)); 43 | X1=[X1,X(:,index)];Y1=[Y1;Y(index)]; 44 | X2=[X2,X(:,index2)];Y2=[Y2;Y(index2)]; 45 | end 46 | end 47 | 48 | -------------------------------------------------------------------------------- /utils/updateL21.m: -------------------------------------------------------------------------------- 1 | function [G] = updateL21(E) 2 | %% input 3 | %%% E: m*d; minimizing ||E||_{2,1} is handled via the reweighted trace tr(E'*G*E), with G an m*m diagonal matrix 4 | %% output 5 | %%% G : sparse diagonal matrix with G(i,i)=1/(2*||E(i,:)||_2), so that tr(E'*G*E)=0.5*||E||_{2,1} for fixed G (a numeric check appears at the end of this dump) 6 | ec = sqrt(sum(E.*E,2)+eps); 7 | G = 0.5./ec; 8 | n=length(G); 9 | G = spdiags(G,0,n,n); 10 | end 11 | -------------------------------------------------------------------------------- /utils/withinScatter.m: -------------------------------------------------------------------------------- 1 | function [Sw] = withinScatter(X,Y) % within-class scatter: Sw = sum_c (X_c - m_c)(X_c - m_c)' 2 | C=length(find(unique(Y))); 3 | Sw=0; 4 | for c=1:C 5 | Xc=X(:,Y==c); 6 | Xmean=mean(Xc,2); 7 | Fc=(Xc-Xmean); 8 | Sw=Sw+(Fc*Fc'); 9 | end 10 | end 11 | --------------------------------------------------------------------------------
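The sketches below are editor-added illustrations, not files from the repository; they assume the repository root is the working directory and that ./utils/ is on the MATLAB path (addpath('./utils/')). First, a minimal usage sketch of hotmatrix, the one-hot encoder documented above:

labels = [1;2;2;3]; C = 3;
H  = hotmatrix(labels,C);    % H(i,c)==1 iff labels(i)==c
Hw = hotmatrix(labels,C,1);  % weighted variant: nonzero entries are 1/n_c, with n_c the class size
% here Hw(2,2)==0.5, because class 2 contains two samples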
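A small check of shrink, the element-wise soft-thresholding operator: it computes the proximal map of a*|.|, i.e., argmin_z 0.5*(z-x)^2 + a*|z| applied entry by entry.

x = [-3 -0.2 0 0.5 2];
res = shrink(x,1)   % expected: [-2 0 0 0 1]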
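A numeric check of the reweighting identity behind updateL21: with G(i,i)=1/(2*||E(i,:)||_2), the quantity tr(E'*G*E) equals 0.5*||E||_{2,1} up to the eps smoothing. This is the standard iteratively-reweighted device used by L21-regularized solvers.

E = randn(6,3);
G = updateL21(E);                 % sparse diagonal, G(i,i)=1/(2*||E(i,:)||_2)
l21 = sum(sqrt(sum(E.*E,2)));     % ||E||_{2,1}
fprintf('0.5*||E||_{2,1}=%.6f, tr(E''*G*E)=%.6f\n', 0.5*l21, trace(E'*G*E));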
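A sanity check of mySVD: since it eigendecomposes the smaller Gram matrix (X*X' or X'*X) and converts the eigenvectors to those of the other, the factors should reconstruct X like the built-in svd does, and a ReducedDim argument should give the best low-rank approximation in the Frobenius norm.

X = randn(200,30);
[U,S,V] = mySVD(X);
norm(X - U*S*V','fro')        % should be near 0
[U5,S5,V5] = mySVD(X,5);      % rank-5 truncation
norm(X - U5*S5*V5','fro')     % residual energy of the discarded singular values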
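Finally, a usage sketch of similarMatrix_CAN together with the EuDist2 helper. Each row of S holds similarities over the k nearest prototypes (here the samples themselves); assuming EProjSimplex_new projects onto the probability simplex, as its use in the function suggests, each row should sum to 1.

X = randn(20,50);                    % m*n data, repo convention (features*samples)
D = EuDist2(X',[],0);                % n*n squared Euclidean distances between samples
[S,gamma] = similarMatrix_CAN(D,5);  % k=5 adaptive neighbors, gamma estimated from the data
sum(S(1,:))                          % each row should sum to 1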