├── AR_Face_img.mat
├── COIL20.mat
├── Demo_Clustering.m
├── Demo_Supervised_Feature_Selection.m
├── Demo_Unsupervised_Feature_Selection.m
├── Demo_Unsupervised_Representation_Learning.m
├── LICENCE
├── algorithm
│   ├── FeatureSelection
│   │   └── unsupervised
│   │       ├── 2019-LRLMR
│   │       │   └── LRLMR.m
│   │       ├── 2019-URAFS
│   │       │   ├── URAFS.m
│   │       │   └── URAFS_SolveProb14.m
│   │       ├── 2021-AGUFS
│   │       │   ├── AGUFS.m
│   │       │   └── AGUFS_SolveProb14.m
│   │       ├── 2021-DSLRL
│   │       │   └── DSLRL.m
│   │       ├── 2022-DLUFS
│   │       │   └── DLUFS.m
│   │       └── 2022-SLMEA
│   │           └── SLMEA.m
│   ├── RepresentationLearning
│   │   ├── supervised
│   │   │   ├── 2017-MRSL
│   │   │   │   ├── MRSL.m
│   │   │   │   └── MRSL_getR.m
│   │   │   ├── 2019-RSLDA
│   │   │   │   └── RSLDA.m
│   │   │   ├── 2020-LRDAGP
│   │   │   │   ├── LRDAGP.m
│   │   │   │   ├── LRDAGP_solveAlg1.m
│   │   │   │   └── LRDAGP_solveAlg2.m
│   │   │   ├── 2020-RDA_FSIS
│   │   │   │   └── RDA_FSIS.m
│   │   │   ├── 2021-DSDPL
│   │   │   │   └── DSDPL.m
│   │   │   └── 2021-SN_TSL
│   │   │       └── SN_TSL.m
│   │   └── unsupervised
│   │       └── 2020-JLRSL
│   │           └── JLRSL.m
│   └── clustering
│       └── 2015-rLPP
│           └── rLPP.m
├── readme.md
└── utils
    ├── EProjSimplex_new.m
    ├── EuDist2.m
    ├── GPI.m
    ├── IterativeMultiplicativeUpdate.m
    ├── L2Norm.m
    ├── MyClusteringMeasure.m
    ├── SVT.m
    ├── SolveL21Problem.m
    ├── betweenScatter.m
    ├── centeringMatrix.m
    ├── classifyKNN.m
    ├── computeL.m
    ├── defaultOptions.m
    ├── getClusteringResults.m
    ├── getFeatureSelectionResults.m
    ├── hotmatrix.m
    ├── lapgraph.m
    ├── litekmeans.m
    ├── mySVD.m
    ├── shrink.m
    ├── similarMatrix_CAN.m
    ├── splitData.m
    ├── updateL21.m
    └── withinScatter.m

--------------------------------------------------------------------------------
/AR_Face_img.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzf495/Reimplementation-of-Attractive-Feature-Selection-and-Clustering-Methods/70cdeb8f79210b9315721750e7dacb1d4fc63b25/AR_Face_img.mat
--------------------------------------------------------------------------------
/COIL20.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzf495/Reimplementation-of-Attractive-Feature-Selection-and-Clustering-Methods/70cdeb8f79210b9315721750e7dacb1d4fc63b25/COIL20.mat
--------------------------------------------------------------------------------
/Demo_Clustering.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 | path='./COIL20.mat';
8 | load(path,'X','Y');
9 | X=X'; %% The input dimension is m*n
10 | %% Select an algorithm
11 | %%% === Clustering ===
12 | algorithm=@rLPP;
13 |
14 | %% Set the hyper-parameters
15 | %%% Notice: you should modify `options` to tune the hyper-parameters
16 | options=struct();
17 | %% Run the algorithm
18 | algorithm(X,Y,options);
--------------------------------------------------------------------------------
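All demos pass an `options` struct whose missing fields are filled in by `defaultOptions` (see utils), so tuning a hyper-parameter just means setting the field before the call. A minimal sketch, assuming rLPP accepts fields such as `T` and `dim` (rLPP.m is not shown in this excerpt, so check its header for the actual field names):

% Hypothetical tuning of rLPP via the options struct
options=struct();
options.T=20;    % assumed iteration-count field; overrides the default
options.dim=60;  % assumed reduced-dimension field
algorithm=@rLPP;
algorithm(X,Y,options);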
/Demo_Supervised_Feature_Selection.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 |
8 | %% Load COIL20
9 | path='./COIL20.mat';
10 | load(path,'X','Y');
11 | X=X'; %% The input dimension is m*n
12 |
13 |
14 | %%% Load AR_Face_img
15 | % path='./AR_Face_img.mat';
16 | % clear AllSet;
17 | % load(path,'AllSet');
18 | % XY=AllSet;
19 | % X=XY.X;
20 | % Y=XY.y;
21 |
22 |
23 | %% Data process
24 | %%% try `zscore` or `normr`
25 | % X=L2Norm(X')';
26 | % X=double(zscore(X',1))';
27 | % X=normr(X')';
28 |
29 |
30 | %% Split dataset
31 | %%% select 'number' samples from each class as a training set, and use
32 | %%% the rest as a test set
33 | number=10;
34 | [X1,Y1,X2,Y2] = splitData(X,Y,number);
35 |
36 |
37 | %% Select an algorithm
38 | % algorithm=@MRSL;
39 | % algorithm=@DSDPL; X=double(zscore(X',1))';
40 | % algorithm=@RSLDA;
41 | % algorithm=@LRDAGP;
42 | % algorithm=@RDA_FSIS;
43 | algorithm=@SN_TSL;
44 | %% Set the hyper-parameters
45 | %%% Notice: you should modify `options` to tune the hyper-parameters
46 | options=struct();
47 |
48 |
49 | %% Run the algorithm
50 | algorithm(X1,Y1,X2,Y2,options);
--------------------------------------------------------------------------------
/Demo_Unsupervised_Feature_Selection.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 | path='./COIL20.mat';
8 | load(path,'X','Y');
9 | X=X'; %% The input dimension is m*n
10 | %% Select an algorithm (uncomment exactly one)
11 | %%% === Feature Selection ===
12 | algorithm=@LRLMR;
13 | % algorithm=@AGUFS;
14 | % algorithm=@DSLRL;
15 | % algorithm=@DLUFS; X=double(zscore(X',1))';
16 | % algorithm=@SLMEA;
17 |
18 | %% Set the hyper-parameters
19 | %%% Notice: you should modify `options` to tune the hyper-parameters
20 | options=struct();
21 | %% Run the algorithm
22 | algorithm(X,Y,options);
--------------------------------------------------------------------------------
/Demo_Unsupervised_Representation_Learning.m:
--------------------------------------------------------------------------------
1 | %% Add path
2 | addpath('./utils/');
3 | addpath(genpath('./algorithm/'));
4 | rng(495);
5 | %% Load the data
6 | clear X Y;
7 | path='./COIL20.mat';
8 | load(path,'X','Y');
9 | X=X'; %% The input dimension is m*n
10 |
11 | %% Split dataset
12 | %%% select 'number' samples from each class as a training set, and use
13 | %%% the rest as a test set
14 | number=10;
15 | [X1,Y1,X2,Y2] = splitData(X,Y,number);
16 |
17 | %% Select an algorithm
18 | %%% === Unsupervised Representation Learning ===
19 | algorithm=@JLRSL;
20 |
21 | %% Set the hyper-parameters
22 | %%% Notice: you should modify `options` to tune the hyper-parameters
23 | options=struct();
24 | %% Run the algorithm
25 | algorithm(X1,Y1,X2,Y2,options);
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 zzf495
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2019-LRLMR/LRLMR.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W]=LRLMR(X,Y,options) 2 | %% Implementation of LRLMR 3 | %%% Authors: Tang et al. 4 | %%% Titl: 2019-Unsupervised feature selection via latent representation learning and manifold regularization 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations of V 10 | %%% t The iterations of W 11 | %%% dim The dimension selected 12 | %%% alpha The weight of L2,1 norm 13 | %%% beta The weight of A-VV' 14 | %%% gamma The weight of manifold regularization 15 | %%% k The KNN number 16 | %% output: 17 | %%% results The results (list) [acc,acc2,NMI,purity] 18 | %%% results_iter The iteration information of 'results' 19 | %%% W The learned feature selection matrix 20 | %% Version 21 | %%% Implementation 2022-05-19 22 | options=defaultOptions(options,... 23 | 'T',10,... 24 | 't',10,... 25 | 'dim',80,... 26 | 'alpha',1,... 27 | 'beta',1e-4,... 28 | 'gamma',1e-4,... 29 | 'k',10); 30 | %% parameters 31 | T=options.T; 32 | t=options.t; 33 | dim=options.dim; 34 | alpha=options.alpha; 35 | beta=options.beta; 36 | gamma=options.gamma; 37 | k=options.k; 38 | %% Initialization 39 | results_iter=[]; 40 | myeps=1e-8; 41 | C=length(unique(Y)); 42 | [m,n]=size(X); 43 | XX=X*X'; 44 | % Init L by Eq.(6) 45 | clear manifold; 46 | manifold.k = k; 47 | manifold.Metric = 'Euclidean'; 48 | manifold.WeightMode = 'HeatKernel'; 49 | manifold.NeighborMode = 'KNN'; 50 | L=computeL(X,manifold); 51 | L=L./norm(L,'fro'); 52 | XLX=X*L*X'; 53 | % Init A by Eq.(6) 54 | clear manifold; 55 | manifold.k = 0; 56 | manifold.Metric = 'Euclidean'; 57 | manifold.WeightMode = 'HeatKernel'; 58 | manifold.NeighborMode = 'KNN'; 59 | A=lapgraph(X',manifold); 60 | % Init V by random 61 | V=rand(n,C); 62 | % Init G (Lambda in paper) 63 | G=eye(m); 64 | for i=1:T 65 | XV=X*V; 66 | for j=1:t 67 | % Update W by Eq.(11) 68 | W=(XX+alpha*G+gamma*XLX)\(XV); 69 | % Update G (Lambda) by Eq.(8) 70 | G=updateL21(W); 71 | end 72 | % Update V 73 | left=(X'*W)+2*beta*(A*V); 74 | right=V+2*beta*(V*V')*V; 75 | res=left./right; 76 | V=IterativeMultiplicativeUpdate(V,res); 77 | % scores 78 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C); 79 | for index=1:3 80 | results_iter(index,i)=results(index); 81 | end 82 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 83 | results(1),results(2),results(3)); 84 | end 85 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2019-URAFS/URAFS.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W]=URAFS(X,Y,options) 2 | %% Implementation of URAFS 3 | %%% Authors: Li et al. 
4 | %%% Titl: 2019-Generalized Uncorrelated Regression with Adaptive Graph for Unsupervised Feature Selection 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations of V 10 | %%% t The iterations of GPI and Algorithm 1 11 | %%% dim The dimension reduced 12 | %%% alpha The weight of manfiold regularization 13 | %%% beta The weight of Gaussian kernel (S) 14 | %%% lambda The weight of L2,1 of W 15 | %% output: 16 | %%% results The results (list) [acc,acc2,NMI,purity] 17 | %%% results_iter The iteration information of 'results' 18 | %%% W The learned feature selection matrix 19 | %% Version 20 | %%% Implementation 2022-05-19 21 | options=defaultOptions(options,... 22 | 'T',10,... %% The iterations 23 | 't',10,... %% The iterations of GPI and Algorithm 1 24 | 'dim',60,... %% The dimension reduced 25 | 'alpha',1e3,... %% The weight of NMF w.r.t features |X'-X'WH| 26 | 'beta',1e3,... %% The weight of Gaussian kernel (S) 27 | 'lambda',1e3); %% The weight of entropy 28 | %% parameters 29 | T=options.T; 30 | t=options.t; 31 | dim=options.dim; 32 | alpha=options.alpha; 33 | beta=options.beta; 34 | lambda=options.lambda; 35 | %% Initialization 36 | results_iter=[]; 37 | C=length(unique(Y)); 38 | [~,n]=size(X); 39 | % Init F 40 | [~,F]=litekmeans(X,C);F=F'; % n*C 41 | [Uf,~,~]=mySVD(F); 42 | F=Uf; % F'F=I 43 | % Init H 44 | H=centeringMatrix(n); 45 | % Init St 46 | St=X*H*X'; 47 | for i=1:T 48 | % Update S by Eq.(31) 49 | dist=EuDist2(F,F,0); 50 | expDist=exp(-dist/(2*beta)); 51 | sumExp=sum(expDist,1); 52 | S=expDist./sumExp; 53 | S=(S+S')/2; 54 | % Update W by Algorithm 1 55 | W=URAFS_SolveProb14(X,H,St,F,lambda,t); 56 | % Update L by (7) 57 | P=diag(sparse(sum(S))); 58 | Ls=P-S; 59 | A=H+2*alpha*Ls; 60 | CA=H*X'*W; 61 | % Update F by Algorithm 2 (GPI) 62 | opt.T=t; 63 | F=GPI(A,CA,opt); 64 | %% Classification 65 | % Select top d ranked features (descending order) as the results 66 | % scores 67 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C); 68 | for index=1:3 69 | results_iter(index,i)=results(index); 70 | end 71 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 72 | results(1),results(2),results(3)); 73 | end 74 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2019-URAFS/URAFS_SolveProb14.m: -------------------------------------------------------------------------------- 1 | function [W] = URAFS_SolveProb14(X,H,S,F,lambda,T) 2 | [m,~]=size(X); 3 | % Initialize D by eye 4 | D=eye(m,m); 5 | for i=1:T 6 | % Compute Q,B by Eq.(20) 7 | temp=S+lambda*D; 8 | [Ut,Sigma,Vt]=mySVD(temp); 9 | Sigma(Sigma<0)=0; 10 | squreSigma=Sigma.^0.5; 11 | inverseSqrtSigma=diag(1./(diag(squreSigma))); 12 | inverseSqrtSigma(isinf(inverseSqrtSigma))=0; 13 | SlambdaD=Ut*(inverseSqrtSigma)*Vt'; 14 | B= (SlambdaD)*X*H*F; 15 | % Update Q 16 | [Ub,~,Vb]=mySVD(B); 17 | Q=Ub*Vb'; 18 | % Update W 19 | % W=S+lambda*D; 20 | W=SlambdaD*Q; 21 | % Update D 22 | D=updateL21(W); 23 | end 24 | end 25 | 26 | -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2021-AGUFS/AGUFS.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W]=AGUFS(X,Y,options) 2 | %% Implementation of AGUFS 3 | %%% Authors: Huang et al. 
4 | %%% Titl: 2021-Adaptive graph-based generalized regression model for unsupervised feature selection 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations 10 | %%% t The iterations of GPI and Algorithm 1 11 | %%% dim The dimension selected 12 | %%% alpha The weight of manifold regularization 13 | %%% k The number of KNN 14 | %%% lambda The weight of L2,1-norm w.r.t W 15 | %% output: 16 | %%% results The results (list) [acc,acc2,NMI,purity] 17 | %%% results_iter The iteration information of 'results' 18 | %%% W The learned feature selection matrix 19 | %% Version 20 | %%% Implementation 2022-05-23 21 | options=defaultOptions(options,... 22 | 'T',10,... %% The iterations 23 | 't',10,... %% The iterations of GPI and Algorithm 1 24 | 'dim',60,... %% The dimension selected 25 | 'alpha',1e3,... %% The weight of manifold regularization 26 | 'k',10,... %% The number of KNN 27 | 'lambda',1e3); %% The weight of L2,1-norm w.r.t W 28 | %% parameters 29 | T=options.T; 30 | t=options.t; 31 | dim=options.dim; 32 | alpha=options.alpha; 33 | lambda=options.lambda; 34 | k=options.k; 35 | %% Initialization 36 | results_iter=[]; 37 | C=length(unique(Y)); 38 | [~,n]=size(X); 39 | % Init F 40 | [~,F]=litekmeans(X,C);F=F'; % n*C 41 | [Uf,~,~]=mySVD(F); 42 | F=Uf; % F'F=I 43 | % Init H 44 | H=centeringMatrix(n); 45 | % Init St 46 | XHX=X*H*X'; 47 | % Init S by Eq.(22) 48 | distX = EuDist2(X',X'); 49 | [S,rr]=similarMatrix_CAN(distX,k,-1); 50 | for i=1:T 51 | % compute L 52 | DS=diag(sparse(sum(S))); 53 | L=DS-S; 54 | XLX=X*L*X'; 55 | % Update W by Algorithm 1 56 | W=AGUFS_SolveProb14(X,H,XHX,XLX,F,alpha,lambda,t); 57 | % Update L by (7) 58 | P=diag(sparse(sum(S))); 59 | Ls=P-S; 60 | A=H+0.5*alpha*Ls; 61 | CA=H*X'*W; 62 | % Update F by Algorithm 2 (GPI) 63 | opt.T=t; 64 | F=GPI(A,CA,opt); 65 | % Update S by Eq.(22) 66 | distX = EuDist2(F,F); 67 | [S,~]=similarMatrix_CAN(distX,k,rr); 68 | S=real(S); 69 | %% Classification 70 | % Select top d ranked features (descending order) as the results 71 | % scores 72 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C); 73 | for index=1:3 74 | results_iter(index,i)=results(index); 75 | end 76 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 77 | results(1),results(2),results(3)); 78 | end 79 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2021-AGUFS/AGUFS_SolveProb14.m: -------------------------------------------------------------------------------- 1 | function [W] = AGUFS_SolveProb14(X,H,XHX,XLX,F,alpha,lambda,T) 2 | [m,~]=size(X); 3 | % Initialize D by eye 4 | D=eye(m,m); 5 | for i=1:T 6 | % Compute Q,B by Eq.(20) 7 | temp=XHX+alpha*XLX+lambda*D; 8 | [Ut,Sigma,Vt]=mySVD(temp); 9 | Sigma(Sigma<0)=0; 10 | squreSigma=Sigma.^0.5; 11 | inverseSqrtSigma=diag(1./(diag(squreSigma))); 12 | inverseSqrtSigma(isinf(inverseSqrtSigma))=0; 13 | SlambdaD=Ut*(inverseSqrtSigma)*Vt'; 14 | B= (SlambdaD)*X*H*F; 15 | % Update A 16 | [Ub,~,Vb]=mySVD(B); 17 | A=Ub*Vb'; 18 | % Update W 19 | W=SlambdaD*A; 20 | % Update D 21 | D=updateL21(W); 22 | end 23 | end 24 | 25 | -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2021-DSLRL/DSLRL.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,W] = DSLRL(X,Y,options) 2 | %% Implementation of DSLRL 3 | %%% Authors: Shang et al. 
4 | %%% Titl: 2021-Dual space latent representation learning for unsupervised feature selection 5 | %% intput: 6 | %%% X: The samples, m*n 7 | %%% Y: The labels of samples, n*1 8 | %% options 9 | %%% T The iterations of V 10 | %%% t The iterations of W 11 | %%% d The dimension reduced 12 | %%% alpha The weight of L2-1 for W 13 | %%% beta The weight of A-VV' 14 | %%% gamma The weight of manfiold regularization 15 | %%% lambda The weight of WW'=I 16 | %% output: 17 | %%% results The results (list) 18 | %%% results_iter The iteration information of 'results' 19 | %%% W The learned feature selection matrix 20 | %% Version 21 | %%% Implementation 2022-05-19 22 | options=defaultOptions(options,... 23 | 'T',10,... %% The iterations 24 | 'dim',80,... %% The dimension reduced 25 | 'alpha',1e3,... %% The weight of L2-1 for W 26 | 'beta',1e2,... %% The weight of A-VV' 27 | 'gamma',1e-3,...%% The weight of manfiold regularization 28 | 'lambda',1e-3); %% The weight of WW'=I 29 | %% parameters 30 | T=options.T; 31 | dim=options.dim; 32 | alpha=options.alpha; 33 | beta=options.beta; 34 | gamma=options.gamma; 35 | lambda=options.lambda; 36 | %% Initialization 37 | X=normr(X')'; 38 | % X=L2Norm(X')'; 39 | results_iter=[]; 40 | C=length(unique(Y)); 41 | [m,n]=size(X); 42 | eta=0.1; 43 | % Init A by Eq.(7) 44 | clear manifold; 45 | manifold.k = 0; 46 | manifold.Metric = 'Euclidean'; 47 | manifold.WeightMode = 'HeatKernel'; 48 | manifold.NeighborMode = 'KNN'; 49 | A=lapgraph(X',manifold); 50 | % Init B by Eq.(8) 51 | clear manifold; 52 | manifold.k = 0; 53 | manifold.Metric = 'Euclidean'; 54 | manifold.WeightMode = 'HeatKernel'; 55 | manifold.NeighborMode = 'KNN'; 56 | B=lapgraph(X,manifold); 57 | % Init V 58 | [~,V]=litekmeans(X,C);V=V'; % n*C 59 | % Init H 60 | H=eye(m); 61 | % Init W 62 | W=(X*X'+eta*eye(m))\(X*V); 63 | W=max(W,1e-8); 64 | X=X';% input X: n*m 65 | for i=1:T 66 | % Update W by Eq.(20) 67 | left=X'*V+2*beta*B*W+2*lambda*W; 68 | right=X'*X*W+alpha*H*W+2*(gamma+lambda)*W*W'*W; 69 | res=left./right; 70 | W=IterativeMultiplicativeUpdate(W,res); 71 | % Update H by Eq.(15) 72 | H=updateL21(W); 73 | % Update V by Eq.(23) 74 | left=X*W+2*beta*A*V; 75 | right=V+2*beta*V*V'*V; 76 | res=left./right; 77 | V=IterativeMultiplicativeUpdate(V,res); 78 | % scores 79 | [~,results] = getFeatureSelectionResults(X',Y,W,dim,C); 80 | for index=1:3 81 | results_iter(index,i)=results(index); 82 | end 83 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,... 84 | results(1),results(2),results(3)); 85 | end 86 | end -------------------------------------------------------------------------------- /algorithm/FeatureSelection/unsupervised/2022-DLUFS/DLUFS.m: -------------------------------------------------------------------------------- 1 | function [results,results_iter,Z] = DLUFS(X,Y,options) 2 | %% Notice 3 | %%% Official codes (python implementation) are available at https://github.com/mohsengh/DLUFS/ 4 | %% Data process: zscore 5 | %% Implementation of DLUFS 6 | %%% Authors: Ghassemi Parsa et al. 
7 | %%% Title: 2022-Low-rank dictionary learning for unsupervised feature selection
8 | %% input:
9 | %%% X: The samples, m*n
10 | %%% Y: The labels of samples, n*1
11 | %% options
12 | %%% T The iterations
13 | %%% dim The dimension selected
14 | %%% alpha The weight of manifold regularization
15 | %%% sigma The weight of Gaussian kernel
16 | %%% k The number of KNN
17 | %%% lambda The weight of L2,1-norm of Z
18 | %% output:
19 | %%% results The results (list)
20 | %%% results_iter The iteration information of 'results'
21 | %%% Z The learned feature selection matrix
22 | %% Version
23 | %%% Implementation 2022-05-23
24 | %% Parameter setting
25 | options=defaultOptions(options,...
26 | 'T',10,... %% The iterations
27 | 'dim',300,... %% The dimension selected
28 | 'alpha',1e-3,... %% The weight of manifold regularization
29 | 'sigma',1e2,... %% The weight of Gaussian kernel
30 | 'k',10,... %% The number of KNN
31 | 'lambda',1e-3); %% The weight of L2,1-norm of Z
32 | %% Read parameters
33 | T=options.T;
34 | dim=options.dim;
35 | alpha=options.alpha;
36 | lambda=options.lambda;
37 | sigma=options.sigma;
38 | k=options.k;
39 | eta=1e-16;
40 | %% Initialization
41 | C=length(unique(Y));
42 | m=size(X,1);
43 | results_iter=[];
44 | % Compute L
45 | manifold.NeighborMode = 'KNN';
46 | manifold.k = k;
47 | manifold.t =sigma;
48 | manifold.WeightMode = 'Heatkernel';
49 | manifold.Metric='Euclidean';
50 | L=computeL(X,manifold);
51 | XX=X'*X;
52 | Z=X;
53 | for i=1:T
54 | % Update B by Eq.(16)
55 | Sw=(Z*Z'+eta*eye(m));
56 | Sb=Z*XX*Z';
57 | res=Sw\Sb;
58 | [Ub,~,Vb]=mySVD(res,dim);
59 | B=Ub*Vb';
60 | % Update A by Eq.(10)
61 | A=(X*Z'*B')/(B*Sw*B'+eta*eye(m));
62 | % Update D by Eq.(19)
63 | D=updateL21(Z);
64 | % Update Z by Eq.(21)
65 | AB=A*B;
66 | E=AB'*AB+lambda*D;
67 | F=alpha*L;
68 | G=AB'*X;
69 | Z=sylvester(E,F,G); % fix: assign the solution of E*Z+Z*F=G back to Z (the original call discarded the result)
70 | % Scores
71 | [~,results] = getFeatureSelectionResults(X,Y,Z,dim,C);
72 | for index=1:3
73 | results_iter(index,i)=results(index);
74 | end
75 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
76 | results(1),results(2),results(3));
77 | end
78 | end
79 |
80 |
--------------------------------------------------------------------------------
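The Z-step above relies on MATLAB's built-in `sylvester`, which solves E*Z + Z*F = G for Z; the assignment fix can be sanity-checked numerically. A minimal sketch on random matrices (not repository data):

% Verify that sylvester(E,F,G) returns Z with E*Z + Z*F = G
E = rand(5);  E = E'*E + eye(5);   % symmetric positive definite, like AB'*AB+lambda*D
F = rand(4);  F = F'*F;            % plays the role of alpha*L
G = rand(5,4);
Z = sylvester(E, F, G);
fprintf('Sylvester residual: %.2e\n', norm(E*Z + Z*F - G, 'fro')); % ~1e-14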
/algorithm/FeatureSelection/unsupervised/2022-SLMEA/SLMEA.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,W]=SLMEA(X,Y,options)
2 | %% Implementation of SLMEA
3 | %%% Authors: Shang et al.
4 | %%% Title: 2022-Sparse and low-dimensional representation with maximum entropy adaptive graph for feature selection
5 | %% input:
6 | %%% X: The samples, m*n
7 | %%% Y: The labels of samples, n*1
8 | %% options
9 | %%% T The iterations
10 | %%% dim The dimension selected
11 | %%% alpha The weight of NMF w.r.t features |X'-X'WH|
12 | %%% beta The weight of L(2,1/2)-(1/2) norm
13 | %%% gamma The weight of manifold regularization
14 | %%% lambda The weight of entropy
15 | %% output:
16 | %%% results The results (list) [acc,NMI,purity]
17 | %%% results_iter The iteration information of 'results'
18 | %%% W The learned feature selection matrix
19 | %% Version
20 | %%% Implementation 2022-05-19
21 | options=defaultOptions(options,...
22 | 'T',10,... %% The iterations
23 | 'dim',80,... %% The dimension selected
24 | 'alpha',1e-3,... %% The weight of NMF w.r.t features |X'-X'WH|
25 | 'beta',0.01,... %% The weight of L(2,1/2)-(1/2) norm
26 | 'gamma',100,... %% The weight of manifold regularization
27 | 'lambda',1); %% The weight of entropy
28 | %% parameters
29 | T=options.T;
30 | dim=options.dim;
31 | alpha=options.alpha;
32 | beta=options.beta;
33 | gamma=options.gamma;
34 | lambda=options.lambda;
35 | %% Initialization
36 | eta=0.1;
37 | X=normr(X')';
38 | results_iter=[];
39 | myeps=1e-8;
40 | C=length(unique(Y));
41 | [m,n]=size(X);
42 | XX=X*X';
43 | % Init H
44 | [~,H]=litekmeans(X',C); % C*m
45 | [~,F]=litekmeans(X,C);F=F'; % n*C
46 | W=(H'*H+eta*eye(m))\H'; % m*C
47 | W=max(W,myeps);
48 | Gn=centeringMatrix(n);
49 | for i=1:T
50 | % Update S^H by Eq.(38)
51 | dist=EuDist2(H',H',0)+myeps;
52 | expDist=exp(dist/(2*lambda));
53 | sumExp=sum(expDist,1);
54 | SH=expDist./sumExp;
55 | % Update S^F by Eq.(40)
56 | dist=EuDist2(F,F,0);
57 | expDist=exp(dist/(2*lambda));
58 | sumExp=sum(expDist,1);
59 | SF=expDist./sumExp;
60 | % Update H by Eq.(24)
61 | DH=diag(sparse(sum(SH)));
62 | left=alpha*W'*XX+gamma*H*SH;
63 | right=alpha*W'*XX*W*H+gamma*H*DH;
64 | res=left./right;
65 | H=IterativeMultiplicativeUpdate(H,res);
66 | % Update F by GPI
67 | DF=diag(sparse(sum(SF)));
68 | LF=DF-SF;
69 | A1=Gn+gamma*LF; % support matrix
70 | A2=Gn*X'*W; % support matrix
71 | %% Method 2
72 | try
73 | R = GPI(A1,A2,[]);
74 | [Ur,~,Vr]=mySVD(R);
75 | F=Ur*Vr';
76 | catch ME
77 | warning('An error occurs when running GPI');
78 | break;
79 | end
80 |
81 | % Update W by Eq.(21)
82 | %%% compute U
83 | U=(4*diag(1./sum(power(W.*W+myeps,3/2),2)));
84 | %%% compute W
85 | left=alpha*XX*H'+X*Gn*F;
86 | right=alpha*XX*W*(H*H')+beta*U*W+X*Gn*X'*W;
87 | res=left./right;
88 | W=IterativeMultiplicativeUpdate(W,res);
89 | % Scores
90 | [~,results] = getFeatureSelectionResults(X,Y,W,dim,C);
91 | for index=1:3
92 | results_iter(index,i)=results(index);
93 | end
94 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
95 | results(1),results(2),results(3));
96 | end
97 | end
--------------------------------------------------------------------------------
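SLMEA (like URAFS and AGUFS above) updates F through GPI, the utility that solves min tr(W'AW-2W'B) s.t. W'W=I. A quick sanity check on random input, a sketch only:

% GPI returns an (approximately) orthonormal W: W'*W = I
A = rand(20); A = (A+A')/2;   % symmetric, as in the quadratic term
B = rand(20,5);
W = GPI(A, B, struct());
fprintf('Orthogonality error: %.2e\n', norm(W'*W - eye(5), 'fro'));
fprintf('Objective value: %.4f\n', trace(W'*A*W - 2*W'*B));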
/algorithm/RepresentationLearning/supervised/2017-MRSL/MRSL.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,W]=MRSL(trainX,trainY,testX,testY,options)
2 | %% Official codes: https://github.com/DarrenZZhang/MSRL
3 | %% Implementation of MRSL (Semi-supervised version)
4 | %%% Authors: Zhang et al.
5 | %%% Title: 2017-Marginal Representation Learning With Graph Structure Self-Adaptation
6 | %% input:
7 | %%% trainX: The training samples, m*n1
8 | %%% trainY: The labels of training samples, n1*1
9 | %%% testX: The test samples, m*n2
10 | %%% testY: The labels of test samples, n2*1
11 | %% options
12 | %%% T: The iterations
13 | %%% s: The latent dimension of projection matrix
14 | %%% mu: The regularization used to initialize the projection
15 | %%% matrix W
16 | %%% lambda: The weight of manifold regularization
17 | %%% beta: The weight of norm \|W\|_F^2
18 | %%% gamma: The weight of reconstruction
19 | %% output:
20 | %%% results The results (list) [acc,acc2,NMI,purity]
21 | %%% results_iter The iteration information of 'results'
22 | %%% W The learned projection matrix
23 | %% Version
24 | %%% Implementation 2022-05-18
25 | options=defaultOptions(options,...
26 | 's',10,... %%% The latent dimension of projection matrix
27 | 'T',10,... %%% The iterations
28 | 'mu',1e-4,... %%% The regularization used to initialize W
29 | 'lambda',1,... %%% The weight of manifold regularization
30 | 'beta',1,... %%% The weight of norm \|W\|_F^2
31 | 'gamma',1,... %%% The weight of reconstruction
32 | 'k',10); %%% The KNN numbers
33 | %% parameters
34 | T=options.T;
35 | mu=options.mu;
36 | beta=options.beta;
37 | lambda=options.lambda;
38 | gamma=options.gamma;
39 | k=options.k;
40 | s=options.s;
41 | %% Initialization
42 | results_iter=[];
43 | trainX=normr(trainX')';
44 | testX=normr(testX')';
45 |
46 | C=length(unique(trainY));
47 | [m,~]=size(trainX);
48 | n2=size(testX,2);
49 | X=[trainX,testX];
50 | %% Set W
51 | hotY1=hotmatrix(trainY,C);
52 | W=((trainX*trainX')+mu*eye(m))\(trainX*hotY1);
53 | %% Set P
54 | distX = EuDist2(X',X');
55 | [P,rr]=similarMatrix_CAN(distX,k,-1);
56 | A=eye(m,s);
57 | R=[hotY1;eye(n2,C)]';
58 | for i=1:T
59 | % Update B by Eq.(19)
60 | B=A'*W;
61 | % Update W by Eq.(21)
62 | %%% compute L
63 | Dp=diag(sparse(sum(P)));
64 | L=Dp-P;
65 | L=L./norm(L,'fro');
66 | %%% compute W
67 | G=X*X'+lambda*X*L*X'+(beta+gamma)*eye(m);
68 | W=(G-gamma*(A*A'))\(X*R');
69 | % Update A by Eq.(22)
70 | [Ua,~,Va]=mySVD(W*B',s);
71 | A=Ua*Va';
72 | % Classification
73 | [~,Ytpseudo]=max(W'*testX,[],1);Ytpseudo=Ytpseudo';
74 | YY=[trainY;Ytpseudo];
75 | % Update R by Algorithm 1
76 | F=W'*X;
77 | [R] = MRSL_getR(F,YY);
78 | % Update P by Eq.(36)
79 | distF= EuDist2(F',F');
80 | [P,~]=similarMatrix_CAN(distF,k,rr);
81 | results=MyClusteringMeasure(testY,Ytpseudo,1);%[ACC ACC2 MIhat Purity]';
82 | for index=1:3
83 | results_iter(index,i)=results(index);
84 | end
85 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
86 | results(1),results(2),results(3));
87 | end
88 | end
89 |
--------------------------------------------------------------------------------
/algorithm/RepresentationLearning/supervised/2017-MRSL/MRSL_getR.m:
--------------------------------------------------------------------------------
1 | function [R] = MRSL_getR(F,pseudoLabel)
2 | %% input:
3 | %%% F: The regression matrix with dimensions C*n
4 | %%% pseudoLabel: The pseudo labels of samples, n*1
5 | %% output:
6 | %%% R: The regression matrix solved, C*n
7 | [C,n]=size(F);
8 | % [~,pseudoLabel]=max(F,[],1);
9 | R=zeros(C,n);
10 | for idx=1:n
11 | m=pseudoLabel(idx);
12 | xi=0;t=0;
13 | Zj=F(:,idx)+1-repmat(F(m,idx),C,1);
14 | for c=1:C
15 | if m~=c
16 | zj=Zj(c);
17 | phiXi=2*xi+sum(min(xi-zj,0));
18 | if phiXi>0
19 | xi=xi+zj; % fix: accumulate the scalar zj; the original added the whole vector Zj, which breaks the scalar assignment to R(m,idx) below
20 | t=t+1;
21 | end
22 | end
23 | end
24 | xi=xi/(1+t);
25 | R(:,idx)=F(:,idx)+min(xi-Zj,0);
26 | R(m,idx)=F(m,idx)+xi;
27 | end
28 | end
29 |
--------------------------------------------------------------------------------
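RSLDA below updates its sparse error term with shrink(...); shrink.m is listed under utils but its source does not appear in this excerpt. A minimal sketch of the element-wise soft-thresholding operator it presumably implements (an assumption, not the repository file):

function [X] = shrink(X,e)
% Element-wise soft thresholding: argmin_Z e*||Z||_1 + 0.5*||Z-X||_F^2
X = sign(X).*max(abs(X)-e,0);
end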
/algorithm/RepresentationLearning/supervised/2019-RSLDA/RSLDA.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,Q]=RSLDA(trainX,trainY,testX,testY,options)
2 | %% Implementation of RSLDA
3 | %%% Authors: Wen et al.
4 | %%% Title: 2019-Robust Sparse Linear Discriminant Analysis
5 | %% input:
6 | %%% trainX: The training samples, m*n1
7 | %%% trainY: The labels of training samples, n1*1
8 | %%% testX: The test samples, m*n2
9 | %%% testY: The labels of test samples, n2*1
10 | %% options
11 | %%% T The iterations
12 | %%% dim The dimensions
13 | %%% mu The weight of inter-class term in LDA (1e-4 in paper)
14 | %%% lambda1 The weight of L2,1 norm
15 | %%% lambda2 The weight of L1 norm in E
16 | %%% betaMax The maximum value of beta
17 | %%% beta The weight of reconstruction (ADMM)
18 | %%% rho The increase rate
19 | %%% epsilon The regularization terms
20 | %% output:
21 | %%% results The results (list) [acc,NMI,purity]
22 | %%% results_iter The iteration information of 'results'
23 | %%% Q The learned projection matrix
24 | %% Version
25 | %%% Implementation 2022-05-18
26 | options=defaultOptions(options,...
27 | 'T',10,... %%% The iterations
28 | 'dim',10,... %%% The dimensions
29 | 'mu',1e-4,... %%% The weight of inter-class term in LDA (1e-4 in paper)
30 | 'beta',0.1,... %%% The weight of reconstruction (0.1 in paper)
31 | 'lambda1',1,... %%% The weight of L2,1 norm
32 | 'lambda2',1e-3,... %%% The weight of L1 norm in E
33 | 'betaMax',1e5,... %%% The maximum value of beta
34 | 'rho',1.01,... %%% The increase rate
35 | 'epsilon',1); %%% The regularization terms
36 | %% parameters
37 | T=options.T;
38 | dim=options.dim;
39 | beta=options.beta;
40 | mu=options.mu;
41 | lambda1=options.lambda1;
42 | lambda2=options.lambda2;
43 | rho=options.rho;
44 | betaMax=options.betaMax;
45 | epsilon=options.epsilon;
46 |
47 | %% Initialization
48 | results_iter=[];
49 | [m,n1]=size(trainX);
50 | X=[trainX,testX];
51 | D=eye(m,m);
52 | XX=trainX*trainX';
53 | E=zeros(m,n1);
54 | Sw=withinScatter(trainX,trainY);
55 | Sb=betweenScatter(trainX,trainY);
56 |
57 | Lagrangian=zeros(m,n1);
58 | % Initialize P
59 | left=Sw-mu*Sb;
60 | left=1/n1*left;
61 | % left=left./norm(left,'fro');
62 | [P,~]=eigs(left+epsilon*eye(m),eye(m),dim,'sm');
63 | for i=1:T
64 | % Solve Q by Eq.(15)
65 | M=trainX-E+Lagrangian/beta;
66 | Q=(2*(left)+lambda1*D+beta*XX)\(beta*trainX*M'*P);
67 | % Solve P by Eq.(16)
68 | % [Up,~,Vp]=svd(M*trainX'*Q,'econ');
69 | [Up,~,Vp]=mySVD(M*trainX'*Q,dim);
70 | P=Up*Vp';
71 | % Solve E by Eq.(19)
72 | e=lambda2/beta;
73 | E=shrink(trainX-P*Q'*trainX+Lagrangian/beta,e);
74 | % Update Lagrangian multiplier
75 | Lagrangian=Lagrangian+beta*(trainX-P*Q'*trainX-E);
76 | beta=min(rho*beta,betaMax);
77 | % Update D
78 | D=2*updateL21(Q);
79 | % Classification
80 | Z=Q'*X;
81 | Z=L2Norm(Z')';
82 | Zs=Z(:,1:n1);
83 | Zt=Z(:,n1+1:end);
84 | Ytpseudo=classifyKNN(Zs,trainY,Zt,1);
85 | results=MyClusteringMeasure(testY,Ytpseudo,1);%[ACC ACC2 MIhat Purity]';
86 | for index=1:3
87 | results_iter(index,i)=results(index);
88 | end
89 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
90 | results(1),results(2),results(3));
91 | end
92 | end
--------------------------------------------------------------------------------
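RSLDA and LRDAGP both build their LDA scatters through withinScatter and betweenScatter; only betweenScatter.m appears later in this excerpt. A minimal sketch of the within-class scatter that withinScatter presumably computes (an assumption, mirroring betweenScatter's interface):

function [Sw] = withinScatter(X,Y)
% Within-class scatter: Sw = sum_c sum_{x in class c} (x-u_c)(x-u_c)'
% input  X: m*n, Y: n*1;  output  Sw: m*m
C=length(unique(Y));
Sw=0;
for c=1:C
    Xc=X(:,Y==c);
    F=Xc-mean(Xc,2);
    Sw=Sw+F*F';
end
end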
/algorithm/RepresentationLearning/supervised/2020-LRDAGP/LRDAGP.m:
--------------------------------------------------------------------------------
1 | function [results,results_iter,P]=LRDAGP(trainX,trainY,testX,testY,options)
2 | %% Implementation of LRDAGP
3 | %%% Authors Du et al.
4 | %%% Title 2020-Low-Rank Discriminative Adaptive Graph Preserving Subspace Learning
5 | %% input:
6 | %%% trainX The training samples, m*n1
7 | %%% trainY The labels of training samples, n1*1
8 | %%% testX The test samples, m*n2
9 | %%% testY The labels of test samples, n2*1
10 | %% options
11 | %%% T The total iteration times
12 | %%% t The iteration times
13 | %%% dim The dimension reduced
14 | %%% k The number of KNN
15 | %%% alpha The relative weight of Sb, i.e., Sw-alpha*Sb
16 | %%% beta The weight of nuclear norm w.r.t J (Z)
17 | %%% theta The weight of L2,1 norm w.r.t E
18 | %%% lambda The weight of scatters w.r.t P
19 | %%% mu The lagrange coefficient
20 | %%% muMax The maximum value of `mu`
21 | %%% rho The increase rate of `mu`
22 | %% output:
23 | %%% results The results (list) [acc,NMI,purity]
24 | %%% results_iter The iteration information of 'results'
25 | %%% P The learned projection matrix
26 | %% Version
27 | %%% Implementation 2022-05-28
28 | options=defaultOptions(options,...
29 | 'T',10,... %%% The total iteration times
30 | 'dim',100,... %%% The dimension reduced
31 | 'k',10,... %%% The number of KNN
32 | 't',10,... %%% The iteration times
33 | 'alpha',0.1,... %%% The relative weight of Sb, i.e., Sw-alpha*Sb
34 | 'beta',1,... %%% The weight of nuclear norm w.r.t J (Z)
35 | 'theta',1,... %%% The weight of L2,1 norm w.r.t E
36 | 'lambda',0.1,... %%% The weight of scatters w.r.t P
37 | 'mu',0.1,... %%% The lagrange coefficient
38 | 'muMax',1e3,... %%% The maximum value of `mu`
39 | 'rho',1.01); %%% The increase rate of `mu`
40 | %% parameters
41 | T=options.T;
42 | k=options.k;
43 | myeps=1e-4;
44 | %% Initialization
45 | results_iter=[];
46 | [m,n]=size(trainX);
47 | % Init G & V
48 | G=1/n*withinScatter(trainX,trainY);
49 | V=1/n*betweenScatter(trainX,trainY);
50 | dist=EuDist2(trainX',trainX');
51 | [S,rr]=similarMatrix_CAN(dist,k,-1);
52 | [P,~]=eigs(options.lambda*G-options.lambda*options.alpha*V+myeps*eye(m),eye(m),options.dim,'sm');
53 | for i=1:T
54 | % Solve Z and E by Algorithm 1
55 | [Z,~] = LRDAGP_solveAlg1(trainX,P,options);
56 | % Solve P by Algorithm 2
57 | S=(S+S')/2;
58 | D=diag(sparse(sum(S)));
59 | L=D-S;
60 | L=L./norm(L,'fro');
61 | XLX=trainX*L*trainX';
62 | P = LRDAGP_solveAlg2(trainX,P,Z,XLX,G,V,options);
63 | % Update S by CAN
64 | Zs=real(P'*trainX);
65 | dist=EuDist2(Zs');
66 | S=similarMatrix_CAN(dist,k,rr);
67 | % Classification
68 | Zt=real(P'*testX);
69 | Ytpseudo=classifyKNN(Zs,trainY,Zt,1);
70 | results=MyClusteringMeasure(testY,Ytpseudo,1);%[ACC MIhat Purity]';
71 | for index=1:3
72 | results_iter(index,i)=results(index);
73 | end
74 | fprintf('[%d]-th acc:%.4f, MIhat: %.4f, Purity:%.4f\n',i,...
75 | results(1),results(2),results(3));
76 |
77 | end
78 | end
--------------------------------------------------------------------------------
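LRDAGP_solveAlg1 below updates its low-rank block J with SVT, the singular value thresholding operator; SVT.m is listed under utils but not shown in this excerpt. A minimal sketch of the standard operator it presumably implements (an assumption):

function [X] = SVT(A,tau)
% Singular value thresholding: proximal operator of tau*||.||_* at A
[U,S,V] = svd(A,'econ');
s = max(diag(S)-tau,0);   % shrink the singular values
X = U*diag(s)*V';
end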
/algorithm/RepresentationLearning/supervised/2020-LRDAGP/LRDAGP_solveAlg1.m:
--------------------------------------------------------------------------------
1 | function [Z,E] = LRDAGP_solveAlg1(trainX,P,options)
2 | %% Target
3 | %%% Solve Z and E
4 | %% input
5 | %%% trainX The training samples, m*n
6 | %%% P The learned projection matrix, m*d
7 | %%% options
8 | %%%%% mu The lagrange coefficient
9 | %%%%% muMax The maximum value of `mu`
10 | %%%%% rho The increase rate of `mu`
11 | %%%%% t The iteration times
12 | %%%%% beta The weight of nuclear norm w.r.t J (Z)
13 | %%%%% theta The weight of L2,1 norm w.r.t E
14 | %% Parameters
15 | mu=options.mu;
16 | muMax=options.muMax;
17 | rho=options.rho;
18 | T=options.t;
19 | beta=options.beta;
20 | theta=options.theta;
21 | epsilon=1e-8;
22 | %% Init
23 | [m,n]=size(trainX);
24 | Z=0;E=0;
25 | Y1=0;Y2=0;
26 | for i=1:T
27 | % Update J by Eq.(19)
28 | J=SVT(Z+Y2/mu,beta/mu);
29 | % Update Z by Eq.(20)
30 | Z=(trainX'*(P*P')*trainX+eye(n))\(trainX'*P*(P'*trainX-E+Y1/mu)-Y2/mu+J);
31 | % Update E by Eq.(21)
32 | E=SolveL21Problem(P'*trainX-P'*trainX*Z+Y1/mu,theta/mu);
33 | % Update Lagrange multipliers
34 | cd1=P'*trainX-P'*trainX*Z-E;
35 | cd2=Z-J;
36 | Y1=Y1+mu*(cd1);
37 | Y2=Y2+mu*(cd2);
38 | mu=min(rho*mu,muMax);
39 | if norm(cd1,'inf')<epsilon && norm(cd2,'inf')<epsilon % reconstructed stopping rule; this line was garbled in the source dump
40 | break;
41 | end
42 | end
43 | end
--------------------------------------------------------------------------------
[... the sections for LRDAGP_solveAlg2.m, RDA_FSIS.m, DSDPL.m, SN_TSL.m, JLRSL.m, rLPP.m, and the beginning of readme.md are missing from this excerpt ...]
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
15 | **To run supervised method**:
16 |
17 | > 'Demo_Supervised_Feature_Selection.m' gives a simple example for supervised methods.
18 | >
19 | > To run the codes, the sizes of the inputs are trainX (m\*n1), trainY (n1\*1), testX (m\*n2), and testY (n2\*1), where *m* is the dimension, and *n1* and *n2* present the numbers of the training and test samples, respectively (testY is only used to calculate the clustering results, and is not involved in training).
20 |
21 | **To run unsupervised method**:
22 |
23 | > 'Demo_Unsupervised_Feature_Selection.m' and 'Demo_Unsupervised_Representation_Learning.m' give simple examples for unsupervised methods.
24 |
25 | > To run the codes, the sizes of the inputs are X (m\*n) and Y (n\*1), where *m* is the dimension, and *n* presents the number of the training samples (Y is only used to calculate the clustering results, and is not involved in training).
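A minimal end-to-end sketch of the calling convention described above, mirroring Demo_Supervised_Feature_Selection.m (the `options` fields are algorithm-specific and optional):

```matlab
addpath('./utils/'); addpath(genpath('./algorithm/'));
load('./COIL20.mat','X','Y');       % X: n*m, Y: n*1
X = X';                             % make the input m*n
[X1,Y1,X2,Y2] = splitData(X,Y,10);  % 10 training samples per class
options = struct();                 % empty => defaults via defaultOptions
[results,results_iter,Q] = RSLDA(X1,Y1,X2,Y2,options);
```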
26 |
27 | ## The codes included in the repository
28 |
29 | ### 1. Feature Selection
30 |
31 | #### 1.1 Supervised Methods
32 |
33 | > The codes will be available soon.
34 |
35 | ---
36 |
37 | #### 1.2 Unsupervised Methods
38 |
39 | - 2019-LRLMR [[1]](https://www.sciencedirect.com/science/article/pii/S0893608019301212): Unsupervised feature selection via latent representation learning and manifold regularization.
40 |
41 | > We reproduce the codes following the descriptions in the paper. The official codes are available at http://tangchang.net/
42 |
43 | - 2019-URAFS [[2]](https://ieeexplore.ieee.org/abstract/document/8474999): Generalized Uncorrelated Regression with Adaptive Graph for Unsupervised Feature Selection.
44 |
45 | - 2021-AGUFS [[3]](https://www.sciencedirect.com/science/article/pii/S0950705121004196): Adaptive graph-based generalized regression model for unsupervised feature selection.
46 |
47 | - 2021-DSLRL [[4]](https://www.sciencedirect.com/science/article/pii/S0031320321000601): Dual space latent representation learning for unsupervised feature selection.
48 |
49 | - 2022-DLUFS [[5]](https://www.sciencedirect.com/science/article/pii/S0957417422005437): Low-rank dictionary learning for unsupervised feature selection.
50 |
51 | > The official codes (python implementation) are available at https://github.com/mohsengh/DLUFS/
52 |
53 | - 2022-SLMEA [[6]](https://www.sciencedirect.com/science/article/pii/S0925231222001916): Sparse and low-dimensional representation with maximum entropy adaptive graph for feature selection.
54 |
55 | ---
56 |
57 | ### 2. Clustering
58 |
59 | - 2015-rLPP [[13]](https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/viewPaper/9921): Learning Robust Locality Preserving Projection via p-Order Minimization.
60 |
61 | ### 3. Representation & Subspace Learning
62 |
63 | #### 3.1 Supervised Methods
64 |
65 | - 2017-MRSL [[9]](https://ieeexplore.ieee.org/abstract/document/8128909/): Marginal Representation Learning With Graph Structure Self-Adaptation.
66 |
67 | > Official codes are available at https://github.com/DarrenZZhang/MSRL.
68 |
69 | - 2019-RSLDA [[7]](https://ieeexplore.ieee.org/abstract/document/8272002): Robust Sparse Linear Discriminant Analysis.
70 |
71 | - 2020-LRDAGP [[10]](https://link.springer.com/article/10.1007/s11063-020-10340-6): Low-Rank Discriminative Adaptive Graph Preserving Subspace Learning.
72 |
73 | - 2020-RDA_FSIS [[12]](https://www.sciencedirect.com/science/article/abs/pii/S0893608020301386): Linear embedding by joint Robust Discriminant Analysis and Inter-class Sparsity.
74 |
75 | - 2021-SN-TSL [[11]](https://www.sciencedirect.com/science/article/abs/pii/S016516842100027X): Sparse non-negative transition subspace learning for image classification.
76 |
77 | - 2021-DSDPL [[8]](https://www.sciencedirect.com/science/article/pii/S0031320320303848): Dual subspace discriminative projection learning.
78 |
79 | #### 3.2 Unsupervised Methods
80 |
81 | - 2020-JLRSL [[14]](https://www.sciencedirect.com/science/article/pii/S0950705120301428): Joint low-rank representation and spectral regression for robust subspace learning.
82 |
83 |
84 |
85 | # Reference
86 |
87 | [1] Tang, Chang, et al. "Unsupervised feature selection via latent representation learning and manifold regularization." *Neural Networks* 117 (2019): 163-178.
88 |
89 | [2] X. Li, H. Zhang, R. Zhang, Y. Liu and F. Nie, "Generalized Uncorrelated Regression with Adaptive Graph for Unsupervised Feature Selection," in IEEE Transactions on Neural Networks and Learning Systems, vol. 30, no. 5, pp. 1587-1595, May 2019, doi: 10.1109/TNNLS.2018.2868847.
90 |
91 | [3] Huang, Yanyong, et al. "Adaptive graph-based generalized regression model for unsupervised feature selection." *Knowledge-Based Systems* 227 (2021), doi: 10.1016/j.knosys.2021.107156.
92 |
93 | [4] Shang, Ronghua, et al. "Dual space latent representation learning for unsupervised feature selection." *Pattern Recognition* 114 (2021), doi: 10.1016/j.patcog.2021.107873.
94 |
95 | [5] Parsa, Mohsen Ghassemi, Hadi Zare, and Mehdi Ghatee. "Low-rank dictionary learning for unsupervised feature selection." *Expert Systems with Applications* 202 (2022), doi: 10.1016/j.eswa.2022.117149.
96 |
97 | [6] Shang, Ronghua, et al. "Sparse and low-dimensional representation with maximum entropy adaptive graph for feature selection." *Neurocomputing* 485 (2022): 57-73.
98 |
99 | [7] J. Wen et al., "Robust Sparse Linear Discriminant Analysis," in IEEE Transactions on Circuits and Systems for Video Technology, vol. 29, no. 2, pp. 390-403, Feb. 2019, doi: 10.1109/TCSVT.2018.2799214.
100 |
101 | [8] Belous, Gregg, Andrew Busch, and Yongsheng Gao. "Dual subspace discriminative projection learning." *Pattern Recognition* 111 (2021), doi: 10.1016/j.patcog.2020.107581. 
102 | 103 | [9] Zhang, Zheng, et al. "Marginal representation learning with graph structure self-adaptation." *IEEE Transactions on Neural Networks and Learning Systems* 29.10 (2017): 4645-4659. 104 | 105 | [10] Du, Haishun, et al. "Low-rank discriminative adaptive graph preserving subspace learning." *Neural Processing Letters* 52.3 (2020): 2127-2149. 106 | 107 | [11] Chen, Zhe, et al. "Sparse non-negative transition subspace learning for image classification." *Signal Processing* 183 (2021), doi: 10.1016/j.sigpro.2021.107988. 108 | 109 | [12] Dornaika, Fadi, and A. Khoder. "Linear embedding by joint robust discriminant analysis and inter-class sparsity." *Neural Networks* 127 (2020): 141-159. 110 | 111 | [13] Wang, Hua, Feiping Nie, and Heng Huang. "Learning robust locality preserving projection via p-order minimization." In *Twenty-Ninth AAAI Conference on Artificial Intelligence*. 2015. 112 | 113 | [14] Peng, Yong, Leijie Zhang, Wanzeng Kong, Feiwei Qin, and Jianhai Zhang. "Joint low-rank representation and spectral regression for robust subspace learning." *Knowledge-Based Systems* 195 (2020), doi: 10.1016/j.knosys.2020.105723. -------------------------------------------------------------------------------- /utils/EProjSimplex_new.m: -------------------------------------------------------------------------------- 1 | function [x ft] = EProjSimplex_new(v, k) 2 | 3 | % 4 | %% Problem 5 | % 6 | % min 1/2 || x - v||^2 7 | % s.t. x>=0, 1'x=1 8 | % 9 | 10 | if nargin < 2 11 | k = 1; 12 | end; 13 | 14 | ft=1; 15 | n = length(v); 16 | 17 | v0 = v-mean(v) + k/n; 18 | %vmax = max(v0); 19 | vmin = min(v0); 20 | if vmin < 0 21 | f = 1; 22 | lambda_m = 0; 23 | while abs(f) > 10^-10 24 | v1 = v0 - lambda_m; 25 | posidx = v1>0; 26 | npos = sum(posidx); 27 | g = -npos; 28 | f = sum(v1(posidx)) - k; 29 | lambda_m = lambda_m - f/g; 30 | ft=ft+1; 31 | if ft > 100 32 | x = max(v1,0); 33 | break; 34 | end; 35 | end; 36 | x = max(v1,0); 37 | 38 | else 39 | x = v0; 40 | end; -------------------------------------------------------------------------------- /utils/EuDist2.m: -------------------------------------------------------------------------------- 1 | function D = EuDist2(fea_a,fea_b,bSqrt) 2 | %EUDIST2 Efficiently Compute the Euclidean Distance Matrix by Exploring the 3 | %Matlab matrix operations. 
4 | %
5 | % D = EuDist(fea_a,fea_b)
6 | % fea_a: nSample_a * nFeature
7 | % fea_b: nSample_b * nFeature
8 | % D: nSample_a * nSample_a
9 | % or nSample_a * nSample_b
10 | %
11 | % Examples:
12 | %
13 | % a = rand(500,10);
14 | % b = rand(1000,10);
15 | %
16 | % A = EuDist2(a); % A: 500*500
17 | % D = EuDist2(a,b); % D: 500*1000
18 | %
19 | % version 2.1 --November/2011
20 | % version 2.0 --May/2009
21 | % version 1.0 --November/2005
22 | %
23 | % Written by Deng Cai (dengcai AT gmail.com)
24 |
25 |
26 | if ~exist('bSqrt','var')
27 | bSqrt = 1;
28 | end
29 |
30 | if (~exist('fea_b','var')) || isempty(fea_b)
31 | aa = sum(fea_a.*fea_a,2);
32 | ab = fea_a*fea_a';
33 |
34 | if issparse(aa)
35 | aa = full(aa);
36 | end
37 |
38 | D = bsxfun(@plus,aa,aa') - 2*ab;
39 | D(D<0) = 0;
40 | if bSqrt
41 | D = sqrt(D);
42 | end
43 | D = max(D,D');
44 | else
45 | aa = sum(fea_a.*fea_a,2);
46 | bb = sum(fea_b.*fea_b,2);
47 | ab = fea_a*fea_b';
48 |
49 | if issparse(aa)
50 | aa = full(aa);
51 | bb = full(bb);
52 | end
53 |
54 | D = bsxfun(@plus,aa,bb') - 2*ab;
55 | D(D<0) = 0;
56 | if bSqrt
57 | D = sqrt(D);
58 | end
59 | end
60 |
--------------------------------------------------------------------------------
/utils/GPI.m:
--------------------------------------------------------------------------------
1 | function [W] = GPI(A,B,options)
2 | %% Solve:
3 | %%% min tr(W'AW-2W'B) s.t. W'W=I
4 | %%% => max tr(W' hatA W+2W'B) s.t. W'W=I, hatA=alpha*I-A
5 | %% input:
6 | %%% W learned matrix
7 | %%% A W'AW (m*m)
8 | %%% B 2 W'B (m*k)
9 | %% options:
10 | %%% T: iteration (default:1e3)
11 | %%% maxIter:maxRandom (default:1e3)
12 | %%% precision: the convergence precision (default:1e-4)
13 | if nargin==2
14 | options=struct();
15 | end
16 | if ~isfield(options,'T')
17 | options.T=1e3;
18 | end
19 | if ~isfield(options,'precision')
20 | options.precision=1e-4;
21 | end
22 | T=options.T;
23 | precision=options.precision;
24 | n=size(A,1);
25 | %% mu : the largest eigenvalue of A
26 | [U,V] = eig(A);
27 | [~, index] = sort(diag(V),'ascend');
28 | mu=diag(V);mu=max(mu);
29 | Atau=mu*eye(n)-A;
30 | W = U(:, index(1:size(B,2)));
31 | % try chol(Atau);
32 | % % fprintf('Matrix is symmetric positive definite.\n');
33 | % catch ME
34 | % warning('[GPI] Atau=mu*eye(n)-A is not a positive definite matrix!');
35 | % end
36 | lastVal=Inf;
37 | for i=1:T
38 | beforeW=W;
39 | M=2*Atau*W+2*B;
40 | [U,~,V]=svd(M,'econ');
41 | W=U*V';
42 | val=norm(beforeW-W,'inf');
43 | if abs(lastVal-val)<precision && i>=3 % reconstructed: the comparison was garbled in the source dump; `precision` is otherwise unused
44 | break;
45 | else
46 | lastVal=val;
47 | end
48 | if i==T
49 | warning('[GPI] No convergence (iteration > maximum T).');
50 | end
51 | end
52 | end
53 |
--------------------------------------------------------------------------------
/utils/IterativeMultiplicativeUpdate.m:
--------------------------------------------------------------------------------
1 | function result=IterativeMultiplicativeUpdate(X,gradient)
2 | %% input:
3 | %%% X : The matrix waiting for update (m*n)
4 | %%% gradient: The gradient of X (m*n)
5 | %% Output:
6 | %%% result: The matrix updated (m*n)
7 | myeps=1e-8;
8 | gradient=gradient+((abs(gradient)<myeps).*myeps); % reconstructed (assumption): guard vanishing entries; the tail of this file was lost in the dump
9 | result=X.*gradient; % reconstructed multiplicative update (assumption)
10 | end
--------------------------------------------------------------------------------
[... the sections for /utils/L2Norm.m, /utils/MyClusteringMeasure.m, and /utils/SVT.m are missing from this excerpt ...]
--------------------------------------------------------------------------------
/utils/SolveL21Problem.m:
--------------------------------------------------------------------------------
1 | function [E] = SolveL21Problem(Q,alpha)
2 | %% Formula
3 | %%% Column-wise shrinkage for the L2,1-norm proximal problem
4 | %%% (lines 4-8 reconstructed; the dump lost this span):
5 | %%% E(:,i)=\begin{cases}
6 | %%% \frac{\|q_i\|-\alpha}{\|q_i\|}q_i,
7 | %%% &\text{if}\
8 | %%% \|q_i\|>\alpha;
9 | %%% \\ 0,
10 | %%% &\text{otherwise}
11 | %%% \end{cases}
12 | %% input
13 | %%% Q: A tractable matrix with m*n
14 | %%% alpha: The hyperparameter
15 | %% output
16 | %%% E: The pursued matrix
17 | [m,n]=size(Q);
18 | E=zeros(m,n);
19 | sumSqrtQ=sqrt(sum(Q.*Q,1));
20 | flag=sumSqrtQ>alpha;
21 | if sum(flag)>0
22 | score=sumSqrtQ(flag);
23 | score=(score-alpha)./(score);
24 | score=repmat(score,m,1); 
25 | E(:,flag)=score.*Q(:,flag); 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /utils/betweenScatter.m: -------------------------------------------------------------------------------- 1 | function [Sb] = betweenScatter(X,Y) 2 | %% there are two methods construct the betweenScatter 3 | %%% 1, Sb=\sum_{c=1}^C n_c (x - u_c)(x - u_c)' (DIJDA) 4 | %%% 2, Sb=\sum_{c=1}^C n_c (u_c - mean(X,2))(u_c - mean(X,2))' (JGSA) 5 | % input 6 | % X: m*n 7 | % Y: n*1 8 | % output 9 | % Sb: m*m 10 | C=length(find(unique(Y))); 11 | n=length(Y); 12 | Sb=0; 13 | Fc=mean(X,2); 14 | for i=1:C 15 | Xc=X(:,Y==i); 16 | F=Xc-Fc; 17 | nc=size(Xc,2); 18 | Sb=Sb+nc*(F*F'); 19 | end 20 | % Sb=Sb./n; 21 | end -------------------------------------------------------------------------------- /utils/centeringMatrix.m: -------------------------------------------------------------------------------- 1 | function [H] = centeringMatrix(n) 2 | H=eye(n)-1/n*ones(n,n); 3 | end 4 | 5 | -------------------------------------------------------------------------------- /utils/classifyKNN.m: -------------------------------------------------------------------------------- 1 | function [Y_pse] = classifyKNN(Xs,Ys,Xt,k) 2 | knn_model = fitcknn(Xs',Ys,'NumNeighbors',k); 3 | Y_pse = knn_model.predict(Xt'); 4 | end 5 | 6 | -------------------------------------------------------------------------------- /utils/computeL.m: -------------------------------------------------------------------------------- 1 | function [L,D,W] = computeL(X,manifold) 2 | %% input 3 | %%% X: fea*n 4 | %%% manifold: the construct options of graph 5 | %% Construct graph Laplacian 6 | if ~isfield(manifold,'normr') 7 | manifold.normr=1; 8 | end 9 | n=size(X,2); 10 | W = lapgraph(X',manifold); 11 | D=diag(sparse(sum(W))); 12 | if manifold.normr==1 13 | Dw = diag(sparse(sqrt(1 ./ sum(W)))); 14 | L = eye(n) - Dw * W * Dw; 15 | else 16 | L = D-W; 17 | end 18 | 19 | end 20 | %% Cosine 21 | % manifold.k = k; 22 | % manifold.Metric = 'Cosine'; 23 | % manifold.WeightMode = 'Cosine'; 24 | % manifold.NeighborMode = 'KNN'; 25 | %% Eudist 26 | % manifold.k = k; 27 | % manifold.Metric = 'Euclidean'; 28 | % manifold.WeightMode = 'HeatKernel'; 29 | % manifold.NeighborMode = 'KNN'; -------------------------------------------------------------------------------- /utils/defaultOptions.m: -------------------------------------------------------------------------------- 1 | function [options] = defaultOptions(varargin) 2 | options=varargin{1}; 3 | n=nargin-1; 4 | if mod(n,2) ~=0 5 | error('Please enter coupled parameters\n'); 6 | return ; 7 | end 8 | n=n/2; 9 | for i=1:n 10 | pos=1+2*i-1; 11 | key=varargin{pos}; 12 | if ~isfield(options,key) 13 | val=varargin{pos+1}; 14 | options=setfield(options,key,val); 15 | end 16 | end 17 | end 18 | 19 | 20 | -------------------------------------------------------------------------------- /utils/getClusteringResults.m: -------------------------------------------------------------------------------- 1 | function [WX,resultsFinal] = getClusteringResults(X,Y,W,C,options) 2 | %% Formula 3 | %%% select top-k highest scores of features in W'X 4 | %% Input: 5 | %%% X The feature, m*n 6 | %%% Y The labels, n*1 7 | %%% W The feature selection matrix, m*d 8 | %%% C The clustering number (default `length(unique(Y))`) 9 | %% Output: 10 | %%% WX The projection subspace, d*n 11 | %%% results The clustering results [acc,NMI,purity] 12 | if nargin<=4 13 | options=struct(); 14 | end 15 | if nargin<=3 16 | 
C=length(unique(Y)); 17 | end 18 | options=defaultOptions(options,... 19 | 'T',10,... %% The repeat times of kmeans 20 | 'MaxIter',100,... %% Options of 'litekmeans' 21 | 'Replicates',10,...%% Options of 'litekmeans' 22 | 'supervisedFlag',0); %% Options of 'MyClusteringMeasure' 23 | WX=W'*X; 24 | resultsAll=[]; 25 | for i=1:options.T 26 | Ypseudo=litekmeans(WX',C,'MaxIter',options.MaxIter,'Replicates',options.Replicates); 27 | results=MyClusteringMeasure(Y,Ypseudo);%[ACC ACC2 MIhat Purity]'; 28 | resultsAll=[resultsAll,results]; 29 | end 30 | resultsFinal=mean(resultsAll,2); 31 | end 32 | 33 | -------------------------------------------------------------------------------- /utils/getFeatureSelectionResults.m: -------------------------------------------------------------------------------- 1 | function [X_new,resultsFinal] = getFeatureSelectionResults(X,Y,W,dim,C,options) 2 | %% Formula 3 | %%% select top-k highest scores of features in X'W 4 | %% Input: 5 | %%% X The feature, m*n 6 | %%% Y The labels, n*1 7 | %%% W The feature selection matrix, m*m 8 | %%% dim The dimension reduced 9 | %% Output: 10 | %%% newX The selected feautre sample, k*n 11 | %%% results The clustering results [acc,NMI,purity] 12 | if nargin<=5 13 | options=struct(); 14 | end 15 | if nargin<=4 16 | C=length(unique(Y)); 17 | end 18 | options=defaultOptions(options,... 19 | 'T',10,... %% The repeat times of kmeans 20 | 'MaxIter',100,... %% Options of 'litekmeans' 21 | 'Replicates',10,...%% Options of 'litekmeans' 22 | 'supervisedFlag',0); %% Options of 'MyClusteringMeasure' 23 | score=sum((W.*W),2); 24 | [~,index]=sort(score,'descend'); 25 | X_new = X(index(1:dim),:); 26 | resultsAll=[]; 27 | for i=1:options.T 28 | Ypseudo=litekmeans(X_new',C,'MaxIter',options.MaxIter,'Replicates',options.Replicates); 29 | results=MyClusteringMeasure(Y,Ypseudo,options.supervisedFlag);%[ACC ACC2 MIhat Purity]'; 30 | resultsAll=[resultsAll,results]; 31 | end 32 | resultsFinal=mean(resultsAll,2); 33 | end 34 | 35 | -------------------------------------------------------------------------------- /utils/hotmatrix.m: -------------------------------------------------------------------------------- 1 | function [matrix] = hotmatrix(labels,C,weight) 2 | %% input: 3 | %%% labels: n*1, the labels of samples 4 | %%% C: integer, the number of the classes 5 | %%% weight: integer, if weight==1, then the value is 1/length(classes) 6 | %% output: 7 | %%% matrix: n*C, the output hotmatrix 8 | if nargin==2 9 | weight=0; % weight =0,then Y={0,1} , weight = 1, then Y={0,1/n^c} 10 | end 11 | n=length(labels); 12 | matrix=zeros(n,C); 13 | weightY=zeros(C,1); 14 | for i=1:C 15 | if weight==0 16 | weightY(i)=1; 17 | else 18 | weightY(i)=1/length(find(labels==i)); 19 | end 20 | end 21 | for i=1:n 22 | if(labels(i)>0 &&labels(i)<=C) 23 | matrix(i,labels(i))=weightY(labels(i)); 24 | end 25 | end 26 | % other implementation 27 | % full(sparse(1:ns,Ys,1)); 28 | end 29 | 30 | -------------------------------------------------------------------------------- /utils/lapgraph.m: -------------------------------------------------------------------------------- 1 | function [W, elapse] = lapgraph(fea,options) 2 | % Usage: 3 | % W = graph(fea,options) 4 | % 5 | % fea: Rows of vectors of data points. Each row is x_i 6 | % options: Struct value in Matlab. The fields in options that can be set: 7 | % Metric - Choices are: 8 | % 'Euclidean' - Will use the Euclidean distance of two data 9 | % points to evaluate the "closeness" between 10 | % them. 
[Default One] 11 | % 'Cosine' - Will use the cosine value of two vectors 12 | % to evaluate the "closeness" between them. 13 | % A popular similarity measure used in 14 | % Information Retrieval. 15 | % 16 | % NeighborMode - Indicates how to construct the graph. Choices 17 | % are: [Default 'KNN'] 18 | % 'KNN' - k = 0 19 | % Complete graph 20 | % k > 0 21 | % Put an edge between two nodes if and 22 | % only if they are among the k nearst 23 | % neighbors of each other. You are 24 | % required to provide the parameter k in 25 | % the options. Default k=5. 26 | % 'Supervised' - k = 0 27 | % Put an edge between two nodes if and 28 | % only if they belong to same class. 29 | % k > 0 30 | % Put an edge between two nodes if 31 | % they belong to same class and they 32 | % are among the k nearst neighbors of 33 | % each other. 34 | % Default: k=0 35 | % You are required to provide the label 36 | % information gnd in the options. 37 | % 38 | % WeightMode - Indicates how to assign weights for each edge 39 | % in the graph. Choices are: 40 | % 'Binary' - 0-1 weighting. Every edge receiveds weight 41 | % of 1. [Default One] 42 | % 'HeatKernel' - If nodes i and j are connected, put weight 43 | % W_ij = exp(-norm(x_i - x_j)/2t^2). This 44 | % weight mode can only be used under 45 | % 'Euclidean' metric and you are required to 46 | % provide the parameter t. 47 | % 'Cosine' - If nodes i and j are connected, put weight 48 | % cosine(x_i,x_j). Can only be used under 49 | % 'Cosine' metric. 50 | % 51 | % k - The parameter needed under 'KNN' NeighborMode. 52 | % Default will be 5. 53 | % gnd - The parameter needed under 'Supervised' 54 | % NeighborMode. Colunm vector of the label 55 | % information for each data point. 56 | % bLDA - 0 or 1. Only effective under 'Supervised' 57 | % NeighborMode. If 1, the graph will be constructed 58 | % to make LPP exactly same as LDA. Default will be 59 | % 0. 60 | % t - The parameter needed under 'HeatKernel' 61 | % WeightMode. Default will be 1 62 | % bNormalized - 0 or 1. Only effective under 'Cosine' metric. 63 | % Indicates whether the fea are already be 64 | % normalized to 1. Default will be 0 65 | % bSelfConnected - 0 or 1. Indicates whether W(i,i) == 1. Default 1 66 | % if 'Supervised' NeighborMode & bLDA == 1, 67 | % bSelfConnected will always be 1. Default 1. 68 | % 69 | % 70 | % Examples: 71 | % 72 | % fea = rand(50,15); 73 | % options = []; 74 | % options.Metric = 'Euclidean'; 75 | % options.NeighborMode = 'KNN'; 76 | % options.k = 5; 77 | % options.WeightMode = 'HeatKernel'; 78 | % options.t = 1; 79 | % W = constructW(fea,options); 80 | % 81 | % 82 | % fea = rand(50,15); 83 | % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4]; 84 | % options = []; 85 | % options.Metric = 'Euclidean'; 86 | % options.NeighborMode = 'Supervised'; 87 | % options.gnd = gnd; 88 | % options.WeightMode = 'HeatKernel'; 89 | % options.t = 1; 90 | % W = constructW(fea,options); 91 | % 92 | % 93 | % fea = rand(50,15); 94 | % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4]; 95 | % options = []; 96 | % options.Metric = 'Euclidean'; 97 | % options.NeighborMode = 'Supervised'; 98 | % options.gnd = gnd; 99 | % options.bLDA = 1; 100 | % W = constructW(fea,options); 101 | % 102 | % 103 | % For more details about the different ways to construct the W, please 104 | % refer: 105 | % Deng Cai, Xiaofei He and Jiawei Han, "Document Clustering Using 106 | % Locality Preserving Indexing" IEEE TKDE, Dec. 2005. 
107 | % 108 | % 109 | % Written by Deng Cai (dengcai2 AT cs.uiuc.edu), April/2004, Feb/2006, 110 | % May/2007 111 | % 112 | 113 | if (~exist('options','var')) 114 | options = []; 115 | else 116 | if ~isstruct(options) 117 | error('parameter error!'); 118 | end 119 | end 120 | 121 | %================================================= 122 | if ~isfield(options,'Metric') 123 | options.Metric = 'Cosine'; 124 | end 125 | 126 | switch lower(options.Metric) 127 | case {lower('Euclidean')} 128 | case {lower('Cosine')} 129 | if ~isfield(options,'bNormalized') 130 | options.bNormalized = 0; 131 | end 132 | otherwise 133 | error('Metric does not exist!'); 134 | end 135 | 136 | %================================================= 137 | if ~isfield(options,'NeighborMode') 138 | options.NeighborMode = 'KNN'; 139 | end 140 | 141 | switch lower(options.NeighborMode) 142 | case {lower('KNN')} %For simplicity, we include the data point itself in the kNN 143 | if ~isfield(options,'k') 144 | options.k = 5; 145 | end 146 | case {lower('Supervised')} 147 | if ~isfield(options,'bLDA') 148 | options.bLDA = 0; 149 | end 150 | if options.bLDA 151 | options.bSelfConnected = 1; 152 | end 153 | if ~isfield(options,'k') 154 | options.k = 0; 155 | end 156 | if ~isfield(options,'gnd') 157 | error('Label(gnd) should be provided under ''Supervised'' NeighborMode!'); 158 | end 159 | if ~isempty(fea) && length(options.gnd) ~= size(fea,1) 160 | error('gnd doesn''t match with fea!'); 161 | end 162 | otherwise 163 | error('NeighborMode does not exist!'); 164 | end 165 | 166 | %================================================= 167 | 168 | if ~isfield(options,'WeightMode') 169 | options.WeightMode = 'Binary'; 170 | end 171 | 172 | bBinary = 0; 173 | switch lower(options.WeightMode) 174 | case {lower('Binary')} 175 | bBinary = 1; 176 | case {lower('HeatKernel')} 177 | if ~strcmpi(options.Metric,'Euclidean') 178 | warning('''HeatKernel'' WeightMode should be used under ''Euclidean'' Metric!'); 179 | options.Metric = 'Euclidean'; 180 | end 181 | if ~isfield(options,'t') 182 | options.t = 1; 183 | end 184 | case {lower('Cosine')} 185 | if ~strcmpi(options.Metric,'Cosine') 186 | warning('''Cosine'' WeightMode should be used under ''Cosine'' Metric!'); 187 | options.Metric = 'Cosine'; 188 | end 189 | if ~isfield(options,'bNormalized') 190 | options.bNormalized = 0; 191 | end 192 | otherwise 193 | error('WeightMode does not exist!'); 194 | end 195 | 196 | %================================================= 197 | 198 | if ~isfield(options,'bSelfConnected') 199 | options.bSelfConnected = 1; 200 | end 201 | 202 | %================================================= 203 | tmp_T = cputime; 204 | 205 | if isfield(options,'gnd') 206 | nSmp = length(options.gnd); 207 | else 208 | nSmp = size(fea,1); 209 | end 210 | maxM = 62500000; %500M 211 | BlockSize = floor(maxM/(nSmp*3)); 212 | 213 | 214 | if strcmpi(options.NeighborMode,'Supervised') 215 | Label = unique(options.gnd); 216 | nLabel = length(Label); 217 | if options.bLDA 218 | G = zeros(nSmp,nSmp); 219 | for idx=1:nLabel 220 | classIdx = options.gnd==Label(idx); 221 | G(classIdx,classIdx) = 1/sum(classIdx); 222 | end 223 | W = sparse(G); 224 | elapse = cputime - tmp_T; 225 | return; 226 | end 227 | 228 | switch lower(options.WeightMode) 229 | case {lower('Binary')} 230 | if options.k > 0 231 | G = zeros(nSmp*(options.k+1),3); 232 | idNow = 0; 233 | for i=1:nLabel 234 | classIdx = find(options.gnd==Label(i)); 235 | D = EuDist2(fea(classIdx,:),[],0); 236 | [dump idx] = sort(D,2); % sort each row 
237 | clear D dump; 238 | idx = idx(:,1:options.k+1); 239 | 240 | nSmpClass = length(classIdx)*(options.k+1); 241 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 242 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 243 | G(idNow+1:nSmpClass+idNow,3) = 1; 244 | idNow = idNow+nSmpClass; 245 | clear idx 246 | end 247 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 248 | G = max(G,G'); 249 | else 250 | G = zeros(nSmp,nSmp); 251 | for i=1:nLabel 252 | classIdx = find(options.gnd==Label(i)); 253 | G(classIdx,classIdx) = 1; 254 | end 255 | end 256 | 257 | if ~options.bSelfConnected 258 | for i=1:size(G,1) 259 | G(i,i) = 0; 260 | end 261 | end 262 | 263 | W = sparse(G); 264 | case {lower('HeatKernel')} 265 | if options.k > 0 266 | G = zeros(nSmp*(options.k+1),3); 267 | idNow = 0; 268 | for i=1:nLabel 269 | classIdx = find(options.gnd==Label(i)); 270 | D = EuDist2(fea(classIdx,:),[],0); 271 | [dump idx] = sort(D,2); % sort each row 272 | clear D; 273 | idx = idx(:,1:options.k+1); 274 | dump = dump(:,1:options.k+1); 275 | dump = exp(-dump/(2*options.t^2)); 276 | 277 | nSmpClass = length(classIdx)*(options.k+1); 278 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 279 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 280 | G(idNow+1:nSmpClass+idNow,3) = dump(:); 281 | idNow = idNow+nSmpClass; 282 | clear dump idx 283 | end 284 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 285 | else 286 | G = zeros(nSmp,nSmp); 287 | for i=1:nLabel 288 | classIdx = find(options.gnd==Label(i)); 289 | D = EuDist2(fea(classIdx,:),[],0); 290 | D = exp(-D/(2*options.t^2)); 291 | G(classIdx,classIdx) = D; 292 | end 293 | end 294 | 295 | if ~options.bSelfConnected 296 | for i=1:size(G,1) 297 | G(i,i) = 0; 298 | end 299 | end 300 | 301 | W = sparse(max(G,G')); 302 | case {lower('Cosine')} 303 | if ~options.bNormalized 304 | [nSmp, nFea] = size(fea); 305 | if issparse(fea) 306 | fea2 = fea'; 307 | feaNorm = sum(fea2.^2,1).^.5; 308 | for i = 1:nSmp 309 | fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); 310 | end 311 | fea = fea2'; 312 | clear fea2; 313 | else 314 | feaNorm = sum(fea.^2,2).^.5; 315 | for i = 1:nSmp 316 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 317 | end 318 | end 319 | 320 | end 321 | 322 | if options.k > 0 323 | G = zeros(nSmp*(options.k+1),3); 324 | idNow = 0; 325 | for i=1:nLabel 326 | classIdx = find(options.gnd==Label(i)); 327 | D = fea(classIdx,:)*fea(classIdx,:)'; 328 | [dump idx] = sort(-D,2); % sort each row 329 | clear D; 330 | idx = idx(:,1:options.k+1); 331 | dump = -dump(:,1:options.k+1); 332 | 333 | nSmpClass = length(classIdx)*(options.k+1); 334 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 335 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 336 | G(idNow+1:nSmpClass+idNow,3) = dump(:); 337 | idNow = idNow+nSmpClass; 338 | clear dump idx 339 | end 340 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 341 | else 342 | G = zeros(nSmp,nSmp); 343 | for i=1:nLabel 344 | classIdx = find(options.gnd==Label(i)); 345 | G(classIdx,classIdx) = fea(classIdx,:)*fea(classIdx,:)'; 346 | end 347 | end 348 | 349 | if ~options.bSelfConnected 350 | for i=1:size(G,1) 351 | G(i,i) = 0; 352 | end 353 | end 354 | 355 | W = sparse(max(G,G')); 356 | otherwise 357 | error('WeightMode does not exist!'); 358 | end 359 | elapse = cputime - tmp_T; 360 | return; 361 | end 362 | 363 | 364 | if strcmpi(options.NeighborMode,'KNN') && (options.k > 0) 365 | if strcmpi(options.Metric,'Euclidean') 366 | G = zeros(nSmp*(options.k+1),3); 367 | for i = 1:ceil(nSmp/BlockSize) 
368 | if i == ceil(nSmp/BlockSize) 369 | smpIdx = (i-1)*BlockSize+1:nSmp; 370 | dist = EuDist2(fea(smpIdx,:),fea,0); 371 | dist = full(dist); 372 | [dump idx] = sort(dist,2); % sort each row 373 | idx = idx(:,1:options.k+1); 374 | dump = dump(:,1:options.k+1); 375 | if ~bBinary 376 | dump = exp(-dump/(2*options.t^2)); 377 | end 378 | 379 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 380 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); 381 | if ~bBinary 382 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); 383 | else 384 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = 1; 385 | end 386 | else 387 | smpIdx = (i-1)*BlockSize+1:i*BlockSize; 388 | dist = EuDist2(fea(smpIdx,:),fea,0); 389 | dist = full(dist); 390 | [dump idx] = sort(dist,2); % sort each row 391 | idx = idx(:,1:options.k+1); 392 | dump = dump(:,1:options.k+1); 393 | if ~bBinary 394 | dump = exp(-dump/(2*options.t^2)); 395 | end 396 | 397 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 398 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),2) = idx(:); 399 | if ~bBinary 400 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = dump(:); 401 | else 402 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = 1; 403 | end 404 | end 405 | end 406 | 407 | W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 408 | else 409 | if ~options.bNormalized 410 | [nSmp, nFea] = size(fea); 411 | if issparse(fea) 412 | fea2 = fea'; 413 | clear fea; 414 | for i = 1:nSmp 415 | fea2(:,i) = fea2(:,i) ./ max(1e-10,sum(fea2(:,i).^2,1).^.5); 416 | end 417 | fea = fea2'; 418 | clear fea2; 419 | else 420 | feaNorm = sum(fea.^2,2).^.5; 421 | for i = 1:nSmp 422 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 423 | end 424 | end 425 | end 426 | 427 | G = zeros(nSmp*(options.k+1),3); 428 | for i = 1:ceil(nSmp/BlockSize) 429 | if i == ceil(nSmp/BlockSize) 430 | smpIdx = (i-1)*BlockSize+1:nSmp; 431 | dist = fea(smpIdx,:)*fea'; 432 | dist = full(dist); 433 | [dump idx] = sort(-dist,2); % sort each row 434 | idx = idx(:,1:options.k+1); 435 | dump = -dump(:,1:options.k+1); 436 | 437 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 438 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); 439 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); 440 | else 441 | smpIdx = (i-1)*BlockSize+1:i*BlockSize; 442 | dist = fea(smpIdx,:)*fea'; 443 | dist = full(dist); 444 | [dump idx] = sort(-dist,2); % sort each row 445 | idx = idx(:,1:options.k+1); 446 | dump = -dump(:,1:options.k+1); 447 | 448 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 449 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),2) = idx(:); 450 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = dump(:); 451 | end 452 | end 453 | 454 | W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 455 | end 456 | 457 | if strcmpi(options.WeightMode,'Binary') 458 | W(find(W)) = 1; 459 | end 460 | 461 | if isfield(options,'bSemiSupervised') && options.bSemiSupervised 462 | tmpgnd = options.gnd(options.semiSplit); 463 | 464 | Label = unique(tmpgnd); 465 | nLabel = length(Label); 466 | G = zeros(sum(options.semiSplit),sum(options.semiSplit)); 467 | for idx=1:nLabel 468 | classIdx = tmpgnd==Label(idx); 469 | G(classIdx,classIdx) = 1; 470 | end 471 | Wsup = sparse(G); 472 | if 
~isfield(options,'SameCategoryWeight') 473 | options.SameCategoryWeight = 1; 474 | end 475 | W(options.semiSplit,options.semiSplit) = (Wsup>0)*options.SameCategoryWeight; 476 | end 477 | 478 | if ~options.bSelfConnected 479 | for i=1:size(W,1) 480 | W(i,i) = 0; 481 | end 482 | end 483 | 484 | W = max(W,W'); 485 | 486 | elapse = cputime - tmp_T; 487 | return; 488 | end 489 | 490 | 491 | % strcmpi(options.NeighborMode,'KNN') & (options.k == 0) 492 | % Complete Graph 493 | 494 | if strcmpi(options.Metric,'Euclidean') 495 | W = EuDist2(fea,[],0); 496 | W = exp(-W/(2*options.t^2)); 497 | else 498 | if ~options.bNormalized 499 | % feaNorm = sum(fea.^2,2).^.5; 500 | % fea = fea ./ repmat(max(1e-10,feaNorm),1,size(fea,2)); 501 | [nSmp, nFea] = size(fea); 502 | if issparse(fea) 503 | fea2 = fea'; 504 | feaNorm = sum(fea2.^2,1).^.5; 505 | for i = 1:nSmp 506 | fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); 507 | end 508 | fea = fea2'; 509 | clear fea2; 510 | else 511 | feaNorm = sum(fea.^2,2).^.5; 512 | for i = 1:nSmp 513 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 514 | end 515 | end 516 | end 517 | 518 | % W = full(fea*fea'); 519 | W = fea*fea'; 520 | end 521 | 522 | if ~options.bSelfConnected 523 | for i=1:size(W,1) 524 | W(i,i) = 0; 525 | end 526 | end 527 | 528 | W = max(W,W'); 529 | 530 | 531 | 532 | elapse = cputime - tmp_T; 533 | 534 | 535 | function D = EuDist2(fea_a,fea_b,bSqrt) 536 | % Euclidean Distance matrix 537 | % D = EuDist2(fea_a,fea_b) 538 | % fea_a: nSample_a * nFeature 539 | % fea_b: nSample_b * nFeature 540 | % D: nSample_a * nSample_a 541 | % or nSample_a * nSample_b 542 | 543 | 544 | if ~exist('bSqrt','var') 545 | bSqrt = 1; 546 | end 547 | 548 | 549 | if (~exist('fea_b','var')) || isempty(fea_b) 550 | [nSmp, nFea] = size(fea_a); 551 | 552 | aa = sum(fea_a.*fea_a,2); 553 | ab = fea_a*fea_a'; 554 | 555 | aa = full(aa); 556 | ab = full(ab); 557 | 558 | if bSqrt 559 | D = sqrt(repmat(aa, 1, nSmp) + repmat(aa', nSmp, 1) - 2*ab); 560 | D = real(D); 561 | else 562 | D = repmat(aa, 1, nSmp) + repmat(aa', nSmp, 1) - 2*ab; 563 | end 564 | 565 | D = max(D,D'); 566 | D = D - diag(diag(D)); 567 | D = abs(D); 568 | else 569 | [nSmp_a, nFea] = size(fea_a); 570 | [nSmp_b, nFea] = size(fea_b); 571 | 572 | aa = sum(fea_a.*fea_a,2); 573 | bb = sum(fea_b.*fea_b,2); 574 | ab = fea_a*fea_b'; 575 | 576 | aa = full(aa); 577 | bb = full(bb); 578 | ab = full(ab); 579 | 580 | if bSqrt 581 | D = sqrt(repmat(aa, 1, nSmp_b) + repmat(bb', nSmp_a, 1) - 2*ab); 582 | D = real(D); 583 | else 584 | D = repmat(aa, 1, nSmp_b) + repmat(bb', nSmp_a, 1) - 2*ab; 585 | end 586 | 587 | D = abs(D); 588 | end 589 | 590 | -------------------------------------------------------------------------------- /utils/litekmeans.m: -------------------------------------------------------------------------------- 1 | function [label, center, bCon, sumD, D] = litekmeans(X, k, varargin) 2 | % LITEKMEANS K-means clustering, accelerated by matlab matrix operations. 3 | % 4 | % label = LITEKMEANS(X, K) partitions the points in the N-by-P data matrix 5 | % X into K clusters. This partition minimizes the sum, over all 6 | % clusters, of the within-cluster sums of point-to-cluster-centroid 7 | % distances. Rows of X correspond to points, columns correspond to 8 | % variables. LITEKMEANS returns an N-by-1 vector label containing the 9 | % cluster indices of each point. 10 | % 11 | % [label, center] = LITEKMEANS(X, K) returns the K cluster centroid 12 | % locations in the K-by-P matrix center.
13 | % 14 | % [label, center, bCon] = LITEKMEANS(X, K) returns the bool value bCon to 15 | % indicate whether the iteration has converged. 16 | % 17 | % [label, center, bCon, SUMD] = LITEKMEANS(X, K) returns the 18 | % within-cluster sums of point-to-centroid distances in the 1-by-K vector 19 | % sumD. 20 | % 21 | % [label, center, bCon, SUMD, D] = LITEKMEANS(X, K) returns 22 | % distances from each point to every centroid in the N-by-K matrix D. 23 | % 24 | % [ ... ] = LITEKMEANS(..., 'PARAM1',val1, 'PARAM2',val2, ...) specifies 25 | % optional parameter name/value pairs to control the iterative algorithm 26 | % used by LITEKMEANS. Parameters are: 27 | % 28 | % 'Distance' - Distance measure, in P-dimensional space, that LITEKMEANS 29 | % should minimize with respect to. Choices are: 30 | % {'sqEuclidean'} - Squared Euclidean distance (the default) 31 | % 'cosine' - One minus the cosine of the included angle 32 | % between points (treated as vectors). Each 33 | % row of X SHOULD be normalized to unit length. If 34 | % the initial center matrix is provided, it 35 | % SHOULD also be normalized. 36 | % 37 | % 'Start' - Method used to choose initial cluster centroid positions, 38 | % sometimes known as "seeds". Choices are: 39 | % {'sample'} - Select K observations from X at random (the default) 40 | % 'cluster' - Perform preliminary clustering phase on random 10% 41 | % subsample of X. This preliminary phase is itself 42 | % initialized using 'sample'. An additional parameter 43 | % clusterMaxIter can be used to control the maximum 44 | % number of iterations in each preliminary clustering 45 | % problem. 46 | % matrix - A K-by-P matrix of starting locations; or a K-by-1 47 | % index vector indicating which K points in X 48 | % should be used as the initial center. In this case, 49 | % you can pass in [] for K, and LITEKMEANS infers K from 50 | % the first dimension of the matrix. 51 | % 52 | % 'MaxIter' - Maximum number of iterations allowed. Default is 100. 53 | % 54 | % 'Replicates' - Number of times to repeat the clustering, each with a 55 | % new set of initial centroids. Default is 1. If the 56 | % initial centroids are provided, the number of replicates 57 | % will automatically be set to 1. 58 | % 59 | % 'clusterMaxIter' - Only useful when 'Start' is 'cluster'. Maximum number 60 | % of iterations of the preliminary clustering phase. 61 | % Default is 10. 62 | % 63 | % 64 | % Examples: 65 | % 66 | % fea = rand(500,10); 67 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50); 68 | % 69 | % fea = rand(500,10); 70 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Replicates', 10); 71 | % 72 | % fea = rand(500,10); 73 | % [label, center, bCon, sumD, D] = litekmeans(fea, 5, 'MaxIter', 50); 74 | % TSD = sum(sumD); 75 | % 76 | % fea = rand(500,10); 77 | % initcenter = rand(5,10); 78 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Start', initcenter); 79 | % 80 | % fea = rand(500,10); 81 | % idx=randperm(500); 82 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Start', idx(1:5)); 83 | % 84 | % 85 | % See also KMEANS 86 | % 87 | % [Cite] Deng Cai, "Litekmeans: the fastest matlab implementation of 88 | % kmeans," Available at: 89 | % http://www.zjucadcg.cn/dengcai/Data/Clustering.html, 2011.
90 | % 91 | % version 2.0 --December/2011 92 | % version 1.0 --November/2011 93 | % 94 | % Written by Deng Cai (dengcai AT gmail.com) 95 | 96 | 97 | if nargin < 2 98 | error('litekmeans:TooFewInputs','At least two input arguments required.'); 99 | end 100 | 101 | [n, p] = size(X); 102 | 103 | 104 | pnames = { 'distance' 'start' 'maxiter' 'replicates' 'onlinephase' 'clustermaxiter'}; 105 | dflts = {'sqeuclidean' 'sample' [] [] 'off' [] }; 106 | [eid,errmsg,distance,start,maxit,reps,~,clustermaxit] = getargs(pnames, dflts, varargin{:}); 107 | if ~isempty(eid) 108 | error(sprintf('litekmeans:%s',eid),errmsg); 109 | end 110 | 111 | if ischar(distance) 112 | distNames = {'sqeuclidean','cosine'}; 113 | j = strcmpi(distance, distNames); 114 | j = find(j); 115 | if length(j) > 1 116 | error('litekmeans:AmbiguousDistance', ... 117 | 'Ambiguous ''Distance'' parameter value: %s.', distance); 118 | elseif isempty(j) 119 | error('litekmeans:UnknownDistance', ... 120 | 'Unknown ''Distance'' parameter value: %s.', distance); 121 | end 122 | distance = distNames{j}; 123 | else 124 | error('litekmeans:InvalidDistance', ... 125 | 'The ''Distance'' parameter value must be a string.'); 126 | end 127 | 128 | 129 | center = []; 130 | if ischar(start) 131 | startNames = {'sample','cluster'}; 132 | j = find(strncmpi(start,startNames,length(start))); 133 | if length(j) > 1 134 | error(message('litekmeans:AmbiguousStart', start)); 135 | elseif isempty(j) 136 | error(message('litekmeans:UnknownStart', start)); 137 | elseif isempty(k) 138 | error('litekmeans:MissingK', ... 139 | 'You must specify the number of clusters, K.'); 140 | end 141 | if j == 2 142 | if floor(.1*n) < 5*k 143 | j = 1; 144 | end 145 | end 146 | start = startNames{j}; 147 | elseif isnumeric(start) 148 | if size(start,2) == p 149 | center = start; 150 | elseif (size(start,2) == 1 || size(start,1) == 1) 151 | center = X(start,:); 152 | else 153 | error('litekmeans:MisshapedStart', ... 154 | 'The ''Start'' matrix must have the same number of columns as X.'); 155 | end 156 | if isempty(k) 157 | k = size(center,1); 158 | elseif (k ~= size(center,1)) 159 | error('litekmeans:MisshapedStart', ... 160 | 'The ''Start'' matrix must have K rows.'); 161 | end 162 | start = 'numeric'; 163 | else 164 | error('litekmeans:InvalidStart', ... 165 | 'The ''Start'' parameter value must be a string or a numeric matrix or array.'); 166 | end 167 | 168 | % The default maximum number of iterations is 100 169 | if isempty(maxit) 170 | maxit = 100; 171 | end 172 | 173 | % The default maximum number of iterations for the preliminary clustering 174 | % phase on a random 10% subsample is 10 175 | if isempty(clustermaxit) 176 | clustermaxit = 10; 177 | end 178 | 179 | 180 | % Assume one replicate 181 | if isempty(reps) || ~isempty(center) 182 | reps = 1; 183 | end 184 | 185 | if ~(isscalar(k) && isnumeric(k) && isreal(k) && k > 0 && (round(k)==k)) 186 | error('litekmeans:InvalidK', ... 187 | 'K must be a positive integer value.'); 188 | elseif n < k 189 | error('litekmeans:TooManyClusters', ...
'X must have more rows than the number of clusters.'); 191 | end 192 | 193 | 194 | bestlabel = []; 195 | sumD = zeros(1,k); 196 | bCon = false; 197 | 198 | for t=1:reps 199 | switch start 200 | case 'sample' 201 | center = X(randsample(n,k),:); 202 | case 'cluster' 203 | Xsubset = X(randsample(n,floor(.1*n)),:); 204 | [~, center] = litekmeans(Xsubset, k, varargin{:}, 'start','sample', 'replicates',1 ,'MaxIter',clustermaxit); 205 | case 'numeric' 206 | end 207 | 208 | last = 0;label=1; 209 | it=0; 210 | 211 | switch distance 212 | case 'sqeuclidean' 213 | while any(label ~= last) && it<maxit 214 | last = label; 215 | 216 | bb = full(sum(center.*center,2)'); 217 | ab = full(X*center'); 218 | D = bb(ones(1,n),:) - 2*ab; 219 | 220 | [val,label] = min(D,[],2); % assign samples to the nearest centers 221 | ll = unique(label); 222 | if length(ll) < k 223 | % one of the clusters is empty: reassign the points farthest from their centers 224 | missCluster = 1:k; 225 | missCluster(ll) = []; 226 | missNum = length(missCluster); 227 | aa = sum(X.*X,2); 228 | val = aa + val; 229 | [dump,idx] = sort(val,1,'descend'); 230 | label(idx(1:missNum)) = missCluster; 231 | end 232 | E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix 233 | center = full((E*spdiags(1./sum(E,1)',0,k,k))'*X); % compute center of each cluster 234 | it=it+1; 235 | end 236 | if it<maxit 237 | bCon = true; % converged before reaching MaxIter 238 | end 239 | 240 | if isempty(bestlabel) 241 | bestlabel = label; 242 | bestcenter = center; 243 | if reps>1 244 | if it>=maxit 245 | aa = full(sum(X.*X,2)); 246 | bb = full(sum(center.*center,2)); 247 | ab = full(X*center'); 248 | D = bsxfun(@plus,aa,bb') - 2*ab; 249 | D(D<0) = 0; 250 | else 251 | aa = full(sum(X.*X,2)); 252 | D = aa(:,ones(1,k)) + D; 253 | D(D<0) = 0; 254 | end 255 | D = sqrt(D); 256 | for j = 1:k 257 | sumD(j) = sum(D(label==j,j)); 258 | end 259 | bestsumD = sumD; 260 | bestD = D; 261 | end 262 | else 263 | if it>=maxit 264 | aa = full(sum(X.*X,2)); 265 | bb = full(sum(center.*center,2)); 266 | ab = full(X*center'); 267 | D = bsxfun(@plus,aa,bb') - 2*ab; 268 | D(D<0) = 0; 269 | else 270 | aa = full(sum(X.*X,2)); 271 | D = aa(:,ones(1,k)) + D; 272 | D(D<0) = 0; 273 | end 274 | D = sqrt(D); 275 | for j = 1:k 276 | sumD(j) = sum(D(label==j,j)); 277 | end 278 | if sum(sumD) < sum(bestsumD) 279 | bestlabel = label; 280 | bestcenter = center; 281 | bestsumD = sumD; 282 | bestD = D; 283 | end 284 | end 285 | case 'cosine' 286 | while any(label ~= last) && it<maxit 287 | last = label; 288 | W=full(X*center'); 289 | [val,label] = max(W,[],2); % assign samples to the nearest centers 290 | ll = unique(label); 291 | if length(ll) < k % one of the clusters is empty: reassign the worst-fitting points 292 | missCluster = 1:k; 293 | missCluster(ll) = []; 294 | missNum = length(missCluster); 295 | [dump,idx] = sort(val); 296 | label(idx(1:missNum)) = missCluster; 297 | end 298 | E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix 299 | center = full((E*spdiags(1./sum(E,1)',0,k,k))'*X); % compute center of each cluster 300 | centernorm = sqrt(sum(center.^2, 2)); % renormalize centers to unit length for cosine distance 301 | center = center ./ centernorm(:,ones(1,p)); 302 | it=it+1; 303 | end 304 | if it<maxit 305 | bCon = true; 306 | end 307 | if isempty(bestlabel) 308 | bestlabel = label; 309 | bestcenter = center; 310 | if reps>1 311 | if any(label ~= last) 312 | W=full(X*center'); 313 | end 314 | D = 1-W; 315 | for j = 1:k 316 | sumD(j) = sum(D(label==j,j)); 317 | end 318 | bestsumD = sumD; 319 | bestD = D; 320 | end 321 | else 322 | if any(label ~= last) 323 | W=full(X*center'); 324 | end 325 | D = 1-W; 326 | for j = 1:k 327 | sumD(j) = sum(D(label==j,j)); 328 | end 329 | if sum(sumD) < sum(bestsumD) 330 | bestlabel = label; 331 | bestcenter = center; 332 | bestsumD = sumD; 333 | bestD = D; 334 | end 335 | end 336 | end 337 | end 338 | 339 | label = bestlabel; 340 | center = bestcenter; 341 | if reps>1 342 | sumD = bestsumD; 343 | D = bestD; 344 | elseif nargout > 3 345 | switch distance 346 | case 'sqeuclidean' 347 | if it>=maxit 348 | aa = full(sum(X.*X,2)); 349 | bb = full(sum(center.*center,2)); 350 | ab = full(X*center'); 351 | D = bsxfun(@plus,aa,bb') - 2*ab; 352 | D(D<0) = 0; 353 | else 354 | aa = full(sum(X.*X,2)); 355 | D = aa(:,ones(1,k)) + D; 356 | D(D<0) = 0; 357 | end 358 | D = sqrt(D); 359 | case 'cosine' 360 | if it>=maxit 361 | W=full(X*center'); 362 | end 363 | D = 1-W; 364 | end 365 | for j = 1:k 366 | sumD(j) = sum(D(label==j,j)); 367 | end 368 | end 369 | 370 | 371 | 372 | 373 | function [eid,emsg,varargout]=getargs(pnames,dflts,varargin) 374 | %GETARGS Process parameter name/value pairs 375 | % [EID,EMSG,A,B,...]=GETARGS(PNAMES,DFLTS,'NAME1',VAL1,'NAME2',VAL2,...) 376 | % accepts a cell array PNAMES of valid parameter names, a cell array 377 | % DFLTS of default values for the parameters named in PNAMES, and 378 | % additional parameter name/value pairs. Returns parameter values A,B,... 379 | % in the same order as the names in PNAMES. Outputs corresponding to 380 | % entries in PNAMES that are not specified in the name/value pairs are 381 | % set to the corresponding value from DFLTS. If nargout is equal to 382 | % length(PNAMES)+1, then unrecognized name/value pairs are an error.
If 383 | % nargout is equal to length(PNAMES)+2, then all unrecognized name/value 384 | % pairs are returned in a single cell array following any other outputs. 385 | % 386 | % EID and EMSG are empty if the arguments are valid. If an error occurs, 387 | % EMSG is the text of an error message and EID is the final component 388 | % of an error message id. GETARGS does not actually throw any errors, 389 | % but rather returns EID and EMSG so that the caller may throw the error. 390 | % Outputs will be partially processed after an error occurs. 391 | % 392 | % This utility can be used for processing name/value pair arguments. 393 | % 394 | % Example: 395 | % pnames = {'color' 'linestyle' 'linewidth'} 396 | % dflts = { 'r' '_' '1'} 397 | % varargin = {'linew' 2 'nonesuch' [1 2 3] 'linestyle' ':'} 398 | % [eid,emsg,c,ls,lw] = getargs(pnames,dflts,varargin{:}) % error 399 | % [eid,emsg,c,ls,lw,ur] = getargs(pnames,dflts,varargin{:}) % ok 400 | 401 | % We always create (nparams+2) outputs: 402 | % one each for emsg and eid 403 | % nparams varargs for values corresponding to names in pnames 404 | % If they ask for one more (nargout == nparams+3), it's for unrecognized 405 | % names/values 406 | 407 | % Original Copyright 1993-2008 The MathWorks, Inc. 408 | % Modified by Deng Cai (dengcai@gmail.com) 2011.11.27 409 | 410 | 411 | 412 | 413 | % Initialize some variables 414 | emsg = ''; 415 | eid = ''; 416 | nparams = length(pnames); 417 | varargout = dflts; 418 | unrecog = {}; 419 | nargs = length(varargin); 420 | 421 | % Must have name/value pairs 422 | if mod(nargs,2)~=0 423 | eid = 'WrongNumberArgs'; 424 | emsg = 'Wrong number of arguments.'; 425 | else 426 | % Process name/value pairs 427 | for j=1:2:nargs 428 | pname = varargin{j}; 429 | if ~ischar(pname) 430 | eid = 'BadParamName'; 431 | emsg = 'Parameter name must be text.'; 432 | break; 433 | end 434 | i = strcmpi(pname,pnames); 435 | i = find(i); 436 | if isempty(i) 437 | % if they've asked to get back unrecognized names/values, add this 438 | % one to the list 439 | if nargout > nparams+2 440 | unrecog((end+1):(end+2)) = {varargin{j} varargin{j+1}}; 441 | % otherwise, it's an error 442 | else 443 | eid = 'BadParamName'; 444 | emsg = sprintf('Invalid parameter name: %s.',pname); 445 | break; 446 | end 447 | elseif length(i)>1 448 | eid = 'BadParamName'; 449 | emsg = sprintf('Ambiguous parameter name: %s.',pname); 450 | break; 451 | else 452 | varargout{i} = varargin{j+1}; 453 | end 454 | end 455 | end 456 | 457 | varargout{nparams+1} = unrecog; 458 | -------------------------------------------------------------------------------- /utils/mySVD.m: -------------------------------------------------------------------------------- 1 | function [U, S, V] = mySVD(X,ReducedDim) 2 | %mySVD Accelerated singular value decomposition. 3 | % [U,S,V] = mySVD(X) produces a diagonal matrix S of the same 4 | % dimension as the rank of X and with nonnegative diagonal elements in 5 | % decreasing order, and unitary matrices U and V so that 6 | % X = U*S*V'. 7 | % 8 | % [U,S,V] = mySVD(X,ReducedDim) produces a diagonal matrix S of 9 | % dimension ReducedDim and with nonnegative diagonal elements in 10 | % decreasing order, and unitary matrices U and V so that 11 | % Xhat = U*S*V' is the best approximation (with respect to the F norm) of X 12 | % among all the matrices with rank no larger than ReducedDim.
13 | % 14 | % Based on the size of X, mySVD computes the eigenvectors of X*X^T or X^T*X 15 | % first, and then converts them to the eigenvectors of the other. 16 | % 17 | % See also SVD. (A reconstruction check appears at the end of this dump.) 18 | % 19 | % version 2.0 --Feb/2009 20 | % version 1.0 --April/2004 21 | % 22 | % Written by Deng Cai (dengcai AT gmail.com) 23 | % 24 | 25 | MAX_MATRIX_SIZE = 1600; % You can change this number according to your machine's computational power 26 | EIGVECTOR_RATIO = 0.1; % You can change this number according to your machine's computational power 27 | 28 | 29 | if ~exist('ReducedDim','var') 30 | ReducedDim = 0; 31 | end 32 | 33 | [nSmp, mFea] = size(X); 34 | if mFea/nSmp > 1.0713 35 | ddata = X*X'; 36 | ddata = max(ddata,ddata'); 37 | 38 | dimMatrix = size(ddata,1); 39 | if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO) 40 | option = struct('disp',0); 41 | [U, eigvalue] = eigs(ddata,ReducedDim,'la',option); 42 | eigvalue = diag(eigvalue); 43 | else 44 | if issparse(ddata) 45 | ddata = full(ddata); 46 | end 47 | 48 | [U, eigvalue] = eig(ddata); 49 | eigvalue = diag(eigvalue); 50 | [dump, index] = sort(-eigvalue); 51 | eigvalue = eigvalue(index); 52 | U = U(:, index); 53 | end 54 | clear ddata; 55 | 56 | maxEigValue = max(abs(eigvalue)); 57 | eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10); 58 | eigvalue(eigIdx) = []; 59 | U(:,eigIdx) = []; 60 | 61 | if (ReducedDim > 0) && (ReducedDim < length(eigvalue)) 62 | eigvalue = eigvalue(1:ReducedDim); 63 | U = U(:,1:ReducedDim); 64 | end 65 | 66 | eigvalue_Half = eigvalue.^.5; 67 | S = spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half)); 68 | 69 | if nargout >= 3 70 | eigvalue_MinusHalf = eigvalue_Half.^-1; 71 | V = X'*(U.*repmat(eigvalue_MinusHalf',size(U,1),1)); 72 | end 73 | else 74 | ddata = X'*X; 75 | ddata = max(ddata,ddata'); 76 | 77 | dimMatrix = size(ddata,1); 78 | if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO) 79 | option = struct('disp',0); 80 | [V, eigvalue] = eigs(ddata,ReducedDim,'la',option); 81 | eigvalue = diag(eigvalue); 82 | else 83 | if issparse(ddata) 84 | ddata = full(ddata); 85 | end 86 | 87 | [V, eigvalue] = eig(ddata); 88 | eigvalue = diag(eigvalue); 89 | 90 | [dump, index] = sort(-eigvalue); 91 | eigvalue = eigvalue(index); 92 | V = V(:, index); 93 | end 94 | clear ddata; 95 | 96 | maxEigValue = max(abs(eigvalue)); 97 | eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10); 98 | eigvalue(eigIdx) = []; 99 | V(:,eigIdx) = []; 100 | 101 | if (ReducedDim > 0) && (ReducedDim < length(eigvalue)) 102 | eigvalue = eigvalue(1:ReducedDim); 103 | V = V(:,1:ReducedDim); 104 | end 105 | 106 | eigvalue_Half = eigvalue.^.5; 107 | S = spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half)); 108 | 109 | eigvalue_MinusHalf = eigvalue_Half.^-1; 110 | U = X*(V.*repmat(eigvalue_MinusHalf',size(V,1),1)); 111 | end 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /utils/shrink.m: -------------------------------------------------------------------------------- 1 | function res = shrink(x,a) % element-wise soft-thresholding: argmin_z 0.5*(z-x)^2 + a*|z| (a demo appears at the end of this dump) 2 | res=sign(x).*( max(abs(x)-a,0)); 3 | end -------------------------------------------------------------------------------- /utils/similarMatrix_CAN.m: -------------------------------------------------------------------------------- 1 | function [S,gamma] = similarMatrix_CAN(distP2D,k,rr) 2 | %% input: 3 | %%% distP2D: the distance from data to prototype, n2*n1 4 | %%% k: the number of
neighbors 5 | %%% rr: the regularization parameter gamma; pass -1 (or omit it) to 6 | %%% estimate gamma adaptively from the k-nearest-neighbor distances 7 | %% output: 8 | %%% S: the similarity matrix, n2*n1 (a usage sketch appears at the end of this dump) 9 | [n2,n1]=size(distP2D); 10 | % distP2D=EuDist2(data',prototype); % n2*n1 11 | if nargin==2 12 | rr=-1; 13 | end 14 | if rr == -1 15 | [d, idx] = sort(distP2D,2,'ascend'); 16 | 17 | gamma=1/n2*sum( k/2* d(:,k+1) - 1/2* sum(d(:,1:k),2) ); % adaptive gamma, averaged over all samples 18 | S = zeros(n2,n1); 19 | for i = 1:n2 20 | idxa0 = idx(i,1:k); 21 | S(i,idxa0)=EProjSimplex_new((d(i,k+1)-d(i,1:k))./(2*gamma)); 22 | end 23 | else 24 | gamma=rr; 25 | [dx, idx] = sort(distP2D,2,'ascend'); 26 | S = zeros(n2,n1); 27 | for i = 1:n2 28 | idxa0 = idx(i,1:k); 29 | S(i,idxa0)=EProjSimplex_new(-1*dx(i,1:k)./(2*gamma)); 30 | end 31 | end 32 | 33 | end 34 | 35 | -------------------------------------------------------------------------------- /utils/splitData.m: -------------------------------------------------------------------------------- 1 | function [X1,Y1,X2,Y2] = splitData(varargin) 2 | % input: 3 | % X:fea*n 4 | % Y:n*1 5 | % num: a ratio in (0,1) or an integer number of samples per class 6 | % output 7 | % X1: fea*num 8 | % Y1: num*1 9 | % X2: fea*(n-num) 10 | % Y2: (n-num)*1 11 | X1=[];X2=[];Y1=[];Y2=[]; 12 | if nargin==3 13 | X=varargin{1}; 14 | Y=varargin{2}; 15 | num=varargin{3}; 16 | else 17 | error("splitData: At least 3 parameters are required\n"); 18 | end 19 | if num==0 20 | error("splitData: Parameter{3} must be greater than zero\n"); 21 | end 22 | if size(Y,2)>1 23 | Y=Y'; 24 | end 25 | n=size(X,2); 26 | C=length(find(unique(Y))); 27 | if 0<num && num<1 % num is a ratio: draw floor(num*len) samples per class 28 | ratio=num; 29 | else 30 | ratio=0; % num is an absolute number of samples per class 31 | end 32 | for c=1:C 33 | pos=find(Y==c); 34 | len=length(pos); 35 | randomIndex=randperm(len); 36 | if ratio>0, selectedNumber=floor(ratio*len); else, selectedNumber=num; end 37 | if selectedNumber>len 38 | warning('selectedNumber (%d)> len (%d)!\n',selectedNumber,len); 39 | selectedNumber=len; 40 | end 41 | index=pos(randomIndex(1:selectedNumber)); 42 | index2=pos(randomIndex(selectedNumber+1:end)); 43 | X1=[X1,X(:,index)];Y1=[Y1;Y(index)]; 44 | X2=[X2,X(:,index2)];Y2=[Y2;Y(index2)]; 45 | end 46 | end 47 | 48 | -------------------------------------------------------------------------------- /utils/updateL21.m: -------------------------------------------------------------------------------- 1 | function [G] = updateL21(E) 2 | %% input 3 | %%% E: m*d; minimizing ||E||_{2,1} is handled via the reweighted trace tr(E'*G*E), with G an m*m diagonal matrix 4 | %% output 5 | %%% G : sparse diagonal matrix with G(i,i)=1/(2*||E(i,:)||_2), so that tr(E'*G*E)=0.5*||E||_{2,1} for fixed G (a numeric check appears at the end of this dump) 6 | ec = sqrt(sum(E.*E,2)+eps); 7 | G = 0.5./ec; 8 | n=length(G); 9 | G = spdiags(G,0,n,n); 10 | end 11 | -------------------------------------------------------------------------------- /utils/withinScatter.m: -------------------------------------------------------------------------------- 1 | function [Sw] = withinScatter(X,Y) % within-class scatter: Sw = sum_c (X_c - m_c)(X_c - m_c)' 2 | C=length(find(unique(Y))); 3 | Sw=0; 4 | for c=1:C 5 | Xc=X(:,Y==c); 6 | Xmean=mean(Xc,2); 7 | Fc=(Xc-Xmean); 8 | Sw=Sw+(Fc*Fc'); 9 | end 10 | end 11 | --------------------------------------------------------------------------------
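The sketches below are editor-added illustrations, not files from the repository; they assume the repository root is the working directory and that ./utils/ is on the MATLAB path (addpath('./utils/')). First, a minimal usage sketch of hotmatrix, the one-hot encoder documented above:

labels = [1;2;2;3]; C = 3;
H  = hotmatrix(labels,C);    % H(i,c)==1 iff labels(i)==c
Hw = hotmatrix(labels,C,1);  % weighted variant: nonzero entries are 1/n_c, with n_c the class size
% here Hw(2,2)==0.5, because class 2 contains two samples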
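A small check of shrink, the element-wise soft-thresholding operator: it computes the proximal map of a*|.|, i.e., argmin_z 0.5*(z-x)^2 + a*|z| applied entry by entry.

x = [-3 -0.2 0 0.5 2];
res = shrink(x,1)   % expected: [-2 0 0 0 1]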
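A numeric check of the reweighting identity behind updateL21: with G(i,i)=1/(2*||E(i,:)||_2), the quantity tr(E'*G*E) equals 0.5*||E||_{2,1} up to the eps smoothing. This is the standard iteratively-reweighted device used by L21-regularized solvers.

E = randn(6,3);
G = updateL21(E);                 % sparse diagonal, G(i,i)=1/(2*||E(i,:)||_2)
l21 = sum(sqrt(sum(E.*E,2)));     % ||E||_{2,1}
fprintf('0.5*||E||_{2,1}=%.6f, tr(E''*G*E)=%.6f\n', 0.5*l21, trace(E'*G*E));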
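A sanity check of mySVD: since it eigendecomposes the smaller Gram matrix (X*X' or X'*X) and converts the eigenvectors to those of the other, the factors should reconstruct X like the built-in svd does, and a ReducedDim argument should give the best low-rank approximation in the Frobenius norm.

X = randn(200,30);
[U,S,V] = mySVD(X);
norm(X - U*S*V','fro')        % should be near 0
[U5,S5,V5] = mySVD(X,5);      % rank-5 truncation
norm(X - U5*S5*V5','fro')     % residual energy of the discarded singular values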
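Finally, a usage sketch of similarMatrix_CAN together with the EuDist2 helper. Each row of S holds similarities over the k nearest prototypes (here the samples themselves); assuming EProjSimplex_new projects onto the probability simplex, as its use in the function suggests, each row should sum to 1.

X = randn(20,50);                    % m*n data, repo convention (features*samples)
D = EuDist2(X',[],0);                % n*n squared Euclidean distances between samples
[S,gamma] = similarMatrix_CAN(D,5);  % k=5 adaptive neighbors, gamma estimated from the data
sum(S(1,:))                          % each row should sum to 1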