├── email_notify.m
├── compute_SbSw_sup.m
├── fs_unsup_maxvar.m
├── optSigma.m
├── LabelFormat.m
├── funG.m
├── get_mdcs_ip_dir.m
├── README.md
├── extractXY.m
├── fs_unsup_udfs_build_param.m
├── compute_SbSw_unsup.m
├── compute_W.m
├── fs_unsup_jelsr_liang_build_param.m
├── fs_unsup_udfs.m
├── prettyPlotProcessOptions.m
├── fs_unsup_mcfs_build_param.m
├── LocalDisAna.m
├── fs_unsup_lapscore_build_param.m
├── exp1_aio.m
├── fs_unsup_rufs_build_param.m
├── localLearnMx_KRR.m
├── fs_unsup_allfea_single_func.m
├── fs_unsup_spfs_sfs.m
├── fs_unsup_traceratio.m
├── ms2tex.m
├── fs_unsup_jelsr_build_param.m
├── eval_fsasl_param.m
├── tfidf.m
├── fs_unsup_ndfs_build_param.m
├── EuDist2.m
├── NormalizeFea.m
├── fs_unsup_spfs_nes.m
├── fs_unsup_llcfs_build_param.m
├── fs_unsup_spec_build_param.m
├── fs_unsup_maxvar_single_func.m
├── fs_unsup_glspfs.m
├── LocalReconstructLap.m
├── compute_accuracy_F.m
├── components.m
├── fs_unsup_jelsr.m
├── fs_unsup_spfs.m
├── compute_Y.m
├── fs_unsup_glspfs_build_param.m
├── fs_unsup_fsasl_build_param.m
├── fs_unsup_traceratio_single_func.m
├── fs_unsup_spfs_lar.m
├── fs_unsup_llcfs_single_func.m
├── fs_unsup_ndfs_single_func.m
├── fs_unsup_udfs_single_func.m
├── fs_unsup_lapscore_single_func.m
├── fs_unsup_spec_single_func.m
├── fs_unsup_fsasl_11_11_1_single_func.m
├── fs_unsup_jelsr_liang_lle_single_func.m
├── fs_unsup_jelsr_liang_lpp_single_func.m
├── fs_unsup_fsasl_11_11_5_single_func.m
├── fs_unsup_jelsr_lle_single_func.m
├── fs_unsup_jelsr_lpp_single_func.m
├── fs_unsup_spfs_single_func.m
├── fs_unsup_fsasl_11_5_5_single_func.m
├── fs_unsup_ndfs.m
├── fs_unsup_rufs_single_func.m
├── L2_distance.m
├── SimGraph_NearestNeighbors.m
├── fs_unsup_glspfs_single_func.m
├── mdcs_check.m
├── fs_unsup_mcfs_single_func.m
├── initFactor.m
├── constructKernel.m
├── find_nn.m
├── grid_search_fs.m
├── fs_unsup_jelsr_liang.m
├── fs_unsup_lapscore.m
├── Eigenmap.m
├── fs_unsup_spec.m
├── lpp.m
├── ltsa.m
├── fs_unsup_spfs_larnes.m
├── fs_unsup_llcfs.m
├── evalUnSupFS.m
├── scale_dist3_knn.m
├── lle.m
├── computeLocalStructure.m
├── sll_opts.m
├── run_exp1_func.m
├── fs_unsup_mcfs.m
├── plot_result.m
├── FSASL.m
└── lars.m


/email_notify.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csliangdu/FSASL/HEAD/email_notify.m


--------------------------------------------------------------------------------
/compute_SbSw_sup.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csliangdu/FSASL/HEAD/compute_SbSw_sup.m


--------------------------------------------------------------------------------
/fs_unsup_maxvar.m:
--------------------------------------------------------------------------------
function FeaScore = fs_unsup_maxvar(X)
%FS_UNSUP_MAXVAR Score every feature by its sample variance.
%
%   X        : nSmp * nDim data matrix, one sample per row.
%   FeaScore : 1 * nDim row vector, FeaScore(j) = variance of X(:, j).
%
% The dimension is passed to var explicitly. Without it var works along the
% first non-singleton dimension, so a single-sample input (1 * nDim) would
% collapse to one scalar instead of one score per feature.

FeaScore = var(X, 0, 1);
7 | 


--------------------------------------------------------------------------------
/optSigma.m:
--------------------------------------------------------------------------------
function sigma = optSigma(X)
%OPTSIGMA Median of all pairwise distances between the rows of X.
%
%   X     : data matrix with one sample per row.
%   sigma : median over every entry of EuDist2(X, X); the diagonal
%           (self-distance) entries take part in the median as well.

pairDist = EuDist2(X, X);
% Flatten to a row so that median yields a single scalar.
sigma = median(pairDist(:).');


--------------------------------------------------------------------------------
/LabelFormat.m:
--------------------------------------------------------------------------------
function Y = LabelFormat(y)
%LABELFORMAT Convert a label vector into a 0/1 class-indicator matrix.
%
%   y : vector of labels (row or column). The distinct values are mapped,
%       in the sorted order given by unique, to class indices 1..nClass.
%   Y : numel(y) * nClass matrix with Y(i, c) = 1 iff sample i has class c.
%       An empty y yields an empty 0 * 0 matrix.

[~, ~, classIdx] = unique(y);
% Force a column: the orientation of unique's index output is not the same
% across MATLAB releases, and sub2ind needs equally shaped subscripts.
classIdx = classIdx(:);
nSmp = numel(classIdx);
if nSmp == 0
    Y = zeros(0, 0);
    return;
end

nClass = max(classIdx);
Y = zeros(nSmp, nClass);
Y(sub2ind([nSmp, nClass], (1:nSmp)', classIdx)) = 1;


--------------------------------------------------------------------------------
/funG.m:
--------------------------------------------------------------------------------
 1 | function [ V, D ] = funG( G, t )
 2 | %function [ V, D ] = funG( G, t )
 3 | %   modify the eigenvalue of a matrix by t order.
 4 | 
 5 | G = full(G);
 6 | [V,D] = eig(G);
 7 | d = diag(D);
 8 | % it is important to calculate G first
 9 | [d, orderIDX] = sort(d);
10 | V = V(:,orderIDX);
11 | d = d.^t;
12 | d(isnan(d)) = 0;
13 | d(isinf(d)) = 0;
14 | D = diag(d);


--------------------------------------------------------------------------------
/get_mdcs_ip_dir.m:
--------------------------------------------------------------------------------
 1 | function [mdcs_ips, mdcs_dirs] = get_mdcs_ip_dir(n)
 2 | mdcs_ips = cell(n,1);
 3 | mdcs_dirs = cell(n,1);
 4 | parfor i=1:n*1000
 5 |     localhost = java.net.InetAddress.getLocalHost();
 6 |     ip = localhost.getHostAddress();
 7 |     mdcs_ips{i} = char(ip);
 8 |     mdcs_dirs{i} = pwd;
 9 | end
10 | mdcs_ips = unique(mdcs_ips);
11 | mdcs_dirs = unique(mdcs_dirs);
12 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | FSASL
 2 | =====
 3 | 
 4 | An unsupervised feature selection algorithm with adaptive structure learning.
 5 | 
 6 | The code is used to generate fully reproducible experimental results in [1].
 7 | 
 8 | [1] Liang Du and Yi-Dong Shen. Unsupervised Feature Selection with Adaptive Structure Learning. In Proceedings of the 21st ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD), pages 209-218, Sydney, Australia, August 10–13, 2015.
 9 | 
10 | 


--------------------------------------------------------------------------------
/extractXY.m:
--------------------------------------------------------------------------------
 1 | function [X, Y] = extractXY(dataset)
 2 | % For single view data : 'X', 'y'
 3 | load(dataset);
 4 | if ~exist('X', 'var') && exist('fea', 'var')
 5 |     X = fea;
 6 | end
 7 | 
 8 | if exist('Y', 'var') && size(Y,1) < size(Y,2)
 9 |     Y = Y';
10 | end
11 | 
12 | if exist('Y', 'var') && min(size(Y)) > 1
13 |     Y = LabelFormat(Y);
14 | end
15 | 
16 | if ~exist('Y', 'var') && exist('gnd', 'var')
17 |     Y = gnd(:);
18 | end
19 | 
20 | if ~exist('Y', 'var') && exist('y', 'var')
21 |     Y = y(:);
22 | end
23 | end


--------------------------------------------------------------------------------
/fs_unsup_udfs_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_udfs_build_param(knnCandi, gammaCandi, lamdaCandi)
 2 | n1 = length(knnCandi);
 3 | n2 = length(gammaCandi);
 4 | n3 = length(lamdaCandi);
 5 | nP = n1 * n2 * n3;
 6 | paramCell = cell(nP, 1);
 7 | idx = 0;
 8 | for i1 = 1:n1
 9 |     for i2 = 1:n2
10 | 		for i3 = 1:n3
11 | 			param = [];
12 | 			param.k = knnCandi(i1);
13 | 			param.gamma = gammaCandi(i2);
14 | 			param.lamda = lamdaCandi(i3);
15 | 			idx = idx + 1;
16 | 			paramCell{idx} = param;
17 | 		end
18 |     end
19 | end
20 | 


--------------------------------------------------------------------------------
/compute_SbSw_unsup.m:
--------------------------------------------------------------------------------
 1 | function [Sb, Sw] = compute_SbSw_unsup(X, nK)
 2 | % X: training data each row is a data;
 3 | % calculate L_b and L_w defined in Laplacian score
 4 | % Sb = X*L_b*X';
 5 | % Sw = X*L_w*X';
 6 | if nargin < 2
 7 |     nK = 5;
 8 | end
 9 | W = constructW(X, struct('k', nK));
10 | Dw = sum(W,2);
11 | L_w = diag(Dw) - W;
12 | L_b = (Dw * Dw') / sum(Dw);
13 | 
14 | L_w = (L_w + L_w')/2;
15 | L_b = (L_b + L_b')/2;
16 | 
17 | Sb = X'*L_b*X;
18 | Sw = X'*L_w*X;
19 | 
20 | % very important!
21 | Sb = (Sb + Sb')/2;
22 | Sw = (Sw + Sw')/2;
23 | 


--------------------------------------------------------------------------------
/compute_W.m:
--------------------------------------------------------------------------------
 1 | function [W] = compute_W(W,data,D_mhalf)
 2 | 
 3 | [nSmp,nFea] = size(data);
 4 | 
 5 | %%%%%%%%%%%%%%%%%%%% Normalize W
 6 | if nSmp < 5000
 7 |     tmpD_mhalf = repmat(D_mhalf,1,nSmp);
 8 |     W = (tmpD_mhalf.*W).*tmpD_mhalf';
 9 |     clear tmpD_mhalf;
10 | else
11 |     [i_idx,j_idx,v_idx] = find(W);
12 |     v1_idx = zeros(size(v_idx));
13 |     for i=1:length(v_idx)
14 |         v1_idx(i) = v_idx(i)*D_mhalf(i_idx(i))*D_mhalf(j_idx(i));
15 |     end
16 |     W = sparse(i_idx,j_idx,v1_idx);
17 |     clear i_idx j_idx v_idx v1_idx
18 | end
19 | W = (W+W')/2;


--------------------------------------------------------------------------------
/fs_unsup_jelsr_liang_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_jelsr_liang_build_param(r1Candi, r2Candi, knnCandi)
 2 | n1 = length(r1Candi);
 3 | n2 = length(r2Candi);
 4 | n3 = length(knnCandi);
 5 | nP = n1 * n2 * n3;
 6 | paramCell = cell(nP, 1);
 7 | idx = 0;
 8 | for i1 = 1:n1
 9 |     for i2 = 1:n2
10 |         for i3 = 1:n3
11 |             param = [];
12 |             param.r1 = r1Candi(i1);
13 |             param.r2 = r2Candi(i2);
14 |             param.r3 = knnCandi(i3);
15 |             idx = idx + 1;
16 |             paramCell{idx} = param;
17 |         end
18 |     end
19 | end
20 | end


--------------------------------------------------------------------------------
/fs_unsup_udfs.m:
--------------------------------------------------------------------------------
 1 | function [X, obj]=fs_unsup_udfs(A, k, r, X0)
 2 | % quadratic loss with 21-norm regularization
 3 | %
 4 | %  min_{X'*X=I}  Tr(X'*A*X) + r * ||X||_21
 5 | % 
 6 | 
 7 | NIter = 20;
 8 | [m, n] = size(A); %#ok
 9 | if nargin < 4
10 |     d = ones(n,1);
11 | else
12 |     Xi = sqrt(sum(X0.*X0,2)+eps);
13 |     d = 0.5./(Xi);
14 | end;
15 | 
16 | for iter = 1:NIter
17 |     D = diag(d);
18 |     M = A+r*D;
19 |     M = max(M,M');
20 |     [evec, eval] = eig(M);
21 |     eval = diag(eval);
22 |     [~, idx] = sort(eval);
23 |     X = evec(:,idx(1:k));
24 |     
25 |     Xi = sqrt(sum(X.*X,2)+eps);
26 |     d = 0.5./(Xi);
27 |     
28 |     obj(iter) = trace(X'*A*X) + r*sum(Xi); %#ok
29 | end;


--------------------------------------------------------------------------------
/prettyPlotProcessOptions.m:
--------------------------------------------------------------------------------
 1 | function [varargout] = prettyPlotProcessOptions(options,varargin)
 2 | % Similar to processOptions, but case insensitive and
 3 | %   using a struct instead of a variable length list
 4 | 
 5 | options = toUpper(options);
 6 | 
 7 | for i = 1:2:length(varargin)
 8 |     if isfield(options,upper(varargin{i}))
 9 |         v = getfield(options,upper(varargin{i}));
10 |         if isempty(v)
11 |             varargout{(i+1)/2}=varargin{i+1};
12 |         else
13 |             varargout{(i+1)/2}=v;
14 |         end
15 |     else
16 |         varargout{(i+1)/2}=varargin{i+1};
17 |     end
18 | end
19 | 
20 | end
21 | 
22 | function [o] = toUpper(o)
23 | if ~isempty(o)
24 |     fn = fieldnames(o);
25 |     for i = 1:length(fn)
26 |         o = setfield(o,upper(fn{i}),getfield(o,fn{i}));
27 |     end
28 | end
29 | end


--------------------------------------------------------------------------------
/fs_unsup_mcfs_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_mcfs_build_param(knnCandi, weightCandi, weight_param_Candi)
 2 | n1 = length(knnCandi);
 3 | n2 = length(weightCandi);
 4 | n3 = zeros(n2, 1);
 5 | for i1 = 1:length(weightCandi)
 6 |     n3(i1) = max(1, length(weight_param_Candi{i1}));
 7 | end
 8 | nP = n1 * max(sum(n3), 1) ;
 9 | paramCell = cell(nP, 1);
10 | idx = 0;
11 | for i1 = 1:n1
12 |     for i2 = 1:n2
13 |         for i3 = 1:max(n3(i2), 1)
14 |             param = [];
15 |             param.k = knnCandi(i1);
16 |             param.weightMode = weightCandi{i2};
17 |             if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2})
18 |                 tmp = weight_param_Candi{i2};
19 |                 param.t = tmp(i3);
20 |             else
21 |                 param.t = 1; % place holder
22 |             end
23 |             idx = idx + 1;
24 |             paramCell{idx} = param;
25 |         end
26 |     end
27 | end


--------------------------------------------------------------------------------
/LocalDisAna.m:
--------------------------------------------------------------------------------
 1 | function L = LocalDisAna(X, para)
 2 | % unsupervised local discriminative analysis 
 3 | % each column is a data
 4 | 
 5 | 
 6 | 
 7 | [D, n] = size(X);
 8 | 
 9 | if isfield(para, 'k')
10 |     k = para.k+1;
11 | else
12 |     k = 16;
13 | end;
14 | if isfield(para, 'lamda')
15 |     lamda = para.lamda;
16 | else
17 |     lamda = 1000;
18 | end;
19 | 
20 | Lc = eye(k) - 1/k*ones(k);
21 | A = spalloc(n*k,n*k,5*n*k);
22 | S = spalloc(n,n*k,5*n*k);
23 | for i = 1:n
24 |     dis = repmat(X(:,i),1,n) - X;
25 |     dis = sum(dis.*dis);
26 |     [dumb, nnidx] = sort(dis);
27 |     Xi = X(:,nnidx(1:k));
28 |     Xi = Xi*Lc;
29 |     if D > k
30 |         Ai = inv(lamda*eye(k) + Xi'*Xi);
31 |         Ai = Lc*Ai*Lc;
32 |     else
33 |          Ai = Lc - lamda*Xi'*inv(eye(D) + lamda*Xi*Xi')*Xi;
34 |     end;
35 |     lidx = (i-1)*k+1:(i-1)*k+k;
36 |     A(lidx, lidx) = Ai;
37 |     S(nnidx(1:k),lidx) = eye(k);
38 | end;
39 |     
40 | L = S*A*S';
41 | 
42 | 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/fs_unsup_lapscore_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_lapscore_build_param(knnCandi, weightCandi, weight_param_Candi)
 2 | n1 = length(knnCandi);
 3 | n2 = length(weightCandi);
 4 | n3 = zeros(n2, 1);
 5 | for i1 = 1:length(weightCandi)
 6 |     n3(i1) = max(1, length(weight_param_Candi{i1}));
 7 | end
 8 | 
 9 | nP = n1 * max(sum(n3), 1);
10 | paramCell = cell(nP, 1);
11 | idx = 0;
12 | for i1 = 1:n1
13 |     for i2 = 1:n2
14 |         for i3 = 1:max(n3(i2), 1)
15 |             
16 |             param = [];
17 |             param.k = knnCandi(i1);
18 |             param.weightMode = weightCandi{i2};
19 |             if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2})
20 |                 tmp = weight_param_Candi{i2};
21 |                 param.t = tmp(i3);
22 |             else
23 |                 param.t = 1; % place holder
24 |             end           
25 |             
26 |             idx = idx + 1;
27 |             paramCell{idx} = param;
28 |         end
29 |     end
30 | end
31 | 
32 | 


--------------------------------------------------------------------------------
/exp1_aio.m:
--------------------------------------------------------------------------------
 1 | algs = {'AllFea', 'LapScore', 'SPFS', 'UDFS', 'LLCFS', 'MCFS',  'NDFS', 'RUFS', 'JELSR_lpp', 'GLSPFS', 'FSSL_11_11_5'};
 2 | lab_cluster = 'local'; % the matlab distributed computing server (MDCS) name, you may use 'local' as default 
 3 | lab_cluster_size = 11; % number of node
 4 | lab_email_username = '';% the email notification service provided by our lab, you can also use other public email configuration.
 5 | lab_email_password = '';
 6 | ds = {'USPS_9298n_256d_10c', 'wap_1560n_8460d_20c_tfidf',  ...
 7 |     'webbb_texas_814n_4029d_7c_binary', 'webkb_washington_1166n_4165d_7c_binary', ...
 8 |     'Carcinom_174n_9182d_11c', 'binaryalphadigs_1404n_320d_36c'};
 9 | ds = {'JAFFE_213n_676d_10c'}; % demo data
10 | for i1 = 1:1%length(ds)
11 |     dataset = ds{i1};
12 |     job = batch(@run_exp1_func, 4, {dataset, algs, 'lab_email_username', 'lab_email_password'},...
13 |         'Profile', lab_cluster, 'pool', lab_cluster_size, ...
14 |         'AttachedFiles', {[dataset, '.mat'], 'eppMatrix.mexa64', 'eppMatrix.mexglx'},...
15 |         'CaptureDiary',true, 'CurrentDirectory', '.');
16 | end
17 | 


--------------------------------------------------------------------------------
/fs_unsup_rufs_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_rufs_build_param(llkrrParamCell, alphaCandi, betaCandi, nuCandi)
 2 | n1 = length(alphaCandi);
 3 | n2 = length(betaCandi);
 4 | n3 = length(nuCandi);
 5 | n4 = length(llkrrParamCell);
 6 | nP = n1 * n2 * n3 * n4;
 7 | paramCell = cell(nP, 1);
 8 | idx = 0;
 9 | for i1 = 1:n1
10 |     for i2 = 1:n2
11 |         for i3 = 1:n3
12 |             for i4 = 1:n4
13 |                 param = [];
14 |                 param.alpha = alphaCandi(i1);
15 |                 param.beta = betaCandi(i2);
16 |                 param.nu = nuCandi(i3);
17 |                 param.MaxIter = 20;
18 |                 if param.alpha + param.beta + param.nu > 1e4
19 |                     param.MaxIter = 5; % large parameter is costly for convergence
20 |                 end
21 |                 param.epsilon = 1e-2;
22 |                 param.verbose = 0;
23 |                 
24 |                 param.llkrrParam = llkrrParamCell{i4};
25 |                 idx = idx + 1;
26 |                 paramCell{idx} = param;
27 |             end
28 |         end
29 |     end
30 | end


--------------------------------------------------------------------------------
/localLearnMx_KRR.m:
--------------------------------------------------------------------------------
 1 | function T = localLearnMx_KRR( X, param)
 2 | 
 3 | % conpute K via rbf function, the width is computed by self-tunning
 4 | 
 5 | K = constructW(X, struct('WeightMode', 'HeatKernel', 'k', param.nNeighbors)); 
 6 | 
 7 | [nSmp, nDim] = size(X);
 8 | 
 9 | % locate neighbors for each data
10 | W = 1*(K>0);
11 | 
12 | % compute the local learning matrices
13 | if param.nNeighbors < nSmp - 1 && param.nNeighbors > 0
14 |     % compute A by local regularized kernel ridge regression
15 |     A = zeros( nSmp, nSmp );
16 |     for n = 1 : nSmp
17 |         idxV = find( W( n, : ) > 0 );
18 |         A( n, idxV ) = K( n, idxV )*inv( K(idxV, idxV) + param.rLambda * eye( length( idxV ) ) );
19 |     end
20 | 
21 |     % matrix T
22 |     T = eye( nSmp ) - A;
23 |     T = T' * T;
24 | 
25 | else  % all the data are neighboring to each other
26 |     A = [];  % A can not be computed directly
27 |     I = eye( nSmp );
28 | 
29 |     % deformed kernel
30 |     T = K * inv( K + param.rLambda * I );
31 | 
32 |     T = I - T;
33 |     T = inv( diag( diag( T )  ) ) * T;
34 |     T = T' * T;
35 | end


--------------------------------------------------------------------------------
/fs_unsup_allfea_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_allfea_single_func(dataset, exp_settings, algo_settings)
 2 | %Unsupervised feature selection using AllFea
 3 | 
 4 | %======================setup===========================
 5 | FeaNumCandi = exp_settings.FeaNumCandi;
 6 | nKmeans = exp_settings.nKmeans;
 7 | prefix_mdcs = [];
 8 | if isfield(exp_settings, 'prefix_mdcs')
 9 |     prefix_mdcs = exp_settings.prefix_mdcs;
10 | end
11 | %===============================================
12 | [X, Y] = extractXY(dataset);
13 | [nSmp,nDim] = size(X);
14 | 
15 | t_start = clock;
16 | disp('get AllFea ...');
17 | fs_res = evalUnSupFS(X, Y, [1:nDim], struct('nKm', nKmeans));
18 | res_aio = cell(1, length(FeaNumCandi)); 
19 | parfor feaIdx = 1:length(FeaNumCandi)
20 |     res_aio{1, feaIdx} = fs_res;
21 | end
22 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
23 | res_gs.feaset = FeaNumCandi;
24 | t_end = clock;
25 | t1 = etime(t_end,t_start);
26 | disp(['exe time: ',num2str(t1)]);
27 | res_gs.time = t1;
28 | res_gs.time2 = t1;
29 | 
30 | save(fullfile(prefix_mdcs, [dataset, '_best_result_AllFea.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
31 | end


--------------------------------------------------------------------------------
/fs_unsup_spfs_sfs.m:
--------------------------------------------------------------------------------
 1 | function [ fList ] = fs_unsup_spfs_sfs(X, K, numF)
 2 | % function [ fList ] = spfs_sfs(X, K, numF)
 3 | %   X - data, each row is an instance
 4 | %   K - the similarity matrix of instances
 5 | %   numF - the number of features to be selected
 6 | 
 7 | nF = size(X,2);
 8 | fList = zeros(numF,1);
 9 | R = K;
10 | count = 1;
11 | while count <= numF && count <= nF
12 |     %     fprintf('%i,',count);
13 |     %     if mod(count,10)==0
14 |     %         fprintf('\n');
15 |     %     end
16 |     
17 |     [R, selF] = find_best_match(X, fList, R);
18 |     if selF == -1
19 |         return;
20 |     else
21 |         fList(count) = selF;
22 |     end
23 |     count = count + 1;
24 | end
25 | end
26 | 
27 | function [ newR, selF ] = find_best_match(X, fList, R)
28 | nF = size(X,2);
29 | newR = R;
30 | selF = -1;
31 | %     smallestErr = norm(newR,'fro'); modified
32 | smallestErr = inf;
33 | for i = 1:nF
34 |     if sum(fList == i)>0
35 |         continue;
36 |     end
37 |     curF = X(:,i);
38 |     curErr = (curF'*curF)^2-2*curF'*R*curF;
39 |     if smallestErr >= curErr
40 |         newR = R - curF*curF';
41 |         smallestErr = curErr;
42 |         selF = i;
43 |     end
44 | end
45 | 
46 | end


--------------------------------------------------------------------------------
/fs_unsup_traceratio.m:
--------------------------------------------------------------------------------
 1 | function [feature_idx, feature_score, subset_score] = fs_unsup_traceratio(Sb, Sw, feature_num)
 2 | % Sb: a matrix to reflects the between-class or global affinity
 3 | %     relationship encoded on Graph, Sb = X*Lb*X'
 4 | % Sw: a matrix to reflects the within-class or local affinity relationship
 5 | %     encoded on Graph, Sw = X*Lw*X'
 6 | % feature_idx: the ranked feature index based on subset-level score
 7 | % feature_score: the feature-level score
 8 | % subset_score: the subset-level score
 9 | 
10 | 
11 | sb = abs(diag(Sb));
12 | sw = abs(diag(Sw));
13 | sw(find(sw == 0)) = 0.000000000000001; %#ok
14 | 
15 | % preprocessing.
16 | t_fnum = length(sb);
17 | [fs, fs_idx] = sort(sb./sw,'descend');
18 | 
19 | para = 0.9;
20 | 
21 | u_fnum = floor(para*t_fnum);
22 | sb = sb(fs_idx(1:u_fnum));
23 | sw = sw(fs_idx(1:u_fnum));
24 | 
25 | 
26 | ind = 1:feature_num;
27 | k = sum(sb(ind))/sum(sw(ind));
28 | for i = 1: 20
29 |     [score, I] = sort(sb - k*sw, 'descend');
30 |     ind = I(1:feature_num);
31 |     old_k = k;
32 |     k = sum(sb(ind))/sum(sw(ind));
33 |     if abs(k - old_k) < 0.000000000001
34 |         break;
35 |     end;
36 | end
37 | I = fs_idx(I);
38 | 
39 | feature_idx = I;
40 | feature_score = score;
41 | subset_score = k;


--------------------------------------------------------------------------------
/ms2tex.m:
--------------------------------------------------------------------------------
 1 | function t = ms2tex(mean_val, std_val, ismax, sigs, sigs2, prefix)
 2 | if ~exist('ismax', 'var')
 3 |     ismax = 1;
 4 | end
 5 | 
 6 | sigs(isnan(sigs)) = 0; % failed to reject a=b, nan means a = b, of course sigs = 0
 7 | sigs2(isnan(sigs2)) = 1.0;
 8 | 
 9 | n = length(mean_val);
10 | t = prefix;
11 | if ismax
12 |     [~, idx] = max(mean_val);
13 | else
14 |     [~, idx] = min(mean_val);
15 | end
16 | for i1 = 1:n
17 |     if isempty(sigs2)
18 |         if i1 == idx
19 |             t = [t, '& \tabincell{c}{ \textbf{', num2str(mean_val(i1) * 100, '%4.2f'), '} \\ \textbf{$\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), '}} '];
20 |         else
21 |             t = [t, '& \tabincell{c}{ ', num2str(mean_val(i1) * 100, '%4.2f'), ' \\ $\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), '} '];
22 |         end
23 |     else
24 |         if sigs(i1) == 0
25 |             t = [t, '& \tabincell{c}{ \textbf{', num2str(mean_val(i1) * 100, '%4.2f'), '} \\ \textbf{$\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), ' } \\ \textbf{', num2str(sigs2(i1), '%4.2f'), ' }} '];
26 |         else
27 |             t = [t, '& \tabincell{c}{ ', num2str(mean_val(i1) * 100, '%4.2f'), ' \\ $\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), ' \\', num2str(sigs2(i1), '%4.2f'), ' } '];
28 |         end
29 |     end
30 |     
31 | end
32 | t = [t, '\\ \hline'];


--------------------------------------------------------------------------------
/fs_unsup_jelsr_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, alphaCandi, betaCandi)
 2 | n1 = length(knnCandi);
 3 | n2 = length(weightCandi);
 4 | n3 = zeros(n2, 1);
 5 | for i1 = 1:length(weightCandi)
 6 |     n3(i1) = max(1, length(weight_param_Candi{i1}));
 7 | end
 8 | n4 = length(alphaCandi);
 9 | n5 = length(betaCandi);
10 | 
11 | nP = n1 * max(sum(n3), 1) * n4 * n5;
12 | paramCell = cell(nP, 1);
13 | idx = 0;
14 | for i1 = 1:n1
15 |     for i2 = 1:n2
16 |         for i3 = 1:max(n3(i2), 1)
17 |             for i4 = 1:n4
18 |                 for i5 = 1:n5
19 |                     param = [];
20 |                     param.k = knnCandi(i1);
21 |                     param.weightMode = weightCandi{i2};
22 |                     if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2})
23 |                         tmp = weight_param_Candi{i2};
24 |                         param.t = tmp(i3);
25 |                     else
26 |                         param.t = 1; % place holder
27 |                     end
28 |                     param.alpha = alphaCandi(i4);
29 |                     param.beta = betaCandi(i5);
30 |                     
31 |                     idx = idx + 1;
32 |                     paramCell{idx} = param;
33 |                 end
34 |             end
35 |         end
36 |     end
37 | end


--------------------------------------------------------------------------------
/eval_fsasl_param.m:
--------------------------------------------------------------------------------
 1 | function r = eval_fsasl_param(p_name, param_candi, ids, fns, paramCell, res_aio)
 2 | % 
 3 | % r1 = eval_fsasl_param('lambda3',10.^[-5:5], [11:25], {'mean_acc', 'mean_nmi_sqrt', 'loocv'}, paramCell, res_aio);
 4 | % r2 = eval_fsasl_param('SLEPreg',[10.^-3, 0.005, 10.^-2, 0.05, 0.01], [11:25], {'mean_acc', 'mean_nmi_sqrt', 'loocv'}, paramCell, res_aio);
 5 | % r3 = eval_fsasl_param('lambda1',[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], [11:25], {'mean_acc', 'mean_nmi_sqrt', 'loocv'}, paramCell, res_aio);
 6 | % r1
 7 | % r2
 8 | % r3
 9 | 
10 | if isvector(param_candi)
11 |     param_candi = num2cell(param_candi);
12 | end
13 | 
14 | 
15 | r = zeros(length(param_candi), length(fns));
16 | for i1 = 1:length(param_candi)
17 |     tmp = []; % nP_a * 3
18 |     for i2 = 1:size(res_aio, 1);
19 |         if isfield(paramCell{i2, 1}, p_name) && strcmp(num2str(paramCell{i2,1}.(p_name)), num2str(param_candi{i1}))
20 |             
21 |             tmp2 = zeros(length(fns), length(ids));
22 |             for i3 = 1:length(ids)
23 |                 tmp3 = zeros(length(fns),1);
24 |                 for i4 = 1:length(fns)
25 |                     tmp3(i4) = res_aio{i2, ids(i3)}.(fns{i4});
26 |                 end
27 |                 tmp2(:,i3) = tmp3;
28 |             end
29 |             tmp = [tmp; mean(tmp2, 2)'];
30 |         end
31 |         
32 |     end
33 |     r(i1,:) = max(tmp, [], 1);
34 | end
35 | 
36 | 


--------------------------------------------------------------------------------
/tfidf.m:
--------------------------------------------------------------------------------
 1 | function fea = tfidf(fea,bNorm)
 2 | %  fea is a document-term frequency matrix, this function return the tfidf ([1+log(tf)]*log[N/df])
 3 | %  weighted document-term matrix.
 4 | %    
 5 | %     If bNorm == 1, each document verctor will be further normalized to
 6 | %                    have unit norm. (default)
 7 | %
 8 | %   version 2.0 --Jan/2012 
 9 | %   version 1.0 --Oct/2003 
10 | %
11 | %   Written by Deng Cai (dengcai AT gmail.com)
12 | %
13 | 
14 | if ~exist('bNorm','var')
15 |     bNorm = 1;
16 | end
17 | 
18 | 
19 | [nSmp,mFea] = size(fea);
20 | [idx,jdx,vv] = find(fea);
21 | df = full(sum(sparse(idx,jdx,1),1));
22 | 
23 | df(df==0) = 1;
24 | idf = log(nSmp./df);
25 | 
26 | tffea = sparse(idx,jdx,log(vv)+1);
27 | 
28 | fea2 = tffea';
29 | idf = idf';
30 | 
31 | MAX_MATRIX_SIZE = 5000; % You can change this number based on your memory.
32 | nBlock = ceil(MAX_MATRIX_SIZE*MAX_MATRIX_SIZE/mFea);
33 | for i = 1:ceil(nSmp/nBlock)
34 |     if i == ceil(nSmp/nBlock)
35 |         smpIdx = (i-1)*nBlock+1:nSmp;
36 |     else
37 |         smpIdx = (i-1)*nBlock+1:i*nBlock;
38 |     end
39 |     fea2(:,smpIdx) = fea2(:,smpIdx) .* idf(:,ones(1,length(smpIdx)));
40 | end
41 | 
42 | %Now each column of fea2 is the tf-idf vector.
43 | %One can further normalize each vector to unit by using following codes:
44 | 
45 | if bNorm
46 |    fea = NormalizeFea(fea2,0)'; 
47 | end
48 | 
49 | % fea is the final document-term matrix.
50 | 


--------------------------------------------------------------------------------
/fs_unsup_ndfs_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_ndfs_build_param(knnCandi, weightCandi, weight_param_Candi, alphaCandi, betaCandi)
 2 | n1 = length(knnCandi);
 3 | n2 = length(weightCandi);
 4 | n3 = zeros(n2, 1);
 5 | for i1 = 1:length(weightCandi)
 6 |     n3(i1) = max(1, length(weight_param_Candi{i1}));
 7 | end
 8 | n4 = length(alphaCandi);
 9 | n5 = length(betaCandi);
10 | 
11 | nP = n1 * max(sum(n3), 1) * n4 * n5;
12 | paramCell = cell(nP, 1);
13 | idx = 0;
14 | for i1 = 1:n1
15 |     for i2 = 1:n2
16 |         for i3 = 1:max(n3(i2), 1)
17 |             for i4 = 1:n4
18 |                 for i5 = 1:n5
19 |                     param = [];
20 |                     param.k = knnCandi(i1);
21 |                     param.weightMode = weightCandi{i2};
22 |                     if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2})
23 |                         tmp = weight_param_Candi{i2};
24 |                         param.t = tmp(i3);
25 |                     else
26 |                         param.t = 1; % place holder
27 |                     end
28 |                     param.alpha = alphaCandi(i4);
29 |                     param.beta = betaCandi(i5);
30 |                     param.gamma = 10^8;
31 |                     param.maxiter = 100;
32 |                     
33 |                     idx = idx + 1;
34 |                     paramCell{idx} = param;
35 |                 end
36 |             end
37 |         end
38 |     end
39 | end
40 | 


--------------------------------------------------------------------------------
/EuDist2.m:
--------------------------------------------------------------------------------
 1 | function D = EuDist2(fea_a,fea_b,bSqrt)
 2 | %EUDIST2 Euclidean distance matrix computed with plain matrix operations.
 3 | %
 4 | %   D = EuDist2(fea_a)
 5 | %   D = EuDist2(fea_a,fea_b)
 6 | %   D = EuDist2(fea_a,fea_b,bSqrt)
 7 | %
 8 | %   fea_a:  nSample_a * nFeature
 9 | %   fea_b:  nSample_b * nFeature; when omitted or empty, distances are
10 | %           taken among the rows of fea_a
11 | %   bSqrt:  defaults to 1; a false value skips the square root, so D
12 | %           then holds squared distances
13 | %   D:      nSample_a * nSample_a
14 | %       or  nSample_a * nSample_b
15 | %
16 | %    Examples:
17 | %
18 | %       a = rand(500,10);
19 | %       b = rand(1000,10);
20 | %
21 | %       A = EuDist2(a); % A: 500*500
22 | %       D = EuDist2(a,b); % D: 500*1000
23 | %
24 | %   version 2.1 --November/2011
25 | %   version 2.0 --May/2009
26 | %   version 1.0 --November/2005
27 | %
28 | %   Written by Deng Cai (dengcai AT gmail.com)
29 | 
30 | if nargin < 3
31 |     bSqrt = 1;
32 | end
33 | 
34 | if nargin < 2 || isempty(fea_b)
35 |     % Single-set case: |a_i - a_j|^2 = |a_i|^2 + |a_j|^2 - 2*a_i*a_j'
36 |     sqNorm = sum(fea_a.*fea_a,2);
37 |     gram = fea_a*fea_a';
38 |     if issparse(sqNorm)
39 |         sqNorm = full(sqNorm);
40 |     end
41 | 
42 |     D = bsxfun(@plus,sqNorm,sqNorm') - 2*gram;
43 |     D(D<0) = 0;      % clamp negative entries to zero before the optional sqrt
44 |     if bSqrt
45 |         D = sqrt(D);
46 |     end
47 |     D = max(D,D');   % keep the larger of D(i,j) and D(j,i) so D is symmetric
48 |     return;
49 | end
50 | 
51 | % Two-set case: rows of fea_a against rows of fea_b.
52 | sqNormA = sum(fea_a.*fea_a,2);
53 | sqNormB = sum(fea_b.*fea_b,2);
54 | cross = fea_a*fea_b';
55 | 
56 | if issparse(sqNormA)
57 |     sqNormA = full(sqNormA);
58 |     sqNormB = full(sqNormB);
59 | end
60 | 
61 | D = bsxfun(@plus,sqNormA,sqNormB') - 2*cross;
62 | D(D<0) = 0;
63 | if bSqrt
64 |     D = sqrt(D);
65 | end


--------------------------------------------------------------------------------
/NormalizeFea.m:
--------------------------------------------------------------------------------
 1 | function fea = NormalizeFea(fea,row)
 2 | %NORMALIZEFEA Scale the rows or the columns of fea to unit Euclidean norm.
 3 | %
 4 | %   fea = NormalizeFea(fea)       same as row = 1
 5 | %   fea = NormalizeFea(fea,row)
 6 | %
 7 | % if row == 1, normalize each row of fea to have unit norm;
 8 | % if row == 0, normalize each column of fea to have unit norm;
 9 | %
10 | % Squared norms are floored at 1e-14, so an all-zero row/column stays zero
11 | % instead of producing a division by zero.
12 | %
13 | %   version 3.0 --Jan/2012 
14 | %   version 2.0 --Jan/2012 
15 | %   version 1.0 --Oct/2003 
16 | %
17 | %   Written by Deng Cai (dengcai AT gmail.com)
18 | %
19 | % The earlier loop-based implementation that used to follow the return
20 | % statement was unreachable (and relied on an undefined helper, mynorm);
21 | % it has been removed.
22 | 
23 | if ~exist('row','var')
24 |     row = 1;
25 | end
26 | 
27 | if row
28 |     nSmp = size(fea,1);
29 |     feaNorm = max(1e-14,full(sum(fea.^2,2)));
30 |     % left-multiplying by diag(1/norm) rescales every row
31 |     fea = spdiags(feaNorm.^-.5,0,nSmp,nSmp)*fea;
32 | else
33 |     nSmp = size(fea,2);
34 |     feaNorm = max(1e-14,full(sum(fea.^2,1))');
35 |     % right-multiplying by diag(1/norm) rescales every column
36 |     fea = fea*spdiags(feaNorm.^-.5,0,nSmp,nSmp);
37 | end


--------------------------------------------------------------------------------
/fs_unsup_spfs_nes.m:
--------------------------------------------------------------------------------
 1 | function [ W, lam ]= fs_unsup_spfs_nes( X, Y, k, err, starting )
 2 | %FS_UNSUP_SPFS_NES L2,1-norm regularized regression with a bisection search on lambda.
 3 | %
 4 | %   X        - the training data, each row is an instance
 5 | %   Y        - the regression target (called the class label in the original header)
 6 | %   k        - wanted number of rows of W with non-zero norm
 7 | %   err      - tolerated deviation |nZ - k|; the search stops once it is met
 8 | %   starting - first lambda to try (default 0.5)
 9 | %
10 | %   W   - coefficient matrix returned by the last mcLeastR call
11 | %   lam - search point at the moment the loop stops
12 | %
13 | % mcLeastR is not defined in this file (presumably the SLEP solver - confirm);
14 | % opts.rFlag = 1 is documented below as "lambda = lambda * lambda_{max}".
15 | %
16 | % NOTE(review): when the loop stops because the 10-solve budget is used up,
17 | % lam has already been moved to the next bisection point, so it is not the
18 | % value that produced the returned W (unless the restart branch ran).
19 | 
20 | if nargin < 5
21 |     starting = 0.5;
22 | end
23 | % L2-1 norm
24 | opts.q=2;
25 | 
26 | % lambda = lambda * lambda_{max}
27 | opts.rFlag=1;
28 | 
29 | % norm( x_i - x_{i-1}, 2) <= .tol
30 | % opts.tFlag = 3;
31 | 
32 | % Tolerance parameter.
33 | % opts.tol=1e-4;
34 | 
35 | % opts.init=2;
36 | 
37 | % .x0= zeros(n,1), .c0=0
38 | % opts.init=2;
39 | opts.verbose = 0;
40 | opts.maxIter = 500;
41 | 
42 | upL = 1; downL = 0;   % bisection bracket for lambda
43 | lam = starting; % the initial search point
44 | % Seed with inf so the first solve always runs. The previous seed
45 | % (k + 2*err) made the loop condition false for err == 0, leaving W
46 | % unassigned on return.
47 | nZ = inf;
48 | count = 1;
49 | need = -1;
50 | 
51 | while abs(nZ - k) > err && count <= 10
52 |     oldNZ = nZ;
53 |     oldNeed = need;
54 |     
55 | %     fprintf('need %i, iteration: %2i, lam: %f\n', k, count, lam);
56 |     W = mcLeastR(X, Y, lam, opts);
57 |     opts.x0=W;   % stored for the next solve
58 |     nZ = sum(sum(W.^2,2)>0);   % rows of W with non-zero norm
59 |     if nZ - k > err
60 |         % too many rows selected: move lambda towards upL
61 |         need = -1;
62 |         downL = lam; lam = (downL + upL) / 2;
63 |     elseif nZ - k < -err
64 |         % too few rows selected: move lambda towards downL
65 |         need = 1;
66 |         upL = lam; lam = (downL + upL) / 2;
67 |     end
68 |     if nZ < oldNZ && oldNeed == 1
69 |         % The count dropped although the previous step asked for more
70 |         % rows: discard the stored starting point and solve again at
71 |         % the new lambda.
72 |         opts = rmfield(opts, 'x0');
73 |         W = mcLeastR(X, Y, lam, opts);
74 |         nZ = sum(sum(W.^2,2)>0);
75 | %         fprintf('restart, %f, sel feat: %i\n-----\n', lam, nZ);
76 |     end
77 | %     fprintf('sel feat: %i\n-----\n', nZ);
78 |     count = count + 1;
79 | end


--------------------------------------------------------------------------------
/fs_unsup_llcfs_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_llcfs_build_param(nClusters, kCandidates, betaCandidates, kTypeCandidates, maxiterCandidates, epsilonCandidates )
 2 | %FS_UNSUP_LLCFS_BUILD_PARAM Enumerate every combination of the LLCFS candidate values.
 3 | %
 4 | %   paramCell is a 1-by-nP cell array of structs with the fields
 5 | %   nClusters, k, beta, kType, maxiter and epsilon. The epsilon candidates
 6 | %   vary fastest and the k candidates slowest.
 7 | %
 8 | %   Defaults when an argument is omitted or empty:
 9 | %   kTypeCandidates = 1, maxiterCandidates = 20, epsilonCandidates = 1e-4.
10 | 
11 | if nargin < 4 || isempty(kTypeCandidates)
12 |     kTypeCandidates = 1;
13 | end
14 | if nargin < 5 || isempty(maxiterCandidates)
15 |     maxiterCandidates = 20;
16 | end
17 | if nargin < 6 || isempty(epsilonCandidates)
18 |     epsilonCandidates = 1e-4;
19 | end
20 | 
21 | % Grid extents, fastest-varying candidate list first.
22 | gridSize = [length(epsilonCandidates), length(maxiterCandidates), ...
23 |     length(kTypeCandidates), length(betaCandidates), length(kCandidates)];
24 | 
25 | % number of parameter sets
26 | nP = prod(gridSize);
27 | paramCell = cell( 1, nP );
28 | 
29 | for p = 1 : nP
30 |     [iEps, iIter, iType, iBeta, iK] = ind2sub(gridSize, p);
31 | 
32 |     entry = struct();
33 |     entry.nClusters = nClusters;
34 |     entry.k = kCandidates( iK );
35 |     entry.beta = betaCandidates( iBeta );
36 |     entry.kType = kTypeCandidates( iType );
37 |     entry.maxiter = maxiterCandidates( iIter );
38 |     entry.epsilon = epsilonCandidates( iEps );
39 | 
40 |     paramCell{p} = entry;
41 | end


--------------------------------------------------------------------------------
/fs_unsup_spec_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_spec_build_param(kernelParamCell, styleCandi, expLamCandi, funcCandi)
 2 | %FS_UNSUP_SPEC_BUILD_PARAM Enumerate every combination of the SPEC candidate values.
 3 | %
 4 | %   kernelParamCell - cell array of kernel option values; each one is copied
 5 | %                     to param.kernelOption. When omitted or empty, the
 6 | %                     field is not created and the grid has one (implicit)
 7 | %                     kernel entry.
 8 | %   Pram.style - 1: unsupervised feature selection 2: supervised feature
 9 | %                         selection
10 | %   Pram.expLam - the exp order for the eigenvalue
11 | %   Pram.function - 1:f'Lf; 2:using all eigenvalue except the first one; 3:
12 | %                             using the first k eigenvalues. (In this case
13 | %                             the weight the bigger the better.
14 | %
15 | %   Defaults: styleCandi = 1, expLamCandi = [0.25, 1, 4], funcCandi = [1, 2, 3].
16 | %
17 | %   paramCell is an nP-by-1 cell array of structs; funcCandi varies fastest
18 | %   and kernelParamCell slowest.
19 | 
20 | % The guard must look at kernelParamCell itself. It used to test
21 | % styleCandi, which discarded the kernel options whenever the function
22 | % was called with a single argument.
23 | if ~exist('kernelParamCell', 'var') || isempty(kernelParamCell)
24 |     kernelParamCell = {};
25 | end
26 | if ~exist('styleCandi', 'var') || isempty(styleCandi)
27 |     styleCandi = [1];
28 | end
29 | 
30 | if ~exist('expLamCandi', 'var') || isempty(expLamCandi)
31 |     expLamCandi = [0.25, 1, 4];
32 | end
33 | 
34 | if ~exist('funcCandi', 'var') || isempty(funcCandi)
35 |     funcCandi = [1, 2, 3];
36 | end
37 | 
38 | n0 = max(length(kernelParamCell), 1);   % one pass even without kernel options
39 | n1 = length(styleCandi);
40 | n2 = length(expLamCandi);
41 | n3 = length(funcCandi);
42 | nP = n0 * n1 * n2 * n3;
43 | paramCell = cell(nP, 1);
44 | idx = 0;
45 | for i0 = 1:n0
46 |     for i1 = 1:n1
47 |         for i2 = 1:n2
48 |             for i3 = 1:n3
49 |                 param = [];
50 |                 if ~isempty(kernelParamCell)
51 |                     param.kernelOption = kernelParamCell{i0};
52 |                 end
53 |                 param.style = styleCandi(i1);
54 |                 param.expLam = expLamCandi(i2);
55 |                 param.function = funcCandi(i3);
56 |                 idx = idx + 1;
57 |                 paramCell{idx} = param;
58 |             end
59 |         end
60 |     end
61 | end


--------------------------------------------------------------------------------
/fs_unsup_maxvar_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_maxvar_single_func(dataset, exp_settings, algo_settings)
 2 | %FS_UNSUP_MAXVAR_SINGLE_FUNC Rank features with fs_unsup_maxvar and evaluate the top-ranked subsets.
 3 | %
 4 | %   dataset       - dataset name; passed to extractXY and used in the
 5 | %                   result file name
 6 | %   exp_settings  - struct with FeaNumCandi (subset sizes to evaluate),
 7 | %                   nKmeans (forwarded to evalUnSupFS as 'nKm') and the
 8 | %                   optional prefix_mdcs (output directory)
 9 | %   algo_settings - not used by this function
10 | %
11 | %   Returns FeaNumCandi, the grid-search summary res_gs (with feaset, time
12 | %   and time2 added), the per-size results res_aio and res_gs_ps. The same
13 | %   four variables are saved to <prefix_mdcs>/<dataset>_best_result_MaxVar.mat.
14 | 
15 | %======================setup===========================
16 | FeaNumCandi = exp_settings.FeaNumCandi;
17 | nKmeans = exp_settings.nKmeans;
18 | prefix_mdcs = '';
19 | if isfield(exp_settings, 'prefix_mdcs')
20 |     prefix_mdcs = exp_settings.prefix_mdcs;
21 | end
22 | %======================================================
23 | 
24 | disp(['dataset:',dataset]);
25 | [X, Y] = extractXY(dataset);
26 | 
27 | %get maxvar score
28 | disp('get maxvar score...');
29 | t_start = clock;
30 | FeaScore = fs_unsup_maxvar(X);
31 | [~, index] = sort(FeaScore, 'descend');   % highest score first
32 | % save([dataset, filesep,'feaIdx.mat'],'index');
33 | t_end = clock;
34 | t1 = etime(t_end,t_start);
35 | disp(['exe time: ',num2str(t1)]);
36 | 
37 | t_start = clock;
38 | disp('evaluation ...');
39 | res_aio = cell(1, length(FeaNumCandi)); 
40 | parfor feaIdx = 1:length(FeaNumCandi)
41 |     res_aio{1, feaIdx} = evalUnSupFS(X, Y, index(1:FeaNumCandi(feaIdx)), struct('nKm', nKmeans));
42 | end
43 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
44 | res_gs.feaset = FeaNumCandi;
45 | t_end = clock;
46 | t2 = etime(t_end,t_start);
47 | disp(['exe time: ',num2str(t2)]);
48 | res_gs.time = t1;    % scoring time
49 | res_gs.time2 = t2;   % evaluation time
50 | 
51 | % fullfile drops an empty prefix. The previous [prefix, filesep, ...]
52 | % concatenation produced a path starting with the file separator (the
53 | % filesystem root) when no prefix_mdcs was given.
54 | save(fullfile(prefix_mdcs, [dataset, '_best_result_MaxVar.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
55 | end


--------------------------------------------------------------------------------
/fs_unsup_glspfs.m:
--------------------------------------------------------------------------------
 1 | function [feaIndx,W,obj] = fs_unsup_glspfs(X, Kmatrix, L, r1, r2, numFea) %% 
 2 | %FS_UNSUP_GLSPFS Iteratively reweighted solver that ranks features by the row norms of W.
 3 | %
 4 | %   X       - data matrix, num-by-dim
 5 | %   Kmatrix - square matrix whose eigenpairs with eigenvalue > eps form the
 6 | %             regression target Ypos = U*sqrt(S)
 7 | %   L       - num-by-num matrix weighted by r2 in the objective
 8 | %   r1, r2  - weights of the row-norm term and of the L term
 9 | %   numFea  - number of feature indices to return
10 | %
11 | %   feaIndx - indices of the numFea rows of W with the largest 2-norm
12 | %   W       - dim-by-size(Ypos,2) coefficient matrix of the last iteration
13 | %   obj     - objective value recorded at every iteration
14 | 
15 | [num, dim] = size(X);
16 | rowScale = ones(dim,1);
17 | 
18 | % Target built from the positive part of the spectrum of Kmatrix.
19 | [eigVec,eigVal] = eig(Kmatrix);
20 | lambda = diag(eigVal);
21 | keep = find(lambda>eps);
22 | Ypos = eigVec(:,keep)*diag(sqrt(lambda(keep)));
23 | 
24 | maxIter = 20;
25 | prevObj = inf;
26 | iter = 0;
27 | fewerSamples = num<dim;   % selects which linear system is solved
28 | while true
29 |     iter = iter +1;
30 |     DX = spdiags(rowScale,0,dim,dim)*X';
31 |     if fewerSamples
32 |         % num-by-num system
33 |         W = DX*(((eye(num)+ r2*L)*X*DX + r1*eye(num))\Ypos);
34 |     else
35 |         % dim-by-dim system
36 |         W = (DX*(eye(num)+ r2*L)*X + r1*eye(dim) )\(DX*Ypos);
37 |     end
38 |     Xi = sqrt(sum(W.*W,2));   % row norms of W
39 |     rowScale = 2*Xi;
40 |     XW = X*W -Ypos;
41 |     obj(iter) = trace(XW*XW') + r2*trace(L*((X*W)*(X*W)')) + r1*sum(Xi);
42 |     % stop on a small relative change or once the iteration cap is passed
43 |     if abs((prevObj-obj(iter))/obj(iter)) <1e-4 || iter>maxIter
44 |         break;
45 |     end
46 |     prevObj = obj(iter);
47 | end
48 | 
49 | [~,order] = sort(Xi,'descend');
50 | feaIndx = order(1:numFea);


--------------------------------------------------------------------------------
/LocalReconstructLap.m:
--------------------------------------------------------------------------------
 1 | function L = LocalReconstructLap(X, K)
 2 | %LOCALRECONSTRUCTLAP Build L = (I-M)'*(I-M) from K-neighbor reconstruction weights.
 3 | %
 4 | %   X - D-by-N matrix, one point per column
 5 | %   K - number of neighbors used to reconstruct each point
 6 | %   L - N-by-N matrix; row ii of M holds the weights of point ii's neighbors
 7 | 
 8 | [D,N] = size(X);
 9 | 
10 | % STEP 1: pairwise squared distances and the K nearest neighbors.
11 | sqNorm = sum(X.^2,1);
12 | distance = repmat(sqNorm,N,1)+repmat(sqNorm',1,N)-2*X'*X;
13 | 
14 | [~,order] = sort(distance);
15 | nbrs = order(2:(1+K),:);   % skip the first sorted entry of every column
16 | 
17 | % STEP 2: reconstruction weights.
18 | if K>D
19 |   fprintf(1,'   [note: K>D; regularization will be used]\n'); 
20 |   tol = 1e-3;   % regularizer in case constrained fits are ill conditioned
21 | else
22 |   tol = 0;
23 | end
24 | 
25 | M = zeros(N);
26 | for ii = 1:N
27 |    nb = nbrs(:,ii);
28 |    z = X(:,nb)-repmat(X(:,ii),1,K);   % shift ith pt to origin
29 |    C = z'*z;                          % local covariance
30 |    C = C + eye(K,K)*tol*trace(C);     % regularization (K>D)
31 |    w = C\ones(K,1);                   % solve Cw=1
32 |    w = w/sum(w);                      % enforce sum(w)=1
33 |    M(ii,nb) = w';
34 | end
35 | 
36 | % STEP 3: cost matrix (I-M)'(I-M).
37 | L = (eye(N) - M)'*(eye(N) - M);
38 | end


--------------------------------------------------------------------------------
/compute_accuracy_F.m:
--------------------------------------------------------------------------------
 1 | function [confus,accuracy,numcorrect,precision,recall,F] = compute_accuracy_F (actual,pred,classes)
 2 | % COMPUTE_ACCURACY_F : confusion matrix, accuracy, precision, recall and F scores
 3 | % [confus,accuracy,numcorrect,precision,recall,F] = compute_accuracy_F (actual,pred,[classes])
 4 | %
 5 | % actual is a N-element vector representing the actual classes
 6 | % pred is a N-element vector representing the predicted classes
 7 | % classes is a vector with the numbers of the classes (by default, it is 1:k, where k is the
 8 | %    largest integer to appear in actual or pred.
 9 | %
10 | % confus(i,j) counts the points of actual class classes(i) predicted as classes(j).
11 | % accuracy is numcorrect/length(actual), numcorrect the number of matches.
12 | % precision, recall and F are 1-by-length(classes) row vectors; an entry is 0
13 | % when its denominator is 0.
14 | %
15 | % All outputs are always computed. The previous nargout thresholds were
16 | % written for an output list without accuracy and no longer matched the
17 | % output positions.
18 | 
19 | if size(actual,1) ~= size(pred,1)
20 |     pred=pred';   % bring pred to the orientation of actual
21 | end
22 | if nargin < 3
23 |     classes = 1:max(max(actual),max(pred));
24 | end
25 | 
26 | numcorrect = sum(actual==pred);
27 | accuracy = numcorrect/length(actual);
28 | 
29 | nClasses = length(classes);
30 | confus = zeros(nClasses, nClasses);   % defined even for an empty class list
31 | for i=1:nClasses
32 |     inClass = (actual==classes(i));   % points whose actual class is classes(i)
33 |     for j=1:nClasses
34 |         confus(i,j) = sum(pred(inClass)==classes(j));
35 |     end
36 | end
37 | 
38 | precision = zeros(1, nClasses);
39 | recall = zeros(1, nClasses);
40 | F = zeros(1, nClasses);
41 | for i=1:nClasses
42 |     rowTotal = sum(confus(i,:));   % points that really are class i
43 |     if rowTotal
44 |         recall(i) = confus(i,i) / rowTotal;
45 |     end
46 |     colTotal = sum(confus(:,i));   % points predicted as class i
47 |     if colTotal
48 |         precision(i) = confus(i,i) / colTotal;
49 |     end
50 |     if (precision(i)+recall(i))
51 |         F(i) = 2 * (precision(i)*recall(i)) / (precision(i)+recall(i));
52 |     end
53 | end


--------------------------------------------------------------------------------
/components.m:
--------------------------------------------------------------------------------
 1 | function blocks = components(A)
 2 | %COMPONENTS Finds connected components in a graph defined by a adjacency matrix
 3 | %
 4 | %   blocks = components(A)
 5 | %
 6 | % Finds connected components in a graph defined by the adjacency matrix A.
 7 | % The function outputs an n-vector of integers 1:k in blocks, meaning that
 8 | % A has k components. The vector blocks labels the vertices of A according 
 9 | % to component.
10 | % If the adjacency matrix A is undirected (i.e. symmetric), the blocks are 
11 | % its connected components. If the adjacency matrix A is directed (i.e. 
12 | % unsymmetric), the blocks are its strongly connected components.
13 | %
14 | %
15 | 
16 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
17 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49
18 | % You are free to use, change, or redistribute this code in any way you
19 | % want for non-commercial purposes. However, it is appreciated if you 
20 | % maintain the name of the original author.
21 | %
22 | % (C) Laurens van der Maaten, 2010
23 | % University California, San Diego / Delft University of Technology
24 | 
25 | 
26 |     % The adjacency matrix has to be square
27 |     [n, m] = size(A);
28 |     if n ~= m
29 |         error ('Adjacency matrix must be square');
30 |     end
31 | 
32 |     % Dulmage-Mendelsohn permutation; self-loops are added first when
33 |     % the diagonal of A has a zero entry
34 |     if all(diag(A))
35 |         [~, p, ~, r] = dmperm(A);
36 |     else
37 |         [~, p, ~, r] = dmperm(A | speye(size(A)));
38 |     end
39 | 
40 |     % r holds the block boundaries, so there are numel(r)-1 blocks
41 |     k = length(diff(r));
42 | 
43 |     % Mark the first position of every block, then number the blocks
44 |     % with a running sum
45 |     blocks = zeros(1, n);
46 |     blocks(r(1:k)) = 1;
47 |     blocks = cumsum(blocks);
48 | 
49 |     % Undo the permutation so blocks(v) is the component of vertex v
50 |     blocks(p) = blocks;


--------------------------------------------------------------------------------
/fs_unsup_jelsr.m:
--------------------------------------------------------------------------------
 1 | function [W_compute, Y, obj] = fs_unsup_jelsr(data, W_ori, ReducedDim,alpha,beta)
 2 | %FS_UNSUP_JELSR Alternate between the embedding Y and the regression matrix W_compute.
 3 | %
 4 | %   Input:  data       - nSmp*nFea
 5 | %           W_ori      - the original local similarity matrix
 6 | %           ReducedDim - the dimensionality of the low dimensional
 7 | %                        embedding Y
 8 | %           alpha, beta - two parameters
 9 | %
10 | %   Output: W_compute  - regression matrix of the last iteration
11 | %           Y          - embedding of the last iteration
12 | %           obj        - itermax-by-1 vector of zeros (allocated here but
13 | %                        never written to)
14 | %
15 | %   compute_W is defined outside this file - its normalization is not
16 | %   described here.
17 | 
18 | [nSmp,nFea] = size(data);
19 | 
20 | % Normalization of W_ori and the initial eigen-decomposition.
21 | D_mhalf = full(sum(W_ori,2).^-.5); 
22 | W = compute_W(W_ori,data,D_mhalf); 
23 | Y = compute_Y(data,W, ReducedDim, D_mhalf);       
24 | 
25 | if issparse(data)
26 |     % sparse input: append a column of ones instead of centering
27 |     data = [data ones(size(data,1),1)];
28 |     [nSmp,nFea] = size(data);
29 | else
30 |     % dense input: subtract the column means
31 |     data = data - repmat(mean(data),nSmp,1);
32 | end
33 | 
34 | % Initialization: ridge solution for the initial Y.
35 | gram = data'*data;
36 | W_compute = (gram+alpha*eye(nFea))\(data'*Y);
37 | rowNorm = sqrt(sum(W_compute.*W_compute,2));
38 | 
39 | itermax = 20;
40 | obj = zeros(itermax,1);
41 | for iter = 1:itermax 
42 |    % Fix D, then update Y and W_compute.
43 |    D = 2*spdiags(rowNorm,0,nFea,nFea);
44 |    A = (D*gram+alpha*eye(nFea));   
45 | 
46 |    % Matrix handed to compute_W / compute_Y for the new embedding.
47 |    S = A\(D*data'); 
48 |    S = data*S;
49 |    S = W_ori-beta*eye(nSmp)+beta*S; 
50 |    S = compute_W(S,data,D_mhalf); 
51 |    Y = compute_Y(data,S, ReducedDim, D_mhalf);
52 | 
53 |    % New regression matrix for the new Y.
54 |    W_compute = A\(D*data'*Y);
55 | 
56 |    % Fix W_compute and refresh the row norms that define D.
57 |    rowNorm = sqrt(sum(W_compute.*W_compute,2));
58 | end 
59 | end 


--------------------------------------------------------------------------------
/fs_unsup_spfs.m:
--------------------------------------------------------------------------------
 1 | function fList = fs_unsup_spfs(X, K, Y, numF, options)
 2 | % A wrapper function for different solvers of SPFS
 3 | %
 4 | % [ fList ] = spfs_sfs(X, K, numF);
 5 | %
 6 | % [ W, lam ]= spfs_nes( X, Y, k, err, starting );
 7 | %
 8 | % [ fList, W ] = spfs_larnes( X, Y, numF );
 9 | %
10 | % [ fList, W ] = spfs_lar( X, Y, numF )
11 | %
12 | % each solver is downloaded from the author Zheng Zhao
13 | % https://sites.google.com/site/alanzhao/
14 | %
15 | % [1] Efficient Spectral Feature Selection with Minimum Redundancy, AAAI 2010
16 | % [2] On Similarity Preserving Feature Selection, TKDE, 2013
17 | %
18 | % options.spfs_type picks the solver (case-insensitive, default 'SFS').
19 | % Only 'SFS' and 'NES' are enabled; 'NES' also reads options.nClass.
20 | % The Y argument is not read: the 'NES' branch derives its own target
21 | % from K.
22 | 
23 | if ~exist('options', 'var') || ~isfield(options, 'spfs_type')
24 |     options.spfs_type = 'SFS';
25 | end
26 | 
27 | solver = lower(options.spfs_type);
28 | if strcmp(solver, 'sfs')
29 |     fList = fs_unsup_spfs_sfs(X, K, numF);
30 | elseif strcmp(solver, 'nes')
31 |     % target built from the options.nClass largest eigenpairs of K
32 |     [eigvec, eigval] = eigs(K, options.nClass, 'LA');
33 |     Y = eigvec * diag(sqrt(max(diag(eigval), eps)));
34 |     [ W, ~ ] = fs_unsup_spfs_nes( X, Y, numF, 0.1*numF);
35 |     % rank features by squared row norm and keep the numF largest
36 |     [~, order] = sort(sum(W.^2,2), 'descend');
37 |     fList = order(1:numF);
38 | else
39 |     % 'LAR' and 'LARNES' are rejected like any unknown solver name: the
40 |     % code behind them did not return enough features. Disabled calls,
41 |     % kept for reference:
42 |     %   [eigvec, eigval] = eigs(K, options.nClass, 'LA');
43 |     %   Y = eigvec * diag(sqrt(max(diag(eigval), eps)));
44 |     %   [ fList, W ] = fs_unsup_spfs_lar( X, Y, numF );
45 |     %   [ fList, W ] = fs_unsup_spfs_larnes( X, Y, numF );
46 |     error('not supported yet!');
47 | end


--------------------------------------------------------------------------------
/compute_Y.m:
--------------------------------------------------------------------------------
 1 | function Y = compute_Y(data, W, ReducedDim, D_mhalf)
 2 | % Leading eigenvectors of W (largest eigenvalues first), scaled row-wise by D_mhalf, first column dropped.
 3 | [nSmp,nFea] = size(data); % nSmp and nFea are not read below
 4 | 
 5 | dimMatrix = size(W,2);
 6 | if (dimMatrix > 500 && ReducedDim < dimMatrix/10) % large W, few components: iterative solver
 7 |     option = struct('disp',0);
 8 |     [Y, eigvalue] = eigs(W,ReducedDim,'la',option);
 9 |     eigvalue = diag(eigvalue);
10 | else % otherwise: full decomposition, then keep the largest ReducedDim
11 |     W = full(W);
12 |     [Y, eigvalue] = eig(W);
13 |     eigvalue = diag(eigvalue);
14 |     % order the eigenpairs by decreasing eigenvalue
15 |     [junk, index] = sort(-eigvalue);
16 |     eigvalue = eigvalue(index);
17 |     Y = Y(:,index);
18 |     if ReducedDim < length(eigvalue)
19 |         Y = Y(:, 1:ReducedDim);
20 |         eigvalue = eigvalue(1:ReducedDim);
21 |     end
22 | end
23 | % drop eigenpairs whose eigenvalue is numerically zero (|value| < 1e-6)
24 | eigIdx = find(abs(eigvalue) < 1e-6);
25 | eigvalue (eigIdx) = [];
26 | Y (:,eigIdx) = [];
27 | 
28 | nGotDim = length(eigvalue);
29 | % count the leading eigenvalues that equal 1 within 1e-12; NOTE(review): eigvalue(1) is read even if eigvalue is empty
30 | idx = 1;
31 | while(abs(eigvalue(idx)-1) < 1e-12)
32 |     idx = idx + 1;
33 |     if idx > nGotDim
34 |         break;
35 |     end
36 | end
37 | idx = idx - 1; % idx is now the number of leading eigenvalues equal to 1
38 | 
39 | if(idx > 1)
40 |     % more than one eigenvector of 1 eigenvalue
41 |     u = zeros(size(Y,1),idx);
42 |     d_m = 1./D_mhalf;
43 |     cc = 1/norm(d_m);
44 |     u(:,1) = cc./D_mhalf; % unit-norm vector proportional to 1./D_mhalf
45 |     % if one of these eigenvectors already matches u(:,1), move the old first column there and put u(:,1) first
46 |     bDone = 0;
47 |     for i = 1:idx
48 |         if abs(Y(:,i)' * u(:,1) - 1) < 1e-14
49 |             Y(:,i) = Y(:,1);
50 |             Y(:,1) = u(:,1);
51 |             bDone = 1;
52 |         end
53 |     end
54 |     % otherwise rebuild the first idx columns starting from u(:,1)
55 |     if ~bDone
56 |         for i = 2:idx
57 |             u(:,i) = Y(:,i);
58 |             for j= 1:i-1
59 |                 u(:,i) = u(:,i) - (u(:,j)' * Y(:,i))*u(:,j); % remove the component along u(:,j)
60 |             end
61 |             u(:,i) = u(:,i)/norm(u(:,i));
62 |         end
63 |         Y(:,1:idx) = u;
64 |     end
65 | end
66 | % scale row r of Y by D_mhalf(r); both branches compute the same product
67 | if nGotDim < 5000
68 |     Y = repmat(D_mhalf,1,nGotDim).*Y;
69 | else
70 |     for k = 1:nGotDim % column by column, without building the repmat matrix
71 |         Y(:,k) = Y(:,k).*D_mhalf;
72 |     end
73 | end
74 | % the first column is discarded
75 | Y(:,1) = [];
76 | 


--------------------------------------------------------------------------------
/fs_unsup_glspfs_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_glspfs_build_param(local_type_candi, local_type_param_candi, knn_size_candi, ...
 2 |     lambda1_candi, lambda2_candi, global_kernel_cell_candi)
 3 | %FS_UNSUP_GLSPFS_BUILD_PARAM Enumerate every combination of the GLSPFS candidate values.
 4 | %
 5 | %   local_type_candi         - cell array of local model names
 6 | %   local_type_param_candi   - cell array, one entry per local model: the
 7 | %                              values tried for that model, or empty
 8 | %   knn_size_candi, lambda1_candi, lambda2_candi - numeric candidates
 9 | %   global_kernel_cell_candi - cell array of global kernel options
10 | %
11 | %   paramCell is an nP-by-1 cell array of structs. The per-model value is
12 | %   stored in both local_lpp_sigma and local_ltsa_embedded_dim ([] when
13 | %   the model has no values). The kernel options vary fastest and the
14 | %   local models slowest.
15 | 
16 | nTypes = length( local_type_candi );
17 | nTypeValues = zeros(nTypes, 1);
18 | for t = 1:nTypes
19 |     % a model without values still contributes one grid entry
20 |     nTypeValues(t) = max(1, length(local_type_param_candi{t}));
21 | end
22 | nKnn = length( knn_size_candi );
23 | nLam1 = length( lambda1_candi );
24 | nLam2 = length( lambda2_candi );
25 | nKernel = length( global_kernel_cell_candi );
26 | 
27 | nP = max(sum(nTypeValues), 1) * nKnn * nLam1 * nLam2 * nKernel;
28 | 
29 | paramCell = cell(nP, 1);
30 | idx = 0;
31 | for t = 1:nTypes
32 |     typeValues = local_type_param_candi{t};
33 |     for v = 1:nTypeValues(t)
34 |         if isempty(typeValues)
35 |             typeValue = [];   % place holder
36 |         else
37 |             typeValue = typeValues(v);
38 |         end
39 |         for iKnn = 1:nKnn
40 |             for iLam1 = 1:nLam1
41 |                 for iLam2 = 1:nLam2
42 |                     for iKernel = 1:nKernel
43 |                         entry = struct();
44 |                         entry.local_type = local_type_candi{t};
45 |                         entry.local_lpp_sigma = typeValue;
46 |                         entry.local_ltsa_embedded_dim = typeValue;
47 |                         entry.local_k = knn_size_candi(iKnn);
48 |                         entry.lambda1 = lambda1_candi(iLam1);
49 |                         entry.lambda2 = lambda2_candi(iLam2);
50 |                         entry.global_kernel_option = global_kernel_cell_candi{iKernel};
51 |                         idx = idx + 1;
52 |                         paramCell{idx} = entry;
53 |                     end
54 |                 end
55 |             end
56 |         end
57 |     end
58 | end
59 | end


--------------------------------------------------------------------------------
/fs_unsup_fsasl_build_param.m:
--------------------------------------------------------------------------------
 1 | function paramCell = fs_unsup_fsasl_build_param(sr_solver_candi, sr_solver_param_candi, knn_size_candi, ...
 2 |     lambda2_candi, lambda3_candi, fs_solver_candi, iter_candi)
 3 | %FS_UNSUP_FSASL_BUILD_PARAM Enumerate every combination of the FSASL candidate values.
 4 | %
 5 | %   sr_solver_candi       - cell array of solver names (stored as LassoType)
 6 | %   sr_solver_param_candi - cell array, one entry per solver: the values
 7 | %                           tried for that solver, or empty
 8 | %   knn_size_candi, lambda2_candi, lambda3_candi, iter_candi - numeric
 9 | %                           candidates
10 | %   fs_solver_candi       - cell array of names (stored as GroupLassoType)
11 | %
12 | %   paramCell is an nP-by-1 cell array of structs. The per-solver value is
13 | %   stored in both SLEPreg and LARSk; these two fields are absent when the
14 | %   solver has no values. lambda1 is always 1. iter_candi varies fastest
15 | %   and the solvers slowest.
16 | 
17 | nSolvers = length( sr_solver_candi );
18 | nSolverValues = zeros(nSolvers, 1);
19 | for s = 1:nSolvers
20 |     % a solver without values still contributes one grid entry
21 |     nSolverValues(s) = max(1, length(sr_solver_param_candi{s}));
22 | end
23 | nKnn = length( knn_size_candi );
24 | nLam2 = length( lambda2_candi );
25 | nLam3 = length( lambda3_candi );
26 | nFs = length( fs_solver_candi );
27 | nIter = length( iter_candi );
28 | 
29 | nP = max(sum(nSolverValues), 1) * nKnn * nLam2 * nLam3 * nFs * nIter;
30 | 
31 | paramCell = cell(nP, 1);
32 | idx = 0;
33 | for s = 1:nSolvers
34 |     solverValues = sr_solver_param_candi{s};
35 |     hasValue = ~isempty(solverValues);
36 |     for v = 1:nSolverValues(s)
37 |         for iKnn = 1:nKnn
38 |             for iLam2 = 1:nLam2
39 |                 for iLam3 = 1:nLam3
40 |                     for iFs = 1:nFs
41 |                         for iIter = 1:nIter
42 |                             entry = struct();
43 |                             entry.LassoType = sr_solver_candi{s};
44 |                             if hasValue
45 |                                 entry.SLEPreg = solverValues(v);
46 |                                 entry.LARSk = solverValues(v);
47 |                             end
48 |                             entry.Localk = knn_size_candi(iKnn);
49 |                             entry.lambda2 = lambda2_candi(iLam2);
50 |                             entry.lambda1 = 1;
51 |                             entry.lambda3 = lambda3_candi(iLam3);
52 |                             entry.GroupLassoType = fs_solver_candi{iFs};
53 |                             entry.maxiter = iter_candi(iIter);
54 |                             idx = idx + 1;
55 |                             paramCell{idx} = entry;
56 |                         end
57 |                     end
58 |                 end
59 |             end
60 |         end
61 |     end
62 | end
63 | end


--------------------------------------------------------------------------------
/fs_unsup_traceratio_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_traceratio_single_func(dataset, exp_settings, algo_settings)
 2 | %FS_UNSUP_TRACERATIO_SINGLE_FUNC Select features with trace ratio and evaluate the selected subsets.
 3 | %
 4 | %   dataset       - dataset name; passed to extractXY and used in the
 5 | %                   result file name
 6 | %   exp_settings  - struct with FeaNumCandi (subset sizes to evaluate),
 7 | %                   nKmeans (forwarded to evalUnSupFS as 'nKm') and the
 8 | %                   optional prefix_mdcs (output directory)
 9 | %   algo_settings - not used by this function
10 | %
11 | %   Returns FeaNumCandi, the grid-search summary res_gs (with feaset, time
12 | %   and time2 added), the per-size results res_aio and res_gs_ps. The same
13 | %   four variables are saved to
14 | %   <prefix_mdcs>/<dataset>_best_result_TraceRatio.mat.
15 | 
16 | %======================setup===========================
17 | FeaNumCandi = exp_settings.FeaNumCandi;
18 | nKmeans = exp_settings.nKmeans;
19 | prefix_mdcs = '';
20 | if isfield(exp_settings, 'prefix_mdcs')
21 |     prefix_mdcs = exp_settings.prefix_mdcs;
22 | end
23 | %======================================================
24 | 
25 | disp(['dataset:',dataset]);
26 | [X, Y] = extractXY(dataset);
27 | 
28 | %================setup======================
29 | % Single parameter set: neighborhood size k = 5.
30 | knnCandi = 5;
31 | n1 = length(knnCandi);
32 | nP = n1;
33 | paramCell = cell(nP, 1);
34 | idx = 0;
35 | for i1 = 1:n1
36 |     param = [];
37 |     param.k = knnCandi(i1);
38 |     idx = idx + 1;
39 |     paramCell{idx} = param;
40 | end
41 | %===========================================
42 | 
43 | t_start = clock;
44 | disp('trace ratio...');
45 | feaSubsets = cell(length(paramCell), length(FeaNumCandi));
46 | for i1 = 1:length(paramCell)
47 |     % the message used to say 'UDFS' (copied from another driver)
48 |     fprintf('TraceRatio parameter search %d out of %d...\n', i1, length(paramCell));
49 |     param = paramCell{i1};
50 |     [Sb, Sw] = compute_SbSw_unsup(X, param.k);
51 |     parfor i2 = 1:length(FeaNumCandi)
52 |         feaSubsets{i1, i2} = fs_unsup_traceratio(Sb, Sw, FeaNumCandi(i2));
53 |     end
54 | end
55 | t_end = clock;
56 | t1 = etime(t_end,t_start);
57 | disp(['exe time: ',num2str(t1)]);
58 | 
59 | t_start = clock;
60 | disp('evaluation ...');
61 | res_aio = cell(1, length(FeaNumCandi)); 
62 | parfor i1 = 1:length(FeaNumCandi)
63 |     % Only the first parameter set is evaluated. Row 1 is addressed
64 |     % explicitly; the former linear index feaSubsets{i1} was only
65 |     % equivalent while paramCell holds a single entry.
66 |     idx = feaSubsets{1, i1};
67 |     res_aio{1, i1} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i1)), struct('nKm', nKmeans));
68 | end
69 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
70 | res_gs.feaset = FeaNumCandi;
71 | t_end = clock;
72 | t2 = etime(t_end,t_start);
73 | disp(['exe time: ',num2str(t2)]);
74 | res_gs.time = t1;    % selection time
75 | res_gs.time2 = t2;   % evaluation time
76 | 
77 | save(fullfile(prefix_mdcs, [dataset, '_best_result_TraceRatio.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
78 | end


--------------------------------------------------------------------------------
/fs_unsup_spfs_lar.m:
--------------------------------------------------------------------------------
 1 | function [ fList, W ] = fs_unsup_spfs_lar( X, Y, numF )
 2 | % FS_UNSUP_SPFS_LAR  Greedy, LARS-style selection of up to numF columns of X.
 3 | %   X - the data, each row is an instance
 4 | %   Y - the response, with nY columns
 5 | %   numF - the number of features we want to select
 6 | %   fList - selected column indices in pick order (entries stay 0 if the search stops early); W - nF-by-nY weights
 7 | [nD, nF] = size(X);
 8 | nY = size(Y,2);
 9 | 
10 | W = zeros(nF, nY);
11 | fList = zeros(numF, 1);
12 | k = 1; R = Y; % k = number of features picked so far, R = current residual
13 | 
14 | % find the most correlated one: the column i with the largest norm(X(:,i)'*R)
15 | bestCor = -1; bestNor = 0;
16 | for i = 1:nF
17 |     curF = X(:,i);
18 |     curNorm = norm(curF'*R,2);
19 |     if curNorm > bestNor
20 |         bestCor = i; bestNor = curNorm;
21 |     end
22 | end
23 | fList(k) = bestCor; XA = X(:, bestCor); % XA holds the columns selected so far
24 | % NOTE(review): bestCor is still -1 here when every norm above is 0, and X(:,-1) then errors - confirm inputs rule that out
25 | while k < numF && k < nF && k < nD
26 |     k = k + 1;
27 | %     fprintf('%i,',k);
28 |     
29 |     % direction to proceed along: least-squares fit of the residual on the selected columns
30 |     GA = XA\R;
31 |     
32 |     % compute how far we can go for every f; a and b are measured on the first selected feature
33 |     a = X(:,fList(1))'*R;
34 |     b = X(:,fList(1))'*XA*GA;
35 |     bestCor = -1; bestNor = inf; % from here on bestNor holds the smallest admissible step, not a norm
36 |     for i = 1:nF
37 |         if sum(fList==i) > 0 % skip features that are already in fList
38 |             continue;
39 |         end
40 |         c = X(:,i)'*R;
41 |         d = X(:,i)'*XA*GA;
42 |         p1=b*b'-d*d'; p2 = a*b'-c*d'; p3 = a*a'-c*c'; % coefficients of p1*s^2 - 2*p2*s + p3 = 0
43 |         s1 = (p2+abs(sqrt(p2^2-p1*p3)))/p1; % the two roots of that quadratic; NOTE(review): abs() turns a complex sqrt
44 |         s2 = (p2-abs(sqrt(p2^2-p1*p3)))/p1; % (negative discriminant) into its magnitude - confirm this is intended
45 |         if (s1<=0 || s1>1) % 100 is a sentinel for "step outside (0,1], not usable"
46 |             s1 = 100;
47 |         end
48 |         if (s2<=0 || s2>1)
49 |             s2 = 100;
50 |         end
51 |         if s1==100 && s2==100 % neither root usable for this feature
52 |             continue;
53 |         else
54 |             s = min(s1,s2);
55 |         end
56 |         if s < bestNor % keep the feature reachable with the smallest step
57 |             bestNor = s;
58 |             bestCor = i;
59 |         end
60 |     end
61 |     if bestCor == -1; % no feature produced a usable step
62 |         return % exits without the final refit below; fList(k:end) stays 0
63 |     else
64 |         fList(k) = bestCor;
65 |         XA = X(:, fList(1:k));
66 |         W(fList(1:k-1),:) = W(fList(1:k-1),:) + bestNor*GA; % move the previously selected weights by step bestNor along GA
67 |         R = Y - X*W; % refresh the residual with the updated weights
68 |         % fprintf(' R: %f, W: %f, l: %f\n',norm(R), norm(W), bestNor);
69 |     end
70 | end
71 | % final step: fit the remaining residual on all selected columns via the pseudo-inverse and add it to W
72 | GA = pinv(full(XA'*XA))*XA'*R;
73 | W(fList(1:k),:) = W(fList(1:k),:) + GA;
74 | R = Y - X*W; % R is not returned; only the commented-out print below would use it
75 | % fprintf(' R: %f, W: %f\n',norm(R), norm(W));


--------------------------------------------------------------------------------
/fs_unsup_llcfs_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_llcfs_single_func(dataset, exp_settings, algo_settings)
 2 | % Grid-search driver for LLCFS unsupervised feature selection.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The four outputs are also saved to <dataset>_best_result_LLCFS.mat.
 7 | 
 8 | %---------------- experiment settings ----------------
 9 | FeaNumCandi = exp_settings.FeaNumCandi;
10 | nKmeans = exp_settings.nKmeans;
11 | if isfield(exp_settings, 'prefix_mdcs')
12 |     prefix_mdcs = exp_settings.prefix_mdcs;
13 | else
14 |     prefix_mdcs = [];
15 | end
16 | 
17 | disp(['dataset:',dataset]);
18 | [X, Y] = extractXY(dataset);
19 | [nSmp,nDim] = size(X);
20 | nClass = length(unique(Y));
21 | nFeaNum = length(FeaNumCandi);
22 | 
23 | %---------------- parameter grid ----------------
24 | knnCandi = 5;
25 | graphTypeCandi = 2;
26 | betaCandidates = 10.^(-5:5);
27 | paramCell = fs_unsup_llcfs_build_param(nClass, knnCandi, betaCandidates, graphTypeCandi );
28 | nParam = length(paramCell);
29 | 
30 | %---------------- rank features for each setting ----------------
31 | disp('LLCFS ...');
32 | rankClock = clock;
33 | feaSubsets = cell(nParam, 1);
34 | parfor iParam = 1:nParam
35 |     fprintf('LLCFS parameter search %d out of %d...\n', iParam, nParam);
36 |     [~, tao] = fs_unsup_llcfs(X, paramCell{iParam});
37 |     [~, ranking] = sort(tao, 'descend');
38 |     feaSubsets{iParam,1} = ranking;
39 | end
40 | t1 = etime(clock, rankClock);
41 | disp(['exe time: ',num2str(t1)]);
42 | 
43 | %---------------- evaluate every (setting, feature count) pair ----------------
44 | evalClock = clock;
45 | disp('evaluation ...');
46 | kmOpt = struct('nKm', nKmeans);
47 | res_aio = cell(nParam, nFeaNum);
48 | for iFea = 1:nFeaNum
49 |     nSel = FeaNumCandi(iFea);
50 |     parfor iParam = 1:nParam
51 |         fprintf('LLCFS parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
52 |         ranking = feaSubsets{iParam,1};
53 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), kmOpt);
54 |     end
55 | end
56 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
57 | res_gs.feaset = FeaNumCandi;
58 | t2 = etime(clock, evalClock);
59 | disp(['exe time: ',num2str(t2)]);
60 | res_gs.time = t1;
61 | res_gs.time2 = t2;
62 | 
63 | save(fullfile(prefix_mdcs, [dataset, '_best_result_LLCFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
64 | end


--------------------------------------------------------------------------------
/fs_unsup_ndfs_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_ndfs_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the NDFS parameter sweep on one dataset and scores the selected features.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The four outputs are also saved to <dataset>_best_result_NDFS.mat.
 7 | 
 8 | %---------------- experiment settings ----------------
 9 | FeaNumCandi = exp_settings.FeaNumCandi;
10 | nKmeans = exp_settings.nKmeans;
11 | if isfield(exp_settings, 'prefix_mdcs')
12 |     prefix_mdcs = exp_settings.prefix_mdcs;
13 | else
14 |     prefix_mdcs = [];
15 | end
16 | 
17 | disp(['dataset:',dataset]);
18 | [X, Y] = extractXY(dataset);
19 | [nSmp,nDim] = size(X);
20 | nClass = length(unique(Y));
21 | nFeaNum = length(FeaNumCandi);
22 | 
23 | %---------------- parameter grid ----------------
24 | knnCandi = 5;
25 | weightCandi = {'HeatKernel'};
26 | alphaCandi = 10.^(-5:5);
27 | betaCandi = 10.^(-5:5);
28 | s1 = optSigma(X);
29 | weight_param_Candi = {2.^0 .* s1.^2};
30 | paramCell = fs_unsup_ndfs_build_param(knnCandi, weightCandi, weight_param_Candi, alphaCandi, betaCandi);
31 | nParam = length(paramCell);
32 | 
33 | %---------------- feature selection for each setting (serial) ----------------
34 | disp('NDFS ...');
35 | searchClock = clock;
36 | feaSubsets = cell(nParam, 1);
37 | for iParam = 1:nParam
38 |     fprintf('NDFS parameter search %d out of %d...\n', iParam, nParam);
39 |     feaSubsets{iParam,1} = fs_unsup_ndfs(X, nClass, paramCell{iParam});
40 | end
41 | t1 = etime(clock, searchClock);
42 | disp(['exe time: ',num2str(t1)]);
43 | 
44 | %---------------- evaluate every (setting, feature count) pair ----------------
45 | disp('evaluation....');
46 | evalClock = clock;
47 | kmOpt = struct('nKm', nKmeans);
48 | res_aio = cell(nParam, nFeaNum);
49 | for iFea = 1:nFeaNum
50 |     nSel = FeaNumCandi(iFea);
51 |     for iParam = 1:nParam
52 |         fprintf('NDFS parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
53 |         selected = feaSubsets{iParam,1};
54 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, selected(1:nSel), kmOpt);
55 |     end
56 | end
57 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
58 | res_gs.feaset = FeaNumCandi;
59 | t2 = etime(clock, evalClock);
60 | disp(['exe time: ',num2str(t2)]);
61 | res_gs.time = t1;
62 | res_gs.time2 = t2;
63 | 
64 | save(fullfile(prefix_mdcs, [dataset, '_best_result_NDFS.mat']) ,'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
65 | end


--------------------------------------------------------------------------------
/fs_unsup_udfs_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_udfs_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the UDFS parameter sweep on one dataset and scores the ranked features.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The four outputs are also saved to <dataset>_best_result_UDFS.mat.
 7 | 
 8 | %---------------- experiment settings ----------------
 9 | FeaNumCandi = exp_settings.FeaNumCandi;
10 | nKmeans = exp_settings.nKmeans;
11 | if isfield(exp_settings, 'prefix_mdcs')
12 |     prefix_mdcs = exp_settings.prefix_mdcs;
13 | else
14 |     prefix_mdcs = [];
15 | end
16 | 
17 | disp(['dataset:',dataset]);
18 | [X, Y] = extractXY(dataset);
19 | [nSmp, nDim] = size(X);
20 | nClass = length(unique(Y));
21 | nFeaNum = length(FeaNumCandi);
22 | 
23 | %---------------- parameter grid ----------------
24 | gammaCandi = 10.^(-5:5);
25 | lamdaCandi = 10.^(-5:5);
26 | knnCandi = 5;
27 | paramCell = fs_unsup_udfs_build_param(knnCandi, gammaCandi, lamdaCandi);
28 | nParam = length(paramCell);
29 | 
30 | %---------------- rank features for each setting ----------------
31 | searchClock = clock;
32 | disp('UDFS ...');
33 | feaSubsets = cell(nParam, 1);
34 | parfor iParam = 1:nParam
35 |     fprintf('UDFS parameter search %d out of %d...\n', iParam, nParam);
36 |     setting = paramCell{iParam};
37 |     A = X' * LocalDisAna(X', setting) * X;
38 |     W = fs_unsup_udfs(A, nClass, setting.gamma);
39 |     % rows of W with the largest squared norm come first
40 |     [~, ranking] = sort(sum(W.*W,2),'descend');
41 |     feaSubsets{iParam,1} = ranking;
42 | end
43 | t1 = etime(clock, searchClock);
44 | disp(['exe time: ',num2str(t1)]);
45 | 
46 | %---------------- evaluate every (setting, feature count) pair ----------------
47 | evalClock = clock;
48 | disp('evaluation ...');
49 | kmOpt = struct('nKm', nKmeans);
50 | res_aio = cell(nParam, nFeaNum);
51 | for iFea = 1:nFeaNum
52 |     nSel = FeaNumCandi(iFea);
53 |     parfor iParam = 1:nParam
54 |         fprintf('UDFS parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
55 |         ranking = feaSubsets{iParam,1};
56 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), kmOpt);
57 |     end
58 | end
59 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
60 | res_gs.feaset = FeaNumCandi;
61 | t2 = etime(clock, evalClock);
62 | disp(['exe time: ',num2str(t2)]);
63 | res_gs.time = t1;
64 | res_gs.time2 = t2;
65 | 
66 | save(fullfile(prefix_mdcs, [dataset, '_best_result_UDFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
67 | end


--------------------------------------------------------------------------------
/fs_unsup_lapscore_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_lapscore_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the LapScore parameter sweep on one dataset and scores the ranked features.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The four outputs are also saved to <dataset>_best_result_LapScore.mat.
 7 | 
 8 | %---------------- experiment settings ----------------
 9 | FeaNumCandi = exp_settings.FeaNumCandi;
10 | nKmeans = exp_settings.nKmeans;
11 | if isfield(exp_settings, 'prefix_mdcs')
12 |     prefix_mdcs = exp_settings.prefix_mdcs;
13 | else
14 |     prefix_mdcs = [];
15 | end
16 | 
17 | disp(['dataset:',dataset]);
18 | [X, Y] = extractXY(dataset);
19 | [nSmp,nDim] = size(X);
20 | nClass = length(unique(Y));
21 | nFeaNum = length(FeaNumCandi);
22 | 
23 | %---------------- parameter grid ----------------
24 | knnCandi = 5;
25 | weightCandi = {'HeatKernel'};%{'Binary','HeatKernel'};
26 | s1 = optSigma(X);
27 | weight_param_Candi = {2.^(-3:3) .* s1.^2};% {[], 2.^[-3:3] .* s1.^2};
28 | paramCell = fs_unsup_lapscore_build_param(knnCandi, weightCandi, weight_param_Candi);
29 | nParam = length(paramCell);
30 | 
31 | %---------------- rank features for each setting ----------------
32 | disp('LapScore ...');
33 | searchClock = clock;
34 | feaSubsets = cell(nParam, 1);
35 | parfor iParam = 1:nParam
36 |     fprintf('LapScore parameter search %d out of %d...\n', iParam, nParam);
37 |     graphW = constructW(X, paramCell{iParam});
38 |     LS = fs_unsup_lapscore(X, graphW);
39 |     % ascending sort of -LS, i.e. features ordered by decreasing LS
40 |     [~, ranking] = sort(-LS);
41 |     feaSubsets{iParam,1} = ranking;
42 | end
43 | t1 = etime(clock, searchClock);
44 | disp(['exe time: ',num2str(t1)]);
45 | 
46 | %---------------- evaluate every (setting, feature count) pair ----------------
47 | disp('evaluation....');
48 | evalClock = clock;
49 | kmOpt = struct('nKm', nKmeans);
50 | res_aio = cell(nParam, nFeaNum);
51 | for iFea = 1:nFeaNum
52 |     nSel = FeaNumCandi(iFea);
53 |     parfor iParam = 1:nParam
54 |         fprintf('LapScore parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
55 |         ranking = feaSubsets{iParam,1};
56 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), kmOpt);
57 |     end
58 | end
59 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
60 | res_gs.feaset = FeaNumCandi;
61 | t2 = etime(clock, evalClock);
62 | disp(['exe time: ',num2str(t2)]);
63 | res_gs.time = t1;
64 | res_gs.time2 = t2;
65 | 
66 | save(fullfile(prefix_mdcs, [dataset, '_best_result_LapScore.mat']), 'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
67 | end


--------------------------------------------------------------------------------
/fs_unsup_spec_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_spec_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the SPEC parameter sweep on one dataset and scores the ranked features.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %                   (output folder; current folder is used when absent)
 6 | %   algo_settings - not referenced in this function
 7 | % Returns the candidate feature counts, the grid-search summary (res_gs,
 8 | % res_gs_ps) and the raw per-setting results (res_aio); the same four
 9 | % variables are saved to <dataset>_best_result_SPEC.mat.
10 | 
11 | %======================setup===========================
12 | FeaNumCandi = exp_settings.FeaNumCandi;
13 | nKmeans = exp_settings.nKmeans;
14 | prefix_mdcs = [];
15 | if isfield(exp_settings, 'prefix_mdcs')
16 |     prefix_mdcs = exp_settings.prefix_mdcs;
17 | end
18 | %======================================================
19 | 
20 | disp(['dataset:',dataset]);
21 | [X, Y] = extractXY(dataset);
22 | 
23 | %===================setup=======================
24 | styleCandi = 1;
25 | expLamCandi = [0.25, 1, 4];
26 | funcCandi = [1, 2, 3];
27 | s1 = optSigma(X);
28 | kernelParamCell = buildParamKernel({'Gaussian'}, {sqrt(2.^(-4:2)) * s1}, {''});
29 | paramCell = fs_unsup_spec_build_param(kernelParamCell, styleCandi, expLamCandi, funcCandi);
30 | nParam = length(paramCell);
31 | nFeaNum = length(FeaNumCandi);
32 | %===============================================
33 | 
34 | disp('SPEC ...');
35 | searchTimer = tic;
36 | % The label matrix does not depend on the parameter setting, so build it
37 | % once instead of once per parfor iteration.
38 | labelMat = LabelFormat(Y);
39 | feaSubsets = cell(nParam, 1);
40 | parfor i1 = 1:nParam
41 |     fprintf('SPEC parameter search %d out of %d...\n', i1, nParam);
42 |     param = paramCell{i1};
43 |     K = constructKernel(X, X, param.kernelOption);
44 |     wFeat = fs_unsup_spec( K, X, labelMat, param );
45 |     % features with the largest SPEC weight come first
46 |     [~, idx] = sort(wFeat,'descend');
47 |     feaSubsets{i1,1} = idx;
48 | end
49 | t1 = toc(searchTimer);
50 | disp(['exe time: ',num2str(t1)]);
51 | 
52 | evalTimer = tic;
53 | disp('evaluation ...');
54 | res_aio = cell(nParam, nFeaNum);
55 | for i2 = 1:nFeaNum
56 |     m = FeaNumCandi(i2);
57 |     parfor i1 = 1:nParam
58 |         fprintf('SPEC parameter evaluation %d outof %d  ... %d out of %d...\n', i2, nFeaNum, i1, nParam);
59 |         idx = feaSubsets{i1,1};
60 |         res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans));
61 |     end
62 | end
63 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
64 | res_gs.feaset = FeaNumCandi;
65 | t2 = toc(evalTimer);   % includes the grid-search summary, as before
66 | disp(['exe time: ',num2str(t2)]);
67 | res_gs.time = t1;
68 | res_gs.time2 = t2;
69 | 
70 | save(fullfile(prefix_mdcs, [dataset, '_best_result_SPEC.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
71 | end


--------------------------------------------------------------------------------
/fs_unsup_fsasl_11_11_1_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_fsasl_11_11_1_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the FSASL parameter sweep (single gamma value, 50 iterations) on one dataset.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The outputs, paramCell and feaSubsets are saved to
 7 | % <dataset>_best_result_FSSL_11_11_1.mat.
 8 | 
 9 | %---------------- experiment settings ----------------
10 | FeaNumCandi = exp_settings.FeaNumCandi;
11 | nKmeans = exp_settings.nKmeans;
12 | if isfield(exp_settings, 'prefix_mdcs')
13 |     prefix_mdcs = exp_settings.prefix_mdcs;
14 | else
15 |     prefix_mdcs = [];
16 | end
17 | 
18 | disp(['dataset:',dataset]);
19 | [X, Y] = extractXY(dataset);
20 | [nSmp,nDim] = size(X);
21 | nClass = length(unique(Y));
22 | nFeaNum = length(FeaNumCandi);
23 | 
24 | %---------------- parameter grid ----------------
25 | alphaCandi = 10.^(-5:5);
26 | betaCandi = 10.^(-5:5);
27 | gammaCandi = 0.01;
28 | maxIter = 50;
29 | nnCandi = 5;
30 | paramCell = fs_unsup_fsasl_build_param({'SLEP'}, {gammaCandi}, nnCandi, ...
31 |     alphaCandi, betaCandi, {'LS21'}, maxIter);
32 | nParam = length(paramCell);
33 | 
34 | %---------------- rank features for each setting ----------------
35 | disp('FSSL ...');
36 | searchClock = clock;
37 | feaSubsets = cell(nParam, 1);
38 | parfor iParam = 1:nParam
39 |     fprintf('FSSL parameter search %d out of %d...\n', iParam, nParam);
40 |     W = FSASL(X', nClass, paramCell{iParam});
41 |     % rows of W with the largest squared norm come first
42 |     [~, ranking] = sort(sum(W.^2,2),'descend');
43 |     feaSubsets{iParam,1} = ranking;
44 | end
45 | t1 = etime(clock, searchClock);
46 | disp(['exe time: ',num2str(t1)]);
47 | 
48 | %---------------- evaluate every (setting, feature count) pair ----------------
49 | evalClock = clock;
50 | disp('evaluation ...');
51 | kmOpt = struct('nKm', nKmeans);
52 | res_aio = cell(nParam, nFeaNum);
53 | for iFea = 1:nFeaNum
54 |     nSel = FeaNumCandi(iFea);
55 |     parfor iParam = 1:nParam
56 |         fprintf('FSASL parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
57 |         ranking = feaSubsets{iParam,1};
58 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), kmOpt);
59 |     end
60 | end
61 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
62 | res_gs.feaset = FeaNumCandi;
63 | t2 = etime(clock, evalClock);
64 | disp(['exe time: ',num2str(t2)]);
65 | res_gs.time = t1;
66 | res_gs.time2 = t2;
67 | 
68 | save(fullfile(prefix_mdcs, [dataset, '_best_result_FSSL_11_11_1.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps', 'paramCell', 'feaSubsets');
69 | end


--------------------------------------------------------------------------------
/fs_unsup_jelsr_liang_lle_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_liang_lle_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the JELSR_liang parameter sweep with the 'lle' weight mode on one dataset.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %                   (output folder; current folder is used when absent)
 6 | %   algo_settings - not referenced in this function
 7 | % Returns the candidate feature counts, the grid-search summary (res_gs,
 8 | % res_gs_ps) and the raw per-setting results (res_aio); the same four
 9 | % variables are saved to <dataset>_best_result_JELSR_liang_lle.mat.
10 | 
11 | %======================setup===========================
12 | FeaNumCandi = exp_settings.FeaNumCandi;
13 | nKmeans = exp_settings.nKmeans;
14 | prefix_mdcs = [];
15 | if isfield(exp_settings, 'prefix_mdcs')
16 |     prefix_mdcs = exp_settings.prefix_mdcs;
17 | end
18 | %======================================================
19 | 
20 | disp(['dataset:',dataset]);
21 | [X, Y] = extractXY(dataset);
22 | nClass = length(unique(Y));
23 | 
24 | %===================setup=======================
25 | r1Candi = 10.^(-5:5);
26 | r2Candi = 10.^(-5:5);
27 | knnCandi = 5;
28 | weightCandi = {'lle'};
29 | s1 = optSigma(X);
30 | weight_param_Candi = {s1};
31 | % NOTE(review): this uses the plain JELSR parameter builder, not a
32 | % JELSR_liang-specific one - confirm the field names match fs_unsup_jelsr_liang.
33 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi);
34 | nParam = length(paramCell);
35 | nFeaNum = length(FeaNumCandi);
36 | %===============================================
37 | disp('JELSR ...');
38 | searchTimer = tic;
39 | feaSubsets = cell(nParam, 1);
40 | parfor i1 = 1:nParam
41 |     fprintf('JELSR_liang parameter search %d out of %d...\n', i1, nParam);
42 |     param = paramCell{i1};
43 |     param.nClusters = nClass;
44 |     [model_jelsr] = fs_unsup_jelsr_liang(X', param);
45 |     % features with the largest model score z come first
46 |     [~,idx] = sort(model_jelsr.z, 'descend');
47 |     feaSubsets{i1,1} = idx;
48 | end
49 | t1 = toc(searchTimer);
50 | disp(['exe time: ',num2str(t1)]);
51 | 
52 | evalTimer = tic;
53 | disp('evaluation....');
54 | res_aio = cell(nParam, nFeaNum);
55 | for i2 = 1:nFeaNum
56 |     m = FeaNumCandi(i2);
57 |     parfor i1 = 1:nParam
58 |         fprintf('JELSR parameter evaluation %d outof %d  ... %d out of %d...\n', i2, nFeaNum, i1, nParam);
59 |         idx = feaSubsets{i1,1};
60 |         res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans));
61 |     end
62 | end
63 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
64 | res_gs.feaset = FeaNumCandi;
65 | t2 = toc(evalTimer);   % includes the grid-search summary, as before
66 | disp(['exe time: ',num2str(t2)]);
67 | res_gs.time = t1;
68 | res_gs.time2 = t2;
69 | 
70 | % fullfile drops an empty prefix; plain concatenation with filesep would
71 | % have pointed at the filesystem root when prefix_mdcs is not supplied.
72 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_liang_lle.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
73 | end


--------------------------------------------------------------------------------
/fs_unsup_jelsr_liang_lpp_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_liang_lpp_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the JELSR_liang parameter sweep with the 'lpp' weight mode on one dataset.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %                   (output folder; current folder is used when absent)
 6 | %   algo_settings - not referenced in this function
 7 | % Returns the candidate feature counts, the grid-search summary (res_gs,
 8 | % res_gs_ps) and the raw per-setting results (res_aio); the same four
 9 | % variables are saved to <dataset>_best_result_JELSR_liang_lpp.mat.
10 | 
11 | %======================setup===========================
12 | FeaNumCandi = exp_settings.FeaNumCandi;
13 | nKmeans = exp_settings.nKmeans;
14 | prefix_mdcs = [];
15 | if isfield(exp_settings, 'prefix_mdcs')
16 |     prefix_mdcs = exp_settings.prefix_mdcs;
17 | end
18 | %======================================================
19 | 
20 | disp(['dataset:',dataset]);
21 | [X, Y] = extractXY(dataset);
22 | nClass = length(unique(Y));
23 | 
24 | %===================setup=======================
25 | r1Candi = 10.^(-5:5);
26 | r2Candi = 10.^(-5:5);
27 | knnCandi = 5;
28 | weightCandi = {'lpp'};
29 | s1 = optSigma(X);
30 | weight_param_Candi = {s1};
31 | % NOTE(review): this uses the plain JELSR parameter builder, not a
32 | % JELSR_liang-specific one - confirm the field names match fs_unsup_jelsr_liang.
33 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi);
34 | nParam = length(paramCell);
35 | nFeaNum = length(FeaNumCandi);
36 | %===============================================
37 | disp('JELSR ...');
38 | searchTimer = tic;
39 | feaSubsets = cell(nParam, 1);
40 | parfor i1 = 1:nParam
41 |     fprintf('JELSR_liang parameter search %d out of %d...\n', i1, nParam);
42 |     param = paramCell{i1};
43 |     param.nClusters = nClass;
44 |     [model_jelsr] = fs_unsup_jelsr_liang(X', param);
45 |     % features with the largest model score z come first
46 |     [~,idx] = sort(model_jelsr.z, 'descend');
47 |     feaSubsets{i1,1} = idx;
48 | end
49 | t1 = toc(searchTimer);
50 | disp(['exe time: ',num2str(t1)]);
51 | 
52 | evalTimer = tic;
53 | disp('evaluation....');
54 | res_aio = cell(nParam, nFeaNum);
55 | for i2 = 1:nFeaNum
56 |     m = FeaNumCandi(i2);
57 |     parfor i1 = 1:nParam
58 |         fprintf('JELSR parameter evaluation %d outof %d  ... %d out of %d...\n', i2, nFeaNum, i1, nParam);
59 |         idx = feaSubsets{i1,1};
60 |         res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans));
61 |     end
62 | end
63 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
64 | res_gs.feaset = FeaNumCandi;
65 | t2 = toc(evalTimer);   % includes the grid-search summary, as before
66 | disp(['exe time: ',num2str(t2)]);
67 | res_gs.time = t1;
68 | res_gs.time2 = t2;
69 | 
70 | % fullfile drops an empty prefix; plain concatenation with filesep would
71 | % have pointed at the filesystem root when prefix_mdcs is not supplied.
72 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_liang_lpp.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
73 | end


--------------------------------------------------------------------------------
/fs_unsup_fsasl_11_11_5_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_fsasl_11_11_5_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the FSASL parameter sweep (five gamma values, 20 iterations) on one dataset.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The outputs, paramCell and feaSubsets are saved to
 7 | % <dataset>_best_result_FSSL_11_11_5.mat.
 8 | 
 9 | %---------------- experiment settings ----------------
10 | FeaNumCandi = exp_settings.FeaNumCandi;
11 | nKmeans = exp_settings.nKmeans;
12 | if isfield(exp_settings, 'prefix_mdcs')
13 |     prefix_mdcs = exp_settings.prefix_mdcs;
14 | else
15 |     prefix_mdcs = [];
16 | end
17 | 
18 | disp(['dataset:',dataset]);
19 | [X, Y] = extractXY(dataset);
20 | [nSmp,nDim] = size(X);
21 | nClass = length(unique(Y));
22 | nFeaNum = length(FeaNumCandi);
23 | 
24 | %---------------- parameter grid ----------------
25 | alphaCandi = 10.^(-5:5);
26 | betaCandi = 10.^(-5:5);
27 | gammaCandi = [0.001, 0.005, 0.01, 0.05, 0.1];
28 | maxIter = 20;
29 | nnCandi = 5;
30 | paramCell = fs_unsup_fsasl_build_param({'SLEP'}, {gammaCandi}, nnCandi, ...
31 |     alphaCandi, betaCandi, {'LS21'}, maxIter);
32 | nParam = length(paramCell);
33 | 
34 | %---------------- rank features for each setting ----------------
35 | disp('FSSL ...');
36 | searchClock = clock;
37 | feaSubsets = cell(nParam, 1);
38 | parfor iParam = 1:nParam
39 |     fprintf('FSSL parameter search %d out of %d...\n', iParam, nParam);
40 |     W = FSASL(X', nClass, paramCell{iParam});
41 |     % rows of W with the largest squared norm come first
42 |     [~, ranking] = sort(sum(W.^2,2),'descend');
43 |     feaSubsets{iParam,1} = ranking;
44 | end
45 | t1 = etime(clock, searchClock);
46 | disp(['exe time: ',num2str(t1)]);
47 | 
48 | %---------------- evaluate every (setting, feature count) pair ----------------
49 | evalClock = clock;
50 | disp('evaluation ...');
51 | kmOpt = struct('nKm', nKmeans);
52 | res_aio = cell(nParam, nFeaNum);
53 | for iFea = 1:nFeaNum
54 |     nSel = FeaNumCandi(iFea);
55 |     parfor iParam = 1:nParam
56 |         fprintf('FSASL parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
57 |         ranking = feaSubsets{iParam,1};
58 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), kmOpt);
59 |     end
60 | end
61 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
62 | res_gs.feaset = FeaNumCandi;
63 | t2 = etime(clock, evalClock);
64 | disp(['exe time: ',num2str(t2)]);
65 | res_gs.time = t1;
66 | res_gs.time2 = t2;
67 | 
68 | save(fullfile(prefix_mdcs, [dataset, '_best_result_FSSL_11_11_5.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps', 'paramCell', 'feaSubsets');
69 | end


--------------------------------------------------------------------------------
/fs_unsup_jelsr_lle_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_lle_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the JELSR parameter sweep with the 'lle' weight mode on one dataset.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The four outputs are also saved to <dataset>_best_result_JELSR_lle.mat.
 7 | 
 8 | %---------------- experiment settings ----------------
 9 | FeaNumCandi = exp_settings.FeaNumCandi;
10 | nKmeans = exp_settings.nKmeans;
11 | if isfield(exp_settings, 'prefix_mdcs')
12 |     prefix_mdcs = exp_settings.prefix_mdcs;
13 | else
14 |     prefix_mdcs = [];
15 | end
16 | 
17 | disp(['dataset:',dataset]);
18 | [X, Y] = extractXY(dataset);
19 | [nSmp,nDim] = size(X);
20 | nClass = length(unique(Y));
21 | nFeaNum = length(FeaNumCandi);
22 | 
23 | %---------------- parameter grid ----------------
24 | r1Candi = 10.^(-5:5);
25 | r2Candi = 10.^(-5:5);
26 | knnCandi = 5;
27 | weightCandi = {'lle'};
28 | s1 = optSigma(X);
29 | weight_param_Candi = {s1};
30 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi);
31 | nParam = length(paramCell);
32 | 
33 | %---------------- rank features for each setting ----------------
34 | searchClock = clock;
35 | disp('JELSR ...');
36 | feaSubsets = cell(nParam, 1);
37 | parfor iParam = 1:nParam
38 |     fprintf('JELSR parameter search %d out of %d...\n', iParam, nParam);
39 |     setting = paramCell{iParam};
40 |     [~, graphW] = computeLocalStructure(X, setting.weightMode, setting.k, setting.t);
41 |     projW = fs_unsup_jelsr(X, graphW, nClass, setting.alpha, setting.beta);
42 |     % rows of the learned matrix with the largest squared norm come first
43 |     [~, ranking] = sort(sum(projW.^2,2), 'descend');
44 |     feaSubsets{iParam,1} = ranking;
45 | end
46 | t1 = etime(clock, searchClock);
47 | disp(['exe time: ',num2str(t1)]);
48 | 
49 | %---------------- evaluate every (setting, feature count) pair ----------------
50 | evalClock = clock;
51 | disp('evaluation....');
52 | kmOpt = struct('nKm', nKmeans);
53 | res_aio = cell(nParam, nFeaNum);
54 | for iFea = 1:nFeaNum
55 |     nSel = FeaNumCandi(iFea);
56 |     parfor iParam = 1:nParam
57 |         fprintf('JELSR parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
58 |         ranking = feaSubsets{iParam,1};
59 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), kmOpt);
60 |     end
61 | end
62 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
63 | res_gs.feaset = FeaNumCandi;
64 | t2 = etime(clock, evalClock);
65 | disp(['exe time: ',num2str(t2)]);
66 | res_gs.time = t1;
67 | res_gs.time2 = t2;
68 | 
69 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_lle.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
70 | end


--------------------------------------------------------------------------------
/fs_unsup_jelsr_lpp_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_lpp_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the JELSR parameter sweep with the 'lpp' weight mode on one dataset.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The four outputs are also saved to <dataset>_best_result_JELSR_lpp.mat.
 7 | 
 8 | %---------------- experiment settings ----------------
 9 | FeaNumCandi = exp_settings.FeaNumCandi;
10 | nKmeans = exp_settings.nKmeans;
11 | if isfield(exp_settings, 'prefix_mdcs')
12 |     prefix_mdcs = exp_settings.prefix_mdcs;
13 | else
14 |     prefix_mdcs = [];
15 | end
16 | 
17 | disp(['dataset:',dataset]);
18 | [X, Y] = extractXY(dataset);
19 | [nSmp,nDim] = size(X);
20 | nClass = length(unique(Y));
21 | nFeaNum = length(FeaNumCandi);
22 | 
23 | %---------------- parameter grid ----------------
24 | r1Candi = 10.^(-5:5);
25 | r2Candi = 10.^(-5:5);
26 | knnCandi = 5;
27 | weightCandi = {'lpp'};
28 | s1 = optSigma(X);
29 | weight_param_Candi = {s1};
30 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi);
31 | nParam = length(paramCell);
32 | 
33 | %---------------- rank features for each setting ----------------
34 | searchClock = clock;
35 | disp('JELSR ...');
36 | feaSubsets = cell(nParam, 1);
37 | parfor iParam = 1:nParam
38 |     fprintf('JELSR parameter search %d out of %d...\n', iParam, nParam);
39 |     setting = paramCell{iParam};
40 |     [~, graphW] = computeLocalStructure(X, setting.weightMode, setting.k, setting.t);
41 |     projW = fs_unsup_jelsr(X, graphW, nClass, setting.alpha, setting.beta);
42 |     % rows of the learned matrix with the largest squared norm come first
43 |     [~, ranking] = sort(sum(projW.^2,2), 'descend');
44 |     feaSubsets{iParam,1} = ranking;
45 | end
46 | t1 = etime(clock, searchClock);
47 | disp(['exe time: ',num2str(t1)]);
48 | 
49 | %---------------- evaluate every (setting, feature count) pair ----------------
50 | evalClock = clock;
51 | disp('evaluation....');
52 | kmOpt = struct('nKm', nKmeans);
53 | res_aio = cell(nParam, nFeaNum);
54 | for iFea = 1:nFeaNum
55 |     nSel = FeaNumCandi(iFea);
56 |     parfor iParam = 1:nParam
57 |         fprintf('JELSR parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
58 |         ranking = feaSubsets{iParam,1};
59 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), kmOpt);
60 |     end
61 | end
62 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
63 | res_gs.feaset = FeaNumCandi;
64 | t2 = etime(clock, evalClock);
65 | disp(['exe time: ',num2str(t2)]);
66 | res_gs.time = t1;
67 | res_gs.time2 = t2;
68 | 
69 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_lpp.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
70 | end


--------------------------------------------------------------------------------
/fs_unsup_spfs_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_spfs_single_func(dataset, exp_settings, algo_settings)
 2 | % Runs the SPFS parameter sweep (kernel width x SPFS variant) on one dataset.
 3 | %   dataset       - dataset identifier passed to extractXY
 4 | %   exp_settings  - struct with FeaNumCandi, nKmeans and optional prefix_mdcs
 5 | %   algo_settings - not referenced in this function
 6 | % The four outputs are also saved to <dataset>_best_result_SPFS.mat.
 7 | 
 8 | %---------------- experiment settings ----------------
 9 | FeaNumCandi = exp_settings.FeaNumCandi;
10 | nKmeans = exp_settings.nKmeans;
11 | if isfield(exp_settings, 'prefix_mdcs')
12 |     prefix_mdcs = exp_settings.prefix_mdcs;
13 | else
14 |     prefix_mdcs = [];
15 | end
16 | 
17 | disp(['dataset:',dataset]);
18 | [X, Y] = extractXY(dataset);
19 | [nSmp,nDim] = size(X);
20 | nClass = length(unique(Y));
21 | nFeaNum = length(FeaNumCandi);
22 | 
23 | %---------------- parameter grid: width varies slowest, variant fastest ----------------
24 | s1 = optSigma(X);
25 | tCandi = 2.^(-3:3) * s1.^2;
26 | spfs_typeCandi = {'SFS', 'NES'};
27 | nWidth = length(tCandi);
28 | nType = length(spfs_typeCandi);
29 | nP = nWidth * nType;
30 | paramCell = cell(nP, 1) ;
31 | for iWidth = 1:nWidth
32 |     for iType = 1:nType
33 |         entry = [];
34 |         entry.t = tCandi(iWidth);
35 |         entry.spfs_type = spfs_typeCandi{iType};
36 |         paramCell{(iWidth - 1) * nType + iType} = entry;
37 |     end
38 | end
39 | nParam = length(paramCell);
40 | 
41 | %---------------- feature selection for each setting (serial) ----------------
42 | disp('get SPFS...');
43 | searchClock = clock;
44 | Dist = EuDist2(X, X, 0);
45 | 
46 | feaSubsets = cell(nParam, 1);
47 | for iParam = 1:nParam
48 |     setting = paramCell{iParam};
49 |     setting.nClass = nClass;
50 |     % kernel built from the pairwise distances with width setting.t
51 |     K = exp( - Dist / setting.t);
52 |     feaSubsets{iParam,1} = fs_unsup_spfs(X, K, [], max(FeaNumCandi), setting);
53 | end
54 | t1 = etime(clock, searchClock);
55 | disp(['exe time: ',num2str(t1)]);
56 | 
57 | %---------------- evaluate every (setting, feature count) pair ----------------
58 | evalClock = clock;
59 | disp('evaluation ...');
60 | kmOpt = struct('nKm', nKmeans);
61 | res_aio = cell(nParam, nFeaNum);
62 | for iFea = 1:nFeaNum
63 |     nSel = FeaNumCandi(iFea);
64 |     for iParam = 1:nParam
65 |         fprintf('SPFS parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
66 |         selected = feaSubsets{iParam,1};
67 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, selected(1:nSel), kmOpt);
68 |     end
69 | end
70 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
71 | res_gs.feaset = FeaNumCandi;
72 | t2 = etime(clock, evalClock);
73 | disp(['exe time: ',num2str(t2)]);
74 | res_gs.time = t1;
75 | res_gs.time2 = t2;
76 | 
77 | save(fullfile(prefix_mdcs, [dataset, '_best_result_SPFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
78 | end


--------------------------------------------------------------------------------
/fs_unsup_fsasl_11_5_5_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_fsasl_11_5_5_single_func(dataset, exp_settings, algo_settings)
 2 | %FS_UNSUP_FSASL_11_5_5_SINGLE_FUNC  Grid-search experiment for FSASL.
 3 | %
 4 | %   Loads DATASET through extractXY, runs FSASL for every parameter set built
 5 | %   by fs_unsup_fsasl_build_param (11 alpha values, 5 beta values, 5 gamma
 6 | %   values), ranks features by the squared row norms of the returned W,
 7 | %   scores each ranking with evalUnSupFS, passes the scores to grid_search_fs
 8 | %   and saves everything as <prefix_mdcs>/<dataset>_best_result_FSSL_11_5_5.mat.
 9 | %
10 | %   exp_settings fields read here: FeaNumCandi, nKmeans and, optionally,
11 | %   prefix_mdcs (output directory). algo_settings is accepted but not used.
12 | 
13 | %---------------- experiment settings ----------------
14 | FeaNumCandi = exp_settings.FeaNumCandi;
15 | nKmeans = exp_settings.nKmeans;
16 | if isfield(exp_settings, 'prefix_mdcs')
17 |     prefix_mdcs = exp_settings.prefix_mdcs;
18 | else
19 |     prefix_mdcs = [];
20 | end
21 | 
22 | disp(['dataset:',dataset]);
23 | [X, Y] = extractXY(dataset);
24 | nClass = length(unique(Y));
25 | 
26 | %---------------- parameter grid ----------------
27 | alphaCandi = 10.^(-5:5);
28 | betaCandi = 0.1:0.2:0.9;
29 | gammaCandi = [0.001, 0.005, 0.01, 0.05, 0.1];
30 | maxIter = 50;
31 | nnCandi = 5;
32 | paramCell = fs_unsup_fsasl_build_param({'SLEP'}, {gammaCandi}, nnCandi, ...
33 |     alphaCandi, betaCandi, {'LS21'}, maxIter);
34 | nParam = length(paramCell);
35 | 
36 | %---------------- feature ranking ----------------
37 | disp('FSSL ...');
38 | t_start = clock;
39 | feaSubsets = cell(nParam, 1);
40 | parfor iParam = 1:nParam
41 |     fprintf(['FSSL parameter search %d out of %d...\n'], iParam, nParam);
42 |     param = paramCell{iParam};
43 |     param.lambda1 = 1 - param.lambda2;   % lambda1 is derived so the two weights sum to one
44 |     W = FSASL(X', nClass, param);
45 |     [~, ranking] = sort(sum(W.^2,2),'descend');
46 |     feaSubsets{iParam,1} = ranking;
47 | end
48 | t1 = etime(clock,t_start);
49 | disp(['exe time: ',num2str(t1)]);
50 | 
51 | %---------------- evaluation ----------------
52 | t_start = clock;
53 | disp('evaluation ...');
54 | nFeaNum = length(FeaNumCandi);
55 | evalOpt = struct('nKm', nKmeans);
56 | res_aio = cell(nParam, nFeaNum);
57 | for iFea = 1:nFeaNum
58 |     nSel = FeaNumCandi(iFea);
59 |     parfor iParam = 1:nParam
60 |         fprintf('FSASL parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
61 |         ranking = feaSubsets{iParam,1};
62 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), evalOpt);
63 |     end
64 | end
65 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
66 | res_gs.feaset = FeaNumCandi;
67 | t2 = etime(clock,t_start);
68 | disp(['exe time: ',num2str(t2)]);
69 | res_gs.time = t1;    % seconds spent ranking features
70 | res_gs.time2 = t2;   % seconds spent evaluating
71 | 
72 | save(fullfile(prefix_mdcs, [dataset, '_best_result_FSSL_11_5_5.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps', 'paramCell', 'feaSubsets');
73 | end


--------------------------------------------------------------------------------
/fs_unsup_ndfs.m:
--------------------------------------------------------------------------------
 1 | function idx = fs_unsup_ndfs(X, nClass, param)
 2 | %FS_UNSUP_NDFS  Rank features with the NDFS iteration.
 3 | %
 4 | %   idx = fs_unsup_ndfs(X, nClass, param)
 5 | %
 6 | %   X      - data matrix; size(X,1) sizes the graph Laplacian and size(X,2)
 7 | %            sizes the feature weight matrix W, i.e. rows are samples.
 8 | %   nClass - number of columns of the cluster indicator F and of W.
 9 | %   param  - struct with fields k, weightMode, t (passed to constructW) and
10 | %            maxiter, alpha, beta, gamma (passed to the iteration).
11 | %   idx    - feature indices sorted by decreasing squared row norm of W.
12 | 
13 | [L, F_init] = NDFS_init(X, nClass, param);
14 | 
15 | W_init = ones(size(X,2),nClass);   %W: the feature selection matrix
16 | 
17 | % Silence warnings only while the solver runs, then put back whatever state
18 | % the caller had (also when the solver throws) instead of forcing 'on'.
19 | warnState = warning('off', 'all');
20 | restoreWarn = onCleanup(@() warning(warnState)); %#ok<NASGU>
21 | [~, W] = NDFS_iter(X', L, F_init, W_init, param.maxiter, param.alpha, param.beta, param.gamma);
22 | [~, idx] = sort(sum(W.*W,2),'descend');
23 | end
24 | 
25 | function [L, F_init] = NDFS_init(X, nClass, param)
26 | %NDFS_INIT  Build the normalized Laplacian and the initial indicator matrix.
27 | %   L      - symmetrized I - D^(-1/2) * S * D^(-1/2), S coming from constructW.
28 | %   F_init - random matrix whose entry for each sample's k-means label is set
29 | %            to 1, with 0.2 then added to every entry.
30 | 
31 | %construct the affinity matrix
32 | S = constructW(X, struct('k', param.k, 'WeightMode', param.weightMode, 't', param.t));
33 | invSqrtDeg = sum(S).^(-1/2);
34 | L = eye(size(X,1)) - diag(invSqrtDeg)* S *diag(invSqrtDeg);
35 | L = (L + L')/2;
36 | 
37 | % spectral embedding: eigenvectors of the nClass smallest eigenvalues,
38 | % rows scaled to unit length
39 | [eigvec, eigval] = eig(L);
40 | [~, order] = sort(diag(eigval), 'ascend');
41 | eigvec = eigvec(:, order(1:nClass));
42 | eigvec = bsxfun(@rdivide, eigvec, sqrt(sum(eigvec.^2,2) + eps));
43 | 
44 | %init F and W
45 | rand('twister',5489); %#ok  fixed seed so the initialisation is repeatable
46 | label = litekmeans(eigvec,nClass,'Replicates',10); % significantly!
47 | F_init = rand(size(X,1),nClass);
48 | for i = 1:size(X,1)
49 |     F_init(i,label(i)) = 1;
50 | end
51 | F_init = F_init + 0.2;
52 | end
53 | 
54 | function [F,W,obj]=NDFS_iter(X,L,F,W,maxIter,alpha,beta,gamma)
55 | %NDFS_ITER  Alternating updates of W and F.
56 | %	X: data with one column per sample (size(X,2) must equal size(L,1))
57 | %	L: The laplacian matrix.
58 | %   F: the cluster result (initial value in, final value out)
59 | %   W: the feature selection matrix (initial value in, final value out)
60 | %   obj: 1-by-maxIter vector, obj(i) = trace(F'*M*F) + gamma/4*||F'*F - I||_F^2
61 | %        evaluated after iteration i
62 | 
63 | nSamp = size(X,2);
64 | if size(L,1) ~= nSamp
65 |     error('L is error');
66 | end
67 | XX = X*X';
68 | I_n = eye(nSamp);
69 | 
70 | % reweighting matrix built from the row norms of the current W
71 | D = diag(0.5 ./ sqrt(sum(W.*W,2)+eps));
72 | 
73 | obj = zeros(1, maxIter);
74 | for iter = 1:maxIter
75 |     % One linear solve replaces inv(XX+beta*D); GX equals G*X and serves
76 |     % both the W update and the X'*G*X term of M.
77 |     GX = (XX + beta*D) \ X;
78 |     W = GX*F;
79 |     D = diag(0.5 ./ sqrt(sum(W.*W,2)+eps));
80 | 
81 |     M = L + alpha*(I_n - X'*GX);
82 |     M = (M+M')/2;
83 | 
84 |     % multiplicative update of F followed by column normalisation;
85 |     % F*(F'*F) avoids forming the nSamp-by-nSamp product F*F'
86 |     F = F.*(gamma*F + eps)./(M*F + gamma*F*(F'*F) + eps);
87 |     F = F*diag(sqrt(1./(diag(F'*F)+eps)));
88 | 
89 |     obj(iter) = trace(F'*M*F)+gamma/4*norm(F'*F-eye(size(F,2)),'fro')^2;
90 | end
91 | end


--------------------------------------------------------------------------------
/fs_unsup_rufs_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_rufs_single_func(dataset, exp_settings, algo_settings)
 2 | %FS_UNSUP_RUFS_SINGLE_FUNC  Grid-search experiment for RUFS feature selection.
 3 | %
 4 | %   Loads DATASET through extractXY, builds a one-hot cluster indicator from a
 5 | %   k-means run, runs fs_unsup_rufs for every parameter set, ranks features by
 6 | %   the squared row norms of the returned W, scores each ranking with
 7 | %   evalUnSupFS, passes the scores to grid_search_fs and saves the outcome as
 8 | %   <prefix_mdcs>/<dataset>_best_result_RUFS.mat.
 9 | %
10 | %   exp_settings fields read here: FeaNumCandi, nKmeans and, optionally,
11 | %   prefix_mdcs (output directory). algo_settings is accepted but not used.
12 | 
13 | %---------------- experiment settings ----------------
14 | FeaNumCandi = exp_settings.FeaNumCandi;
15 | nKmeans = exp_settings.nKmeans;
16 | if isfield(exp_settings, 'prefix_mdcs')
17 |     prefix_mdcs = exp_settings.prefix_mdcs;
18 | else
19 |     prefix_mdcs = [];
20 | end
21 | 
22 | disp(['dataset:',dataset]);
23 | [X, Y] = extractXY(dataset);
24 | nClass = length(unique(Y));
25 | 
26 | %---------------- parameter grid ----------------
27 | knnCandi = 5;
28 | rLamdaCandi = 0.1;
29 | nuCandi = 10.^(-5:5);
30 | alphaCandi = 10.^(-5:5);
31 | betaCandi = 10.^(-5:5);
32 | llkrrParamCell = buildParam_LLKRR(knnCandi, rLamdaCandi);
33 | paramCell = fs_unsup_rufs_build_param(llkrrParamCell, alphaCandi, betaCandi, nuCandi);
34 | nParam = length(paramCell);
35 | 
36 | %---------------- shared initialisation ----------------
37 | t_start = clock;
38 | disp('RUFS ...');
39 | feaSubsets = cell(nParam, 1);
40 | rand('twister',5489); %#ok  fixed seed so the k-means start is repeatable
41 | label = litekmeans(X,nClass,'Replicates',10);
42 | % one-hot encode the k-means labels: G0(s, c) = 1 when sample s has label c
43 | G0 = zeros(size(X,1),nClass);
44 | for iSmp = 1:size(X,1)
45 |     G0(iSmp,label(iSmp)) = 1;
46 | end
47 | 
48 | %---------------- feature ranking ----------------
49 | parfor iParam = 1:nParam
50 |     fprintf('RUFS parameter search %d out of %d...\n', iParam, nParam);
51 |     param = paramCell{iParam};
52 |     L_init = localLearnMx_KRR(X, param.llkrrParam);
53 |     W = fs_unsup_rufs(X,L_init,G0, param);
54 |     [~, ranking] = sort(sum(W.^2,2), 'descend');
55 |     feaSubsets{iParam,1} = ranking;
56 | end
57 | t1 = etime(clock,t_start);
58 | disp(['exe time: ',num2str(t1)]);
59 | 
60 | %---------------- evaluation ----------------
61 | t_start = clock;
62 | disp('evaluation ...');
63 | nFeaNum = length(FeaNumCandi);
64 | evalOpt = struct('nKm', nKmeans);
65 | res_aio = cell(nParam, nFeaNum);
66 | for iFea = 1:nFeaNum
67 |     nSel = FeaNumCandi(iFea);
68 |     parfor iParam = 1:nParam
69 |         fprintf('RUFS parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
70 |         ranking = feaSubsets{iParam,1};
71 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), evalOpt);
72 |     end
73 | end
74 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
75 | res_gs.feaset = FeaNumCandi;
76 | t2 = etime(clock,t_start);
77 | disp(['exe time: ',num2str(t2)]);
78 | res_gs.time = t1;    % seconds spent on initialisation and ranking
79 | res_gs.time2 = t2;   % seconds spent evaluating
80 | 
81 | save(fullfile(prefix_mdcs, [dataset, '_best_result_RUFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
82 | end


--------------------------------------------------------------------------------
/L2_distance.m:
--------------------------------------------------------------------------------
 1 | function d = L2_distance(a, b)
 2 | % L2_DISTANCE - Euclidean distance matrix between two sets of column vectors
 3 | %
 4 | % d = L2_distance(a, b)
 5 | %
 6 | %    a - (DxM) matrix, one point per column
 7 | %    b - (DxN) matrix, one point per column
 8 | %
 9 | % Returns:
10 | %    d - (MxN) matrix, d(i,j) is the distance between a(:,i) and b(:,j)
11 | %
12 | % Description :
13 | %    Vectorized evaluation of
14 | %
15 | %                 ||A-B|| = sqrt ( ||A||^2 + ||B||^2 - 2*A.B )
16 | %
17 | %    Only the real part of the result is returned.
18 | %
19 | % Example :
20 | %    A = rand(400,100); B = rand(400,200);
21 | %    d = L2_distance(A,B);
22 | 
23 | % Author   : Roland Bunschoten
24 | %            University of Amsterdam
25 | %            Intelligent Autonomous Systems (IAS) group
26 | %            Kruislaan 403  1098 SJ Amsterdam
27 | %            tel.(+31)20-5257524
28 | %            bunschot@wins.uva.nl
29 | % Last Rev : Wed Oct 20 08:58:08 MET DST 1999
30 | % Tested   : PC Matlab v5.2 and Solaris Matlab v5.3
31 | 
32 | % Copyright notice: You are free to modify, extend and distribute 
33 | %    this code granted that the author of the original code is 
34 | %    mentioned as the original author of the code.
35 | 
36 | % Fixed by JBT (3/18/00) to work for 1-dimensional vectors
37 | % and to warn for imaginary numbers.  Also ensures that 
38 | % output is all real.
39 | 
40 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
41 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49
42 | % You are free to use, change, or redistribute this code in any way you
43 | % want for non-commercial purposes. However, it is appreciated if you 
44 | % maintain the name of the original author.
45 | %
46 | % (C) Laurens van der Maaten, 2010
47 | % University California, San Diego / Delft University of Technology
48 | 
49 | 
50 |     % ---- argument checks ----
51 |     if nargin < 2
52 |        error('Not enough input arguments');
53 |     end
54 |     nDimA = size(a, 1);
55 |     if nDimA ~= size(b, 1)
56 |         error('A and B should be of same dimensionality');
57 |     end
58 |     if ~(isreal(a) && isreal(b))
59 |         warning('Computing distance table using imaginary inputs. Results may be off.'); 
60 |     end
61 | 
62 |     % 1-D data: append a zero row so the column sums below still run over
63 |     % the first dimension instead of collapsing a row vector to a scalar
64 |     if nDimA == 1
65 |         a = [a; zeros(1, size(a, 2))]; 
66 |         b = [b; zeros(1, size(b, 2))]; 
67 |     end
68 | 
69 |     % ---- distance table ----
70 |     sqNormA = sum(a .* a)';     % Mx1 squared norms
71 |     sqNormB = sum(b .* b);      % 1xN squared norms
72 |     crossTerm = 2 * a' * b;     % MxN doubled inner products
73 |     d = sqrt(bsxfun(@plus, sqNormA, bsxfun(@minus, sqNormB, crossTerm)));
74 | 
75 |     % rounding can make the radicand slightly negative; keep the real part
76 |     d = real(d);
77 | 
78 | 


--------------------------------------------------------------------------------
/SimGraph_NearestNeighbors.m:
--------------------------------------------------------------------------------
 1 | function W = SimGraph_NearestNeighbors(M, k, Type, sigma)
 2 | % SIMGRAPH_NEARESTNEIGHBORS Returns kNN similarity graph
 3 | %   Returns adjacency matrix for an k-Nearest Neighbors 
 4 | %   similarity graph
 5 | %
 6 | %   'M' - A d-by-n matrix containing n d-dimensional data points
 7 | %   'k' - Number of neighbors. The k smallest distances are taken from the
 8 | %      full distance column, which includes the point itself at distance
 9 | %      0; that zero is not stored in the sparse result.
10 | %   'Type' - Type if kNN Graph
11 | %      1 - Normal  (edge kept if either point lists the other)
12 | %      2 - Mutual  (edge kept only if both points list each other)
13 | %   'sigma' - Parameter for Gaussian similarity function. Set
14 | %      this to 0 for an unweighted graph. Default is 1.
15 | %
16 | %   Returns a sparse n-by-n matrix: logical for sigma == 0, otherwise the
17 | %   stored distances mapped through simGaussian(., sigma).
18 | %
19 | %   Author: Ingo Buerk
20 | %   Year  : 2011/2012
21 | %   Bachelor Thesis
22 | 
23 | if nargin < 3
24 |    ME = MException('InvalidCall:NotEnoughArguments', ...
25 |        'Function called with too few arguments');
26 |    throw(ME);
27 | end
28 | 
29 | if ~any(Type == (1:2))
30 |    ME = MException('InvalidCall:UnknownType', ...
31 |        'Unknown similarity graph type');
32 |    throw(ME);
33 | end
34 | 
35 | n = size(M, 2);
36 | 
37 | % Preallocate memory
38 | indi = zeros(1, k * n);
39 | indj = zeros(1, k * n);
40 | inds = zeros(1, k * n);
41 | 
42 | for ii = 1:n
43 |     % Distances from point ii to every point (broadcast, no repmat copy)
44 |     dist = distEuclidean(M(:, ii), M);
45 |     
46 |     % Sort row by distance
47 |     [s, O] = sort(dist, 'ascend');
48 |     
49 |     % Save indices and value of the k smallest distances
50 |     slot = (ii-1)*k+1:ii*k;
51 |     indi(1, slot) = ii;
52 |     indj(1, slot) = O(1:k);
53 |     inds(1, slot) = s(1:k);
54 | end
55 | 
56 | % Create sparse matrix
57 | W = sparse(indi, indj, inds, n, n);
58 | 
59 | clear indi indj inds dist s O;
60 | 
61 | % Construct either normal or mutual graph
62 | if Type == 1
63 |     % Normal
64 |     W = max(W, W');
65 | else
66 |     % Mutual
67 |     W = min(W, W');
68 | end
69 | 
70 | if nargin < 4 || isempty(sigma)
71 |     sigma = 1;
72 | end
73 | 
74 | % Unweighted graph
75 | if sigma == 0
76 |     W = (W ~= 0);
77 |     
78 | % Gaussian similarity function
79 | elseif isnumeric(sigma)
80 |     W = spfun(@(W) (simGaussian(W, sigma)), W);
81 |     
82 | else
83 |     % the offending argument is sigma (message used to say "epsilon")
84 |     ME = MException('InvalidArgument:NotANumber', ...
85 |         'Parameter sigma is not numeric');
86 |     throw(ME);
87 | end
88 | 
89 | end
90 | 
91 | function [ dist ] = distEuclidean( M, N )
92 | %DISTEUCLIDEAN Calculates Euclidean distances
93 | %   distEuclidean calculates the column-wise Euclidean distances between
94 | %   M and N and returns them as a row vector. M and N are d-by-n
95 | %   matrices; either one may also be a single d-by-1 column, which is
96 | %   then compared against every column of the other.
97 | %
98 | %   Author: Ingo Buerk
99 | %   Year  : 2011/2012
100 | %   Bachelor Thesis
101 | 
102 | dist = sqrt(sum(bsxfun(@minus, M, N) .^ 2, 1));
103 | 
104 | end


--------------------------------------------------------------------------------
/fs_unsup_glspfs_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_glspfs_single_func(dataset, exp_settings, algo_settings)
 2 | %FS_UNSUP_GLSPFS_SINGLE_FUNC  Grid-search experiment for GLSPFS.
 3 | %
 4 | %   Loads DATASET through extractXY, and for every parameter set builds a
 5 | %   global kernel (constructKernel) and a local structure matrix
 6 | %   (computeLocalStructure), runs fs_unsup_glspfs on them, scores each
 7 | %   returned feature list with evalUnSupFS, passes the scores to
 8 | %   grid_search_fs and saves the outcome as
 9 | %   <prefix_mdcs>/<dataset>_best_result_GLSPFS.mat.
10 | %
11 | %   exp_settings fields read here: FeaNumCandi, nKmeans and, optionally,
12 | %   prefix_mdcs (output directory). algo_settings is accepted but not used.
13 | 
14 | %---------------- experiment settings ----------------
15 | FeaNumCandi = exp_settings.FeaNumCandi;
16 | nKmeans = exp_settings.nKmeans;
17 | if isfield(exp_settings, 'prefix_mdcs')
18 |     prefix_mdcs = exp_settings.prefix_mdcs;
19 | else
20 |     prefix_mdcs = [];
21 | end
22 | 
23 | disp(['dataset:',dataset]);
24 | [X, Y] = extractXY(dataset);
25 | nClass = length(unique(Y));
26 | 
27 | %---------------- parameter grid ----------------
28 | s1 = optSigma(X);
29 | kernelWidth = sqrt(2.^(-1)) * s1;
30 | local_type_candi = {'LPP', 'LLE', 'LTSA'};
31 | % per-type extra parameter: kernel width for LPP, none for LLE, nClass for LTSA
32 | local_type_param_candi = {kernelWidth, [], nClass};
33 | knn_size_candi = 5;
34 | lambda1_candi = 10.^(-5:5);
35 | lambda2_candi = 10.^(-5:5);
36 | global_kernel_cell_candi = buildParamKernel({'Gaussian'}, {kernelWidth}, {''});
37 | paramCell = fs_unsup_glspfs_build_param(local_type_candi, local_type_param_candi, knn_size_candi, ...
38 |     lambda1_candi, lambda2_candi, global_kernel_cell_candi);
39 | nParam = length(paramCell);
40 | 
41 | %---------------- feature ranking ----------------
42 | disp('GLSPFS ...');
43 | t_start = clock;
44 | maxFea = max(FeaNumCandi);
45 | feaSubsets = cell(nParam, 1);
46 | for iParam = 1:nParam
47 |     fprintf('GLSPFS parameter search %d out of %d...\n', iParam, nParam);
48 |     param = paramCell{iParam};
49 |     K = constructKernel(X, X, param.global_kernel_option);
50 |     L = computeLocalStructure(X, param.local_type, param.local_k, param.local_lpp_sigma, param.local_ltsa_embedded_dim);
51 |     feaSubsets{iParam,1} = fs_unsup_glspfs(X, K, L, param.lambda1, param.lambda2, maxFea);
52 | end
53 | t1 = etime(clock,t_start);
54 | disp(['exe time: ',num2str(t1)]);
55 | 
56 | %---------------- evaluation ----------------
57 | t_start = clock;
58 | disp('evaluation ...');
59 | nFeaNum = length(FeaNumCandi);
60 | evalOpt = struct('nKm', nKmeans);
61 | res_aio = cell(nParam, nFeaNum);
62 | for iFea = 1:nFeaNum
63 |     nSel = FeaNumCandi(iFea);
64 |     for iParam = 1:nParam
65 |         fprintf('GLSPFS parameter evaluation %d outof %d  ... %d out of %d...\n', iFea, nFeaNum, iParam, nParam);
66 |         ranking = feaSubsets{iParam,1};
67 |         res_aio{iParam, iFea} = evalUnSupFS(X, Y, ranking(1:nSel), evalOpt);
68 |     end
69 | end
70 | [res_gs, res_gs_ps] = grid_search_fs(res_aio);
71 | res_gs.feaset = FeaNumCandi;
72 | t2 = etime(clock,t_start);
73 | disp(['exe time: ',num2str(t2)]);
74 | res_gs.time = t1;    % seconds spent ranking features
75 | res_gs.time2 = t2;   % seconds spent evaluating
76 | 
77 | save(fullfile(prefix_mdcs, [dataset, '_best_result_GLSPFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
78 | end


--------------------------------------------------------------------------------
/mdcs_check.m:
--------------------------------------------------------------------------------
 1 | function [flag_writeable, flag_uploadable, prefix] = mdcs_check(username, password)
 2 | %MDCS_CHECK  Check ownership of the MDCS work and filedependencies directories.
 3 | %
 4 | %   [flag_writeable, flag_uploadable, prefix] = mdcs_check(username, password)
 5 | %
 6 | %   username, password - account used to match directory owners in the
 7 | %                        'ls -l' listing and passed on to email_notify.
 8 | %   flag_writeable     - 0 when some 'work' entry is not matched to username.
 9 | %   flag_uploadable    - 0 when some 'filedependencies' entry is not matched.
10 | %   prefix             - pwd on Windows; /home/<username> on Unix when that
11 | %                        directory exists; [] otherwise.
12 | %
13 | %   On Unix, whoever owns a foreign filedependencies directory is e-mailed a
14 | %   request to delete it.
15 | %
16 | flag_writeable = 1;
17 | flag_uploadable = 1;
18 | prefix = [];
19 | if ispc
20 |     prefix = pwd;
21 | elseif isunix
22 |     message = ['**************************************', ...
23 |         'This is an automatically generated message. ',...
24 |         'You received this email because you used the Matlab Distributed Computing Server (MDCS) ',...
25 |         'in the Laboratory for Computer Science (LCS) recently, and some of  your directories and files ',...
26 |         'are not automatically deleted by the stupid Matlab Job Scheduler (mjs). As a result, ', ...
27 |         'othes failed to submit jobs on these workers, please help to delete them manually! ', ...
28 |         'Thansk for your cooporation! **************************************'];
29 |     message3 = 'Thanks again! Liang Du, a heavier user, from DMGroup@LCS.';
30 |     
31 |     [mdcs_ips, mdcs_dirs] = get_mdcs_ip_dir(100);
32 |     for i1= 1:length(mdcs_ips)
33 |         disp([' worker ip ', mdcs_ips{i1}]);
34 |     end
35 |     n_dir = zeros(length(mdcs_dirs), 1);
36 |     n_dir2 = zeros(length(mdcs_dirs), 1);
37 |     
38 |     % NOTE(review): every shell command below lists '../' relative to the
39 |     % current directory and does not depend on mdcs_dirs{i1} - confirm that
40 |     % this is the intended place to look.
41 |     for i1= 1:length(mdcs_dirs)
42 |         disp([' worker pwd ', mdcs_dirs{i1}]);
43 |         unix('ls -l ../ |grep work');
44 |         unix('ls -l ../ |grep filedependencies');
45 |         [~, t1] = unix(['ls -l ../ |grep work | grep ', username, ' |wc -l']); % check the owner of work
46 |         [~, t2] = unix(['ls -l ../ |grep filedependencies | grep ', username, ' |wc -l']); % check the owner of filedependencies
47 |         n_dir(i1) = str2double(t1);
48 |         n_dir2(i1) = str2double(t2);
49 |         
50 |         if n_dir2(i1) < 1
51 |             % Name the worker this iteration refers to. The message used to
52 |             % concatenate the undefined variable 'ip' and the built-in
53 |             % function 'dir', which made this branch fail.
54 |             % Presumably mdcs_ips and mdcs_dirs are parallel lists - verify
55 |             % against get_mdcs_ip_dir.
56 |             if i1 <= length(mdcs_ips)
57 |                 worker_ip = mdcs_ips{i1};
58 |             else
59 |                 worker_ip = 'unknown';
60 |             end
61 |             message2 = [' Please login to the ip = ', worker_ip, ' and manually delete the directory = ' mdcs_dirs{i1}];
62 |             [~, dir2_owner] = unix('ls -l ../ |grep filedependencies |awk -F '' '' ''{print $3}''');
63 |             % shell output ends with a newline; strip it before building the address
64 |             dir2_owner = strtrim(dir2_owner);
65 |             if ~isempty(dir2_owner)
66 |                 email_notify(username, password, [dir2_owner, '@ios.ac.cn'], [message, message2, message3]);
67 |             end
68 |         end
69 |     end
70 |     
71 |     if sum(n_dir) < length(n_dir)
72 |         warning('You are not the owner of some work directory ...');
73 |         warning('     Write on this dir will failed ....');
74 |         flag_writeable = 0;
75 |     end
76 |     
77 |     if sum(n_dir2) < length(n_dir2)
78 |         warning('You are not the owner of some filedependencies directory ...');
79 |         warning('     upload dependent file on this dir will failed ....');
80 |         flag_uploadable = 0;
81 |     end
82 |     
83 |     root_dir = ['/home/', username];
84 |     if exist(root_dir, 'dir')
85 |         prefix = root_dir;
86 |     end
87 |     
88 | end


--------------------------------------------------------------------------------
/fs_unsup_mcfs_single_func.m:
--------------------------------------------------------------------------------
 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_mcfs_single_func(dataset, exp_settings, algo_settings)
 2 | %FS_UNSUP_MCFS_SINGLE_FUNC  Grid-search experiment for MCFS feature selection.
 3 | %
 4 | %   Loads DATASET through extractXY, builds an affinity graph with constructW
 5 | %   for every parameter set, runs fs_unsup_mcfs on it, drops the parameter
 6 | %   sets for which MCFS threw an error (a warning names each one), scores
 7 | %   the remaining feature lists with evalUnSupFS, passes the scores to
 8 | %   grid_search_fs and saves the outcome as
 9 | %   <prefix_mdcs>/<dataset>_best_result_MCFS.mat.
10 | %
11 | %   exp_settings fields read here: FeaNumCandi, nKmeans and, optionally,
12 | %   prefix_mdcs (output directory). algo_settings is accepted but not used.
13 | 
14 | %======================setup===========================
15 | FeaNumCandi = exp_settings.FeaNumCandi;
16 | nKmeans = exp_settings.nKmeans;
17 | prefix_mdcs = [];
18 | if isfield(exp_settings, 'prefix_mdcs')
19 |     prefix_mdcs = exp_settings.prefix_mdcs;
20 | end
21 | %======================================================
22 | 
23 | disp(['dataset:',dataset]);
24 | [X, Y] = extractXY(dataset);
25 | nClass = length(unique(Y));
26 | 
27 | %======================setup===========================
28 | knnCandi = 5;
29 | weightCandi = {'Binary','HeatKernel'};
30 | s1 = optSigma(X);
31 | weight_param_Candi = {[], 2.^(-3:3) .* s1.^2};
32 | paramCell = fs_unsup_mcfs_build_param(knnCandi, weightCandi, weight_param_Candi);
33 | %======================================================
34 | 
35 | t_start = clock;
36 | nParam = length(paramCell);
37 | maxFea = max(FeaNumCandi);
38 | feaSubsets = cell(nParam, 1);
39 | failed = false(nParam, 1);   % true where MCFS threw for that parameter set
40 | parfor i1 = 1:nParam
41 |     fprintf(['MCFS parameter search %d out of %d...\n'], i1, nParam);
42 |     param = paramCell{i1};
43 |     options = struct('nUseEigenfunction', nClass, 'W', constructW(X, param));
44 |     % some may failed  due to SR code
45 |     try
46 |         index = fs_unsup_mcfs(X,maxFea,options);
47 |         feaSubsets{i1} = index{1};
48 |     catch err
49 |         failed(i1) = true;
50 |         % report the failure instead of dropping the parameter set silently
51 |         warning('fs_unsup_mcfs_single_func:paramFailed', ...
52 |             'MCFS failed for parameter set %d and is skipped: %s', i1, err.message);
53 |     end
54 | end
55 | t_end = clock;
56 | t1 = etime(t_end,t_start);
57 | disp(['exe time: ',num2str(t1)]);
58 | 
59 | t_start = clock;
60 | disp('evaluation ...');
61 | % keep only the parameter sets that produced a feature list
62 | paramCell = reshape(paramCell(~failed), [], 1);
63 | feaSubsets = reshape(feaSubsets(~failed), [], 1);
64 | nParam = length(paramCell);
65 | res_aio = cell(nParam, length(FeaNumCandi));
66 | for i2 = 1:length(FeaNumCandi)
67 |     parfor i1 = 1:nParam
68 |         tmp = feaSubsets{i1, 1};
69 |         fprintf('MCFS parameter evaluation %d outof %d  ... %d out of %d...\n', i2, length(FeaNumCandi), i1, nParam);
70 |         res_aio{i1, i2} = evalUnSupFS(X, Y, tmp(1:FeaNumCandi(i2)), struct('nKm', nKmeans));
71 |     end
72 | end
73 | [res_gs,res_gs_ps] = grid_search_fs(res_aio);
74 | res_gs.feaset = FeaNumCandi;
75 | t_end = clock;
76 | t2 = etime(t_end,t_start);
77 | disp(['exe time: ',num2str(t2)]);
78 | res_gs.time = t1;    % seconds spent ranking features
79 | res_gs.time2 = t2;   % seconds spent evaluating
80 | 
81 | save(fullfile(prefix_mdcs, [dataset, '_best_result_MCFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
82 | end


--------------------------------------------------------------------------------
/initFactor.m:
--------------------------------------------------------------------------------
 1 | function ratio=initFactor(x_norm, Ax , y, z, funName, rsL2, x_2norm)
 2 | % 
 3 | %% function initFactor
 4 | %     compute an optimal constant factor for the initialization
 5 | %
 6 | %
 7 | % Input parameters:
 8 | % x_norm-      the norm of the starting point
 9 | % Ax-          A*x, with x being the initialization point
10 | % y-           the response matrix
11 | % z-           the regularization parameter or the ball
12 | % funName-     the name of the function: 'LeastC', 'LeastR', 'glLeastR',
13 | %              'mcLeastR', 'mtLeastR', 'nnLeastR', 'nnLeastC' or 'mcLeastC'
14 | % rsL2-        weight multiplying x_2norm in the denominator; only read
15 | %              for 'LeastC', 'LeastR', 'nnLeastR' and 'nnLeastC'
16 | % x_2norm-     read together with rsL2 (same four cases)
17 | %
18 | % Output parameter:
19 | % ratio-       the computed optimal initialization point is ratio*x
20 | %
21 | % The *C variants clip the unconstrained ratio to the interval allowed by
22 | % z / x_norm; the nn* variants additionally force ratio >= 0.
23 | % An unsupported funName raises an error.
24 | %
25 | %% Copyright (C) 2009-2010 Jun Liu, and Jieping Ye
26 | %
27 | % For any problem, please contact with Jun Liu via j.liu@asu.edu
28 | %
29 | % Last revised on August 2, 2009.
30 | 
31 | switch(funName)
32 |     case 'LeastC'
33 |         ratio_max     = z / x_norm;
34 |         ratio_optimal = Ax'*y / (Ax'*Ax + rsL2 * x_2norm);
35 |         
36 |         if abs(ratio_optimal)<=ratio_max
37 |             ratio  =  ratio_optimal;
38 |         elseif ratio_optimal<0
39 |             ratio  =  -ratio_max;
40 |         else
41 |             ratio  =  ratio_max;
42 |         end
43 |         
44 |     case 'LeastR'
45 |         ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax + rsL2 * x_2norm);
46 |         
47 |     case 'glLeastR'
48 |         ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax);
49 |         
50 |     case 'mcLeastR'
51 |         ratio=  (Ax(:)'*y(:) - z * x_norm) / norm(Ax,'fro')^2;
52 |         
53 |     case 'mtLeastR'
54 |         ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax);
55 |         
56 |     case 'nnLeastR'
57 |         ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax + rsL2 * x_2norm);
58 |         ratio=max(0,ratio);
59 |         
60 |     case 'nnLeastC'
61 |         ratio_max     = z / x_norm;
62 |         ratio_optimal = Ax'*y / (Ax'*Ax + rsL2 * x_2norm);
63 | 
64 |         if ratio_optimal<0
65 |             ratio=0;
66 |         elseif ratio_optimal<=ratio_max
67 |             ratio  =  ratio_optimal;
68 |         else
69 |             ratio  =  ratio_max;
70 |         end
71 |         
72 |     case 'mcLeastC'
73 |         ratio_max     = z / x_norm;
74 |         % The scale r minimising ||r*Ax - y||_F^2 is <Ax,y> / ||Ax||_F^2,
75 |         % the same denominator 'mcLeastR' uses. The former
76 |         % norm(Ax'*Ax,'fro')^2 did not equal ||Ax||_F^2.
77 |         ratio_optimal = Ax(:)'*y(:) / (norm(Ax,'fro')^2);
78 |         
79 |         if abs(ratio_optimal)<=ratio_max
80 |             ratio  =  ratio_optimal;
81 |         elseif ratio_optimal<0
82 |             ratio  =  -ratio_max;
83 |         else
84 |             ratio  =  ratio_max;
85 |         end
86 |         
87 |     otherwise
88 |         % fail with a clear message rather than return with ratio unassigned
89 |         error('initFactor:unsupportedFunName', ...
90 |             'The specified funName ''%s'' is not supported', funName);
91 | end


--------------------------------------------------------------------------------
/constructKernel.m:
--------------------------------------------------------------------------------
 1 | function K = constructKernel(fea_a,fea_b,options)
 2 | % function K = constructKernel(fea_a,fea_b,options)
 3 | %	Usage:
 4 | %	K = constructKernel(fea_a,[],options)     kernel of fea_a with itself
 5 | %
 6 | %   K = constructKernel(fea_a,fea_b,options)  kernel between fea_a and fea_b
 7 | %
 8 | %	fea_a, fea_b  : Rows of vectors of data points. 
 9 | %
10 | %   options       : Struct value in Matlab. The fields in options that can
11 | %                   be set: 
12 | %           KernelType  -  Choices are (matched case-insensitively,
13 | %                          default 'Gaussian'):
14 | %               'Gaussian'      - e^{-(|x-y|^2)/2t^2}
15 | %               'Polynomial'    - (x'*y)^d
16 | %               'PolyPlus'      - (x'*y+1)^d
17 | %               'Linear'        -  x'*y
18 | %
19 | %               t       -  parameter for Gaussian (default 1)
20 | %               d       -  parameter for Poly (default 2)
21 | %
22 | %   When fea_b is empty the result is additionally passed through
23 | %   max(K,K').
24 | %
25 | %   version 1.0 --Sep/2006 
26 | %
27 | %   Written by Deng Cai (dengcai2 AT cs.uiuc.edu)
28 | %
29 | 
30 | if nargin < 3
31 |     options = [];
32 | elseif ~isstruct(options)
33 |     error('parameter error!');
34 | end
35 | 
36 | if ~isfield(options,'KernelType')
37 |     options.KernelType = 'Gaussian';
38 | end
39 | 
40 | selfKernel = isempty(fea_b);
41 | 
42 | switch lower(options.KernelType)
43 |     case 'gaussian'         %  e^{-(|x-y|^2)/2t^2}
44 |         if ~isfield(options,'t')
45 |             options.t = 1;
46 |         end
47 |         if selfKernel
48 |             sqDist = EuDist2(fea_a,[],0);
49 |         else
50 |             sqDist = EuDist2(fea_a,fea_b,0);
51 |         end
52 |         K = exp(-sqDist/(2*options.t^2));
53 |     case 'polynomial'       % (x'*y)^d
54 |         if ~isfield(options,'d')
55 |             options.d = 2;
56 |         end
57 |         K = innerProducts(fea_a, fea_b, selfKernel).^options.d;
58 |     case 'polyplus'         % (x'*y+1)^d
59 |         if ~isfield(options,'d')
60 |             options.d = 2;
61 |         end
62 |         K = (innerProducts(fea_a, fea_b, selfKernel)+1).^options.d;
63 |     case 'linear'           % x'*y
64 |         K = innerProducts(fea_a, fea_b, selfKernel);
65 |     otherwise
66 |         error('KernelType does not exist!');
67 | end
68 | 
69 | if selfKernel
70 |     K = max(K,K');
71 | end
72 | 
73 | 
74 | function G = innerProducts(fea_a, fea_b, selfKernel)
75 | % Full matrix of row-wise inner products; fea_a is paired with itself when
76 | % selfKernel is true.
77 | if selfKernel
78 |     G = full(fea_a * fea_a');
79 | else
80 |     G = full(fea_a * fea_b');
81 | end
82 | 


--------------------------------------------------------------------------------
/find_nn.m:
--------------------------------------------------------------------------------
 1 | function [D, ni] = find_nn(X, k)
 2 | %FIND_NN Finds k nearest neighbors for all datapoints in the dataset
 3 | %
 4 | %	[D, ni] = find_nn(X, k)
 5 | %
 6 | % Finds the k nearest neighbors for all datapoints in the dataset X.
 7 | % In X, rows correspond to the observations and columns to the
 8 | % dimensions. The value of k is the number of neighbors that is
 9 | % stored (default = 12); with a numeric k the datapoint itself (distance
10 | % zero) normally takes one of the k slots. When k is a character array,
11 | % neighborhood selection is delegated to find_nn_adaptive. The function
12 | % returns a sparse distance matrix D, in which only the distances to the
13 | % k nearest neighbors are stored, and optionally the matrix ni of
14 | % neighbor indices (one row per datapoint).
15 | % Zero distances are replaced by the tolerance 1e-7 so that they remain
16 | % in the sparse matrix: for every stored neighbor when X has more than
17 | % 2000 rows, on the diagonal only otherwise. Above 2000 rows a slower
18 | % method with a memory requirement of O(nk) is used.
19 | %
20 | % With a numeric k, ni is uint16 while all indices fit in that type and
21 | % uint32 beyond 65535 datapoints, where uint16 indices would saturate.
22 | %
23 | 
24 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
25 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49
26 | % You are free to use, change, or redistribute this code in any way you
27 | % want for non-commercial purposes. However, it is appreciated if you 
28 | % maintain the name of the original author.
29 | %
30 | % (C) Laurens van der Maaten, 2010
31 | % University California, San Diego / Delft University of Technology
32 | 
33 | 
34 |     if ~exist('k', 'var') || isempty(k)
35 |         k = 12;
36 |     end
37 |     
38 |     % Perform adaptive neighborhood selection if desired
39 |     if ischar(k)
40 |         [D, max_k] = find_nn_adaptive(X);
41 |         ni = zeros(size(X, 1), max_k);
42 |         for i=1:size(X, 1)
43 |             tmp = find(D(i,:) ~= 0);
44 |             tmp = sort(tmp, 'ascend');
45 |             tmp = [tmp(2:end) zeros(1, max_k - length(tmp) + 1)];
46 |             ni(i,:) = tmp;
47 |         end
48 |     
49 |     % Perform normal neighborhood selection
50 |     else
51 | 
52 |         % Memory conservative implementation
53 |         if size(X, 1) > 2000
54 |             X = X';
55 |             n = size(X, 2);
56 |             D = zeros(n, k);
57 |             XX = sum(X .^ 2, 1);
58 |             % uint16 saturates at 65535, so larger datasets need uint32
59 |             if n <= double(intmax('uint16'))
60 |                 ni = zeros(n, k, 'uint16');
61 |             else
62 |                 ni = zeros(n, k, 'uint32');
63 |             end
64 |             for i=1:n
65 |                 p = X(:,i);
66 |                 xx = sum(p .^ 2);
67 |                 xX = p' * X;
68 |                 d = bsxfun(@plus, XX - 2 * xX, xx);
69 |                 [d, ind] = sort(d);
70 |                 d = d(1:k);
71 |                 d(d < 0) = 0;       % round-off can leave tiny negative squared distances
72 |                 d = sqrt(d);
73 |                 ind = ind(1:k);
74 |                 d(d == 0) = 1e-7;
75 |                 D(i,:) = d;
76 |                 ni(i,:) = ind;
77 |             end
78 |             rows = repmat((1:n)', [1 k]);
79 |             D = sparse(rows(:), double(ni(:)), D(:), n, n);
80 | 
81 |         % Faster implementation
82 |         else
83 |             n = size(X, 1);
84 |             D = L2_distance(X', X');
85 |             [foo, ind] = sort(D, 2);
86 |             flat = repmat((1:n)', 1, n - k) + n * ind(:,k+1:end) - n;
87 |             D(flat(:)) = 0;
88 |             D(1:n+1:end) = 1e-7;
89 |             D = sparse(double(D));
90 | 
91 |             if nargout > 1, ni = uint16(ind(:,1:k)); end
92 |         end
93 |     end


--------------------------------------------------------------------------------
/grid_search_fs.m:
--------------------------------------------------------------------------------
 1 | function [res_gs2, res_gs_ps2] = grid_search_fs(res_aio)
 2 | % GRID_SEARCH_FS Keep, per feature subset and measure, the best grid result.
 3 | %
 4 | % input
 5 | %	res_aio: nParam * nFea cell of result structs (one row per parameter
 6 | %	         setting, one column per feature subset)
 7 | % output
 8 | %	res_gs2:    struct whose measure fields are row vectors holding, for
 9 | %	            each feature subset, the largest value over all settings;
10 | %	            each std_* field is taken from the setting that won the
11 | %	            matching mean_* field. Other fields come from res_aio{1,1}.
12 | %	res_gs_ps2: struct with, per measure, the row index of the winning
13 | %	            setting for each feature subset (1 when nothing beat row 1).
14 | %
15 | % for each feature subset,
16 | %     for each evaluation measure,
17 | %         choose the best result
18 | % Measures missing from the result structs are skipped during the search;
19 | % when concatenating, a measure missing for a later subset is stored as NaN.
20 | % Liang Du (csliangdu@gmail.com)
21 | %
22 | 
23 | [nParam, nSubsets] = size(res_aio);
24 | if nParam == 0 || nSubsets == 0
25 |     error('grid_search_fs:emptyInput', 'res_aio must be a non-empty cell array.');
26 | end
27 | 
28 | fn1 = {'mean_acc', 'mean_nmi_sqrt', 'mean_nmi_max', 'mean_purity', 'mean_prec', 'mean_recall', 'mean_f1'};
29 | fn2 = {'std_acc', 'std_nmi_sqrt', 'std_nmi_max', 'std_purity', 'std_prec', 'std_recall', 'std_f1'};
30 | fn3 = {'best_obj_acc', 'best_obj_nmi_max', 'best_obj_nmi_sqrt', 'best_obj_purity',...
31 |     'best_obj_prec', 'best_obj_recall', 'best_obj_f1', ...
32 |     'jac', 'loocv'};
33 | fn4 = {'red'};
34 | 
35 | % Every maximized measure, and the companion field copied along with it ('' = none)
36 | scoreNames = [fn1, fn3, fn4];
37 | partnerNames = [fn2, repmat({''}, 1, length(fn3) + length(fn4))];
38 | 
39 | res_gs = cell(1, nSubsets);
40 | res_gs_ps = cell(1, nSubsets);
41 | for i1 = 1:nSubsets
42 |     best = res_aio{1, i1}; % start from the first parameter setting
43 |     bestRow = struct();
44 |     for i3 = 1:length(scoreNames)
45 |         bestRow.(scoreNames{i3}) = 1;
46 |     end
47 |     for i2 = 1:nParam
48 |         cand = res_aio{i2, i1};
49 |         for i3 = 1:length(scoreNames)
50 |             fn = scoreNames{i3};
51 |             if isfield(cand, fn) && isfield(best, fn) && (cand.(fn) > best.(fn))
52 |                 best.(fn) = cand.(fn);
53 |                 bestRow.(fn) = i2;
54 |                 if ~isempty(partnerNames{i3})
55 |                     best.(partnerNames{i3}) = cand.(partnerNames{i3});
56 |                 end
57 |             end
58 |         end
59 |     end
60 |     res_gs{1, i1} = best;
61 |     res_gs_ps{1, i1} = bestRow;
62 | end
63 | 
64 | % Concatenate the per-subset winners into row vectors
65 | res_gs2 = res_gs{1,1};
66 | res_gs_ps2 = res_gs_ps{1,1};
67 | valueNames = [fn1, fn2, fn3, fn4];
68 | for i1 = 2:nSubsets
69 |     for i3 = 1:length(valueNames)
70 |         fn = valueNames{i3};
71 |         if isfield(res_gs2, fn) % measures never reported are left out
72 |             if isfield(res_gs{1, i1}, fn)
73 |                 res_gs2.(fn) = [res_gs2.(fn), res_gs{1, i1}.(fn)];
74 |             else
75 |                 res_gs2.(fn) = [res_gs2.(fn), NaN]; % keep columns aligned with subsets
76 |             end
77 |         end
78 |     end
79 |     for i3 = 1:length(scoreNames)
80 |         fn = scoreNames{i3};
81 |         res_gs_ps2.(fn) = [res_gs_ps2.(fn), res_gs_ps{1, i1}.(fn)];
82 |     end
83 | end
84 | end


--------------------------------------------------------------------------------
/fs_unsup_jelsr_liang.m:
--------------------------------------------------------------------------------
  1 | function [model_jelsr] = fs_unsup_jelsr_liang(X, param)
  2 | % Feature scores from JELSR (joint embedding learning and sparse regression).
  3 | %   X     : nDim * nSmp data matrix
  4 | %   param : struct; nClusters (required) is the embedding dimension,
  5 | %           weightMode and t (required) are forwarded to
  6 | %           computeLocalStructure together with k (default 5); alpha and
  7 | %           beta (default 1) are passed to JELSR_AlterOptimizer.
  8 | %   model_jelsr : struct with z (L2 norm of each row of W, one value per
  9 | %           feature), Y (embedding), obj and runTime (CPU seconds).
 10 | 
 11 | if ~exist('param', 'var')
 12 |     param = [];
 13 | end
 14 | 
 15 | if ~isfield(param, 'nClusters')
 16 |     error('The number of Clusters should be specified');
 17 | end
 18 | nClusters = param.nClusters;
 19 | 
 20 | % Optional settings and their defaults
 21 | k = 5;
 22 | if isfield(param, 'k'), k = param.k; end
 23 | beta = 1;
 24 | if isfield(param, 'beta'), beta = param.beta; end
 25 | alpha = 1;
 26 | if isfield(param, 'alpha'), alpha = param.alpha; end
 27 | 
 28 | if ~isfield(param, 'weightMode') || ~isfield(param, 't')
 29 |     error('param.weightMode and param.t should be specified');
 30 | end
 31 | 
 32 | t1 = cputime;
 33 | % Pass the resolved k so that the default applies when param.k is absent
 34 | L = computeLocalStructure(X', param.weightMode, k, param.t);
 35 | [W, Y, obj] = JELSR_AlterOptimizer(X, L, nClusters, alpha, beta);
 36 | 
 37 | model_jelsr.z = sqrt(sum(W.^2,2));
 38 | model_jelsr.Y = Y;
 39 | model_jelsr.runTime = cputime - t1;
 40 | model_jelsr.obj = obj;
 41 | end
 42 | 
 43 | function [W, Y, obj] = JELSR_AlterOptimizer(X, L, nDimEmb, alpha, beta)
 44 | % Alternating optimizer for JELSR.
 45 | %
 46 | % Input
 47 | %         X: nDim * nSmp
 48 | %         L: nSmp * nSmp; Local reconstruction kernel
 49 | %         nDimEmb: low embedding dimension
 50 | %         alpha: regularization parameter
 51 | %         beta: regularization parameter
 52 | % Output
 53 | %         W: nDim * nDimEmb
 54 | %         Y: nSmp * nDimEmb; eigenvectors of the nDimEmb smallest eigenvalues
 55 | %         obj: obj history; always empty because the objective
 56 | %              evaluation is disabled
 57 | % Optimization objective
 58 | %         min{W, U, Y} = tr(Y L Y') + beta*||W' X - Y ||^2 + beta*alpha* tr(W' U W)
 59 | % The loop always runs all 20 iterations; there is no convergence test.
 60 | %
 61 | % [1]. Feature Selection via Joint Embedding Learning and Sparse Regression.
 62 | % Chenping Hou, etc. IJCAI, 2011.
 63 | %
 64 | 
 65 | [nFeat, nPts] = size(X);
 66 | 
 67 | % With fewer features than samples the nFeat * nFeat system is solved directly
 68 | solveDirect = nFeat < nPts;
 69 | if solveDirect
 70 |     XXt = X*X';
 71 | end
 72 | 
 73 | u = ones(nFeat, 1);   % diagonal of U
 74 | obj = [];
 75 | maxIter = 20;
 76 | 
 77 | for it = 1:maxIter
 78 |     % AiX = (X*X' + alpha*U) \ X
 79 |     if solveDirect
 80 |         AiX = (XXt + alpha*diag(u))\X;
 81 |     else
 82 |         % Same quantity through an nPts * nPts system:
 83 |         % AiX = U^-1 X [ I - (alpha I + X' U^-1 X)^-1 X' U^-1 X ] / alpha
 84 |         UiX = bsxfun(@times, 1./u, X);
 85 |         G = X' * UiX;
 86 |         AiX = UiX * (eye(nPts) - (alpha * eye(nPts) + G) \ G);
 87 |         AiX = AiX/alpha;
 88 |     end
 89 | 
 90 |     % Step1: Fix U, update Y from the symmetrized matrix below
 91 |     M = L + beta*eye(nPts) - beta*X'*AiX;
 92 |     M = (M + M') / 2;
 93 |     [vecs, vals] = eig(M);
 94 |     [sortedVals, order] = sort(diag(vals));
 95 |     Y = vecs(:, order(1:nDimEmb));
 96 | 
 97 |     % Step2: Fix U, update W
 98 |     W = AiX*Y;
 99 | 
100 |     % Step3: Fix W, update U from the row norms of W (eps guards zero rows)
101 |     u = full(0.5./(sqrt(sum(W.^2,2)) + eps));
102 | end
103 | end


--------------------------------------------------------------------------------
/fs_unsup_lapscore.m:
--------------------------------------------------------------------------------
  1 | function [Y] = LaplacianScore(X, W)
  2 | %	Usage:
  3 | %	[Y] = LaplacianScore(X, W)
  4 | %
  5 | %	X: Rows of vectors of data points (nSmp x nFea)
  6 | %	W: The affinity matrix (nSmp x nSmp).
  7 | %	Y: Column vector of (1-LaplacianScore) for each feature.
  8 | %      The features with larger y are more important.
  9 | %
 10 | %    Examples:
 11 | %
 12 | %       fea = rand(50,70);
 13 | %       options = [];
 14 | %       options.Metric = 'Cosine';
 15 | %       options.NeighborMode = 'KNN';
 16 | %       options.k = 5;
 17 | %       options.WeightMode = 'Cosine';
 18 | %       W = constructW(fea,options);
 19 | %
 20 | %       LaplacianScore = LaplacianScore(fea,W);
 21 | %       [junk, index] = sort(-LaplacianScore);
 22 | %       
 23 | %       newfea = fea(:,index);
 24 | %       %the features in newfea will be sorted based on their importance.
 25 | %
 26 | %	Type "LaplacianScore" for a self-demo.
 27 | %
 28 | % See also constructW
 29 | %
 30 | %Reference:
 31 | %
 32 | %   Xiaofei He, Deng Cai and Partha Niyogi, "Laplacian Score for Feature Selection".
 33 | %   Advances in Neural Information Processing Systems 18 (NIPS 2005),
 34 | %   Vancouver, Canada, 2005.   
 35 | %
 36 | %   Deng Cai, 2004/08
 37 | 
 38 | 
 39 | if nargin == 0, selfdemo; return; end
 40 | 
 41 | nSmp = size(X,1);
 42 | 
 43 | if size(W,1) ~= nSmp || size(W,2) ~= nSmp
 44 |     error('W is error');
 45 | end
 46 | 
 47 | % Node degrees (row sums of the affinity matrix) and their total
 48 | deg = full(sum(W,2));
 49 | degSum = sum(deg);
 50 | 
 51 | % deg'*f for every feature column f; used to remove the weighted mean
 52 | tmp1 = deg'*X;
 53 | 
 54 | % f'*D*f and f'*W*f per feature. The explicit dimension keeps the
 55 | % reduction over samples even when X has a single row.
 56 | DPrime = sum(bsxfun(@times, deg, X).*X, 1) - tmp1.*tmp1/degSum;
 57 | LPrime = sum((X'*W)'.*X, 1) - tmp1.*tmp1/degSum;
 58 | 
 59 | % Features with a (numerically) zero denominator get a large one instead,
 60 | % which avoids dividing by zero
 61 | DPrime(DPrime < 1e-12) = 10000;
 62 | 
 63 | Y = LPrime./DPrime;
 64 | Y = full(Y');
 65 | 
 66 | 
 67 | 
 68 |     
 69 | %---------------------------------------------------
 70 | function selfdemo
 71 | % ====== Self demo using IRIS dataset
 72 | % ====== Rank the four IRIS features by score and scatter-plot them in ranked pairs
 73 | load iris.dat
 74 | % Scale every sample by mynorm(...,2), floored at 1e-10 (mynorm is defined elsewhere)
 75 | feaNorm = mynorm(iris(:,1:4),2);
 76 | fea = iris(:,1:4) ./ repmat(max(1e-10,feaNorm),1,4);
 77 | % Settings handed to constructW for the affinity graph
 78 | options = [];
 79 | options.Metric = 'Cosine';
 80 | options.NeighborMode = 'KNN';
 81 | options.WeightMode = 'Cosine';
 82 | options.k = 3;
 83 | % The graph is built from the scaled features ...
 84 | W = constructW(fea,options);
 85 | % ... but the scores are computed on the unscaled measurements
 86 | [LaplacianScore] = feval(mfilename,iris(:,1:4),W);
 87 | [junk, index] = sort(-LaplacianScore); % index lists the features by decreasing score
 88 | % Sample indices per class; column 5 of iris is compared against 1, 2 and 3
 89 | index1 = find(iris(:,5)==1);
 90 | index2 = find(iris(:,5)==2);
 91 | index3 = find(iris(:,5)==3);
 92 | figure; % highest-scoring feature against the second highest
 93 | plot(iris(index1, index(1)), iris(index1, index(2)), '*', ...
 94 |      iris(index2, index(1)), iris(index2, index(2)), 'o', ...
 95 |      iris(index3, index(1)), iris(index3, index(2)), 'x');
 96 | legend('Class 1', 'Class 2', 'Class 3');
 97 | title('IRIS data onto the first and second feature (Laplacian Score)');
 98 | axis equal; axis tight;
 99 | % Same plot for the third- and fourth-ranked features
100 | figure;
101 | plot(iris(index1, index(3)), iris(index1, index(4)), '*', ...
102 |      iris(index2, index(3)), iris(index2, index(4)), 'o', ...
103 |      iris(index3, index(3)), iris(index3, index(4)), 'x');
104 | legend('Class 1', 'Class 2', 'Class 3');
105 | title('IRIS data onto the third and fourth feature (Laplacian Score)');
106 | axis equal; axis tight;
107 | % Print the score of every feature, in original feature order
108 | disp('Laplacian Score:');
109 | for i = 1:length(LaplacianScore)
110 |     disp(num2str(LaplacianScore(i)));
111 | end
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/Eigenmap.m:
--------------------------------------------------------------------------------
  1 | function [Y, eigvalue] = Eigenmap(W, ReducedDim, bEigs)
  2 | %function [Y, eigvalue] = Eigenmap(W, ReducedDim, bEigs)
  3 | %   Embedding from the leading eigenvectors of D^-1/2*W*D^-1/2 (D = row sums of W).
  4 | %                   W       -  the affinity matrix.
  5 | %           ReducedDim      -  the dimensionality of the reduced subspace (default 10).
  6 | %                bEigs      -  whether to use eigs to speed up. If not
  7 | %                              specified, this function will automatically
  8 | %                              decide based on the size of W.
  9 | %   Returns Y (one embedding dimension per column) and the matching eigenvalues.
 10 | %   version 2.1 --November/2011
 11 | %   version 2.0 --May/2009
 12 | %   version 1.5 --Dec./2005
 13 | %   version 1.0 --Aug./2003
 14 | %
 15 | %   Written by Deng Cai (dengcai AT gmail.com)
 16 | 
 17 | MAX_MATRIX_SIZE = 1600; % You can change this number according your machine computational power
 18 | EIGVECTOR_RATIO = 0.1; % You can change this number according your machine computational power
 19 | 
 20 | 
 21 | [row,col] = size(W);
 22 | if row ~= col
 23 |     error('W must square matrix!!');
 24 | end
 25 | 
 26 | nSmp = row;
 27 | 
 28 | if ~exist('ReducedDim','var')
 29 |     ReducedDim = 10;
 30 | end
 31 | ReducedDim = min(ReducedDim+1,row); % one extra: the first eigenvector is removed at the end
 32 | 
 33 | D_mhalf = full(sum(W,2).^-.5); % degree^(-1/2) of every node
 34 | D_mhalfMatrix = spdiags(D_mhalf,0,nSmp,nSmp);
 35 | W = D_mhalfMatrix*W*D_mhalfMatrix; % normalized affinity D^-1/2*W*D^-1/2
 36 | % Take the larger of each mirrored pair so that W is exactly symmetric
 37 | W = max(W,W');
 38 | 
 39 | % Choose the eigensolver when the caller did not: eigs only for large, low-rank requests
 40 | dimMatrix = size(W,2);
 41 | if ~exist('bEigs','var')
 42 |     if (dimMatrix > MAX_MATRIX_SIZE && ReducedDim < dimMatrix*EIGVECTOR_RATIO)
 43 |         bEigs = 1;
 44 |     else
 45 |         bEigs = 0;
 46 |     end
 47 | end
 48 | % Largest-algebraic eigenpairs: from eigs, or from a full decomposition sorted in descending order
 49 | if bEigs
 50 |     option = struct('disp',0);
 51 |     [Y, eigvalue] = eigs(W,ReducedDim,'la',option);
 52 |     eigvalue = diag(eigvalue);
 53 | else
 54 |     [Y, eigvalue] = eig(full(W));
 55 |     eigvalue = diag(eigvalue);
 56 |     
 57 |     [junk, index] = sort(-eigvalue);
 58 |     eigvalue = eigvalue(index);
 59 |     Y = Y(:,index);
 60 |     if ReducedDim < length(eigvalue)
 61 |         Y = Y(:, 1:ReducedDim);
 62 |         eigvalue = eigvalue(1:ReducedDim);
 63 |     end
 64 | end
 65 | % Backup that is returned unchanged if every eigenvalue is filtered out below
 66 | Y_old = Y;
 67 | eigvalue_old = eigvalue;
 68 | % Discard eigenpairs whose eigenvalue is numerically zero
 69 | eigIdx = find(abs(eigvalue) < 1e-6);
 70 | eigvalue (eigIdx) = [];
 71 | Y (:,eigIdx) = [];
 72 | 
 73 | nGotDim = length(eigvalue);
 74 | if ~isempty(eigvalue)
 75 |     idx = 1; % after the loop and the decrement: number of leading eigenvalues equal to 1
 76 |     while(abs(eigvalue(idx)-1) < 1e-12)
 77 |         idx = idx + 1;
 78 |         if idx > nGotDim
 79 |             break;
 80 |         end
 81 |     end
 82 |     idx = idx - 1;
 83 |     
 84 |     if(idx > 1)  % more than one eigenvector of 1 eigenvalue
 85 |         u = zeros(size(Y,1),idx);
 86 |         % u(:,1): unit vector proportional to 1./D_mhalf, i.e. to sqrt(degree)
 87 |         d_m = 1./D_mhalf;
 88 |         cc = 1/norm(d_m);
 89 |         u(:,1) = cc./D_mhalf;
 90 |         % If some Y(:,i) already coincides with u(:,1), swap it into the first column
 91 |         bDone = 0;
 92 |         for i = 1:idx
 93 |             if abs(Y(:,i)' * u(:,1) - 1) < 1e-14
 94 |                 Y(:,i) = Y(:,1);
 95 |                 Y(:,1) = u(:,1);
 96 |                 bDone = 1;
 97 |             end
 98 |         end
 99 |         % Otherwise orthonormalize columns 2..idx against u(:,1) and each other (Gram-Schmidt)
100 |         if ~bDone
101 |             for i = 2:idx
102 |                 u(:,i) = Y(:,i);
103 |                 for j= 1:i-1
104 |                     u(:,i) = u(:,i) - (u(:,j)' * Y(:,i))*u(:,j);
105 |                 end
106 |                 u(:,i) = u(:,i)/norm(u(:,i));
107 |             end
108 |             Y(:,1:idx) = u;
109 |         end
110 |     end
111 |     % Scale the eigenvectors by D^-1/2
112 |     Y = D_mhalfMatrix*Y;
113 |     % Remove the first eigenpair
114 |     Y(:,1) = [];
115 |     eigvalue(1) = [];
116 | else
117 |     Y = Y_old;
118 |     eigvalue = eigvalue_old;
119 | end


--------------------------------------------------------------------------------
/fs_unsup_spec.m:
--------------------------------------------------------------------------------
  1 | function [ wFeat, SF ] = fs_unsup_spec( W, X, Y, Pram )
  2 | %function [ wFeat, SF ] = fs_unsup_spec( W, X, Y, Pram )
  3 | %   Select feature using the spectrum information of the graph laplacian
  4 | %   W - the similarity matrix or a kernel matrix
  5 | %   X - the input data, each row is an instance
  6 | %   Y - the labels of the data; only used to derive the number of classes
  7 | %       (its column count if it has several columns, else its unique values)
  8 | %   Pram - the parameter of the algorithm
  9 | %   Pram.style - 1: unsupervised feature selection 2: supervised feature
 10 | %                         selection
 11 | %   Pram.expLam - the exp order for the eigenvalue
 12 | %   Pram.function - 1:f'Lf; 2:using all eigenvalue except the first one; 3:
 13 | %                             using the first k eigenvalues. (In this case
 14 | %                             the weight the bigger the better.)
 15 | %   wFeat - row vector with one weight per feature (column of X)
 16 | %   SF - 1:number of features; the features are not reordered here
 17 | 
 18 | [numInst,dimDat] = size(X);
 19 | if size(Y,2) > 1
 20 |     numC =size(Y,2);
 21 | else
 22 |     numC = length(unique(Y));
 23 | end
 24 | 
 25 | % node degrees (diagonal of the degree matrix)
 26 | deg = sum(W,2);
 27 | % build the laplacian matrix
 28 | L = diag(deg) - W;
 29 | 
 30 | % d1 = diagonal of D^(-0.5); zero-degree nodes get 1 instead of Inf
 31 | d1 = deg.^(-0.5);
 32 | d1(isinf(d1)) = 1;
 33 | 
 34 | % d2 = diagonal of D^(0.5)
 35 | d2 = deg.^0.5;
 36 | % unit vector along D^(0.5)*ones
 37 | v = d2/norm(d2);
 38 | %  build the normalized laplacian matrix hatL = diag(d1)*L*diag(d1)
 39 | hatL = repmat(d1,1,numInst).*L.*repmat(d1',numInst,1);
 40 | if Pram.style ~=2
 41 |     hatL = (hatL'+hatL)/2;
 42 | end
 43 | 
 44 | % calculate and construct spectral information
 45 | % NOTE(review): V and EVA are only read when Pram.function == 3
 46 | switch Pram.style
 47 |     case 1,
 48 |         [ V, EVA ] = funG( hatL, Pram.expLam );
 49 |     case 2,
 50 |         [ V, EVA ] = funG( hatL, 1 );
 51 | end
 52 | 
 53 | % begin to select features
 54 | wFeat = [];
 55 | 
 56 | switch Pram.function
 57 |     case 1, % using f'Lf formulation
 58 |         wFeat = zeros(1, dimDat);
 59 |         for i = 1:dimDat
 60 |             % D^(0.5)*f as an element-wise product (no n-by-n diagonal matrix)
 61 |             hatF = d2.*X(:,i);
 62 |             l = norm(hatF);
 63 |             
 64 |             if l < 100*eps
 65 |                 wFeat(i) = 1000;
 66 |             else
 67 |                 if Pram.style ~=2
 68 |                     hatF = hatF/l;
 69 |                 end
 70 |                 wFeat(i) = hatF'*hatL*hatF;
 71 |             end
 72 |         end
 73 |     case 2, % using all eigenvalues except the first one
 74 |         wFeat = zeros(1, dimDat);
 75 |         for i = 1:dimDat
 76 |             hatF = d2.*X(:,i);
 77 |             l = norm(hatF);
 78 |             
 79 |             if l < 100*eps
 80 |                 wFeat(i) = 1000;
 81 |             else
 82 |                 hatF = hatF/l;
 83 |                 wFeat(i) = hatF'*hatL*hatF/(1-(hatF'*v)^2);
 84 |             end
 85 |         end
 86 |     case 3, % use the first k eigenvalues and the weight is the bigger the better.
 87 |         wFeat = zeros(1, dimDat);
 88 |         eva = diag(EVA);
 89 |         % calculate the eigenvalues
 90 |         switch Pram.style
 91 |             case 1,
 92 |                 eva = eva.^(1/Pram.expLam);
 93 |                 eva = 2 - eva;
 94 |                 eva = eva.^(Pram.expLam);
 95 |             case 2,
 96 |                 eva = max(eva) - eva;
 97 |         end
 98 |         
 99 |         for i = 1:dimDat
100 |             % normalize the feature
101 |             hatF = d2.*X(:,i);
102 |             l = norm(hatF);
103 | 
104 |             % calculate the weight
105 |             if l < 100*eps
106 |                 wFeat(i) = -1;
107 |             else
108 |                 hatF = hatF/l;
109 |                 alphas = hatF'*V(:,2:numC);
110 |                 wFeat(i) = (alphas.^2)*eva(2:numC);
111 |             end
112 |         end
113 | end
114 | 
115 | SF = 1:dimDat;


--------------------------------------------------------------------------------
/lpp.m:
--------------------------------------------------------------------------------
  1 | function [mappedX, mapping] = lpp(X, no_dims, k, sigma, eig_impl)
  2 | %LPP Perform locality preserving projection
  3 | %
  4 | %   [mappedX, mapping] = lpp(X, no_dims, k, sigma, eig_impl)
  5 | %
  6 | % Perform the Locality Preserving Projection on dataset X to reduce it to
  7 | % dimensionality no_dims (default = 2). The number of neighbors that is used
  8 | % by LPP is specified by k (default = 12). The variable sigma determines the
  9 | % bandwidth of the Gaussian kernel (default = 1). For larger problems eig_impl
 10 | % selects the eigensolver: 'JDQR' uses jdqz, anything else (default) uses eigs.
 11 | % mapping.M holds the projection basis and mapping.mean the column means of X.
 12 | 
 13 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
 14 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49
 15 | % You are free to use, change, or redistribute this code in any way you
 16 | % want for non-commercial purposes. However, it is appreciated if you 
 17 | % maintain the name of the original author.
 18 | %
 19 | % (C) Laurens van der Maaten, 2010
 20 | % University California, San Diego / Delft University of Technology
 21 | 
 22 | 
 23 |     if size(X, 2) > size(X, 1)
 24 |         error('Number of samples should be higher than number of dimensions.');
 25 |     end
 26 |     if ~exist('no_dims', 'var')
 27 |         no_dims = 2; 
 28 |     end
 29 |     if ~exist('k', 'var')
 30 |         k = 12;
 31 |     end
 32 |     if ~exist('sigma', 'var')
 33 | 		sigma = 1;
 34 |     end
 35 |     if ~exist('eig_impl', 'var')
 36 |         eig_impl = 'Matlab';
 37 |     end
 38 |     
 39 |     % Construct neighborhood graph (dense distances below 4000 rows, find_nn otherwise)
 40 |     disp('Constructing neighborhood graph...');
 41 |     if size(X, 1) < 4000
 42 |         G = L2_distance(X', X');
 43 |         % Using the sorted order of column i, keep its 1 + k smallest entries and zero the others in row i
 44 |         [tmp, ind] = sort(G); 
 45 |         for i=1:size(G, 1)
 46 |             G(i, ind((2 + k):end, i)) = 0; 
 47 |         end
 48 |         G = sparse(double(G));
 49 |         G = max(G, G');             % Make sure distance matrix is symmetric
 50 |     else
 51 |         G = find_nn(X, k);
 52 |     end
 53 |     G = G .^ 2;
 54 | 	G = G ./ max(max(G)); % rescale so that the largest squared distance is 1
 55 |     
 56 |     % Compute weights (W = G)
 57 |     disp('Computing weight matrices...');
 58 |     
 59 |     % Compute Gaussian kernel (heat kernel-based weights); zero entries (no edge) stay zero
 60 |     G(G ~= 0) = exp(-G(G ~= 0) / (2 * sigma ^ 2));
 61 |         
 62 |     % Construct diagonal weight matrix (row sums of the weights)
 63 |     D = diag(sum(G, 2));
 64 |     
 65 |     % Compute Laplacian, replacing NaN and Inf entries by zero
 66 |     L = D - G;
 67 |     L(isnan(L)) = 0; D(isnan(D)) = 0;
 68 | 	L(isinf(L)) = 0; D(isinf(D)) = 0;
 69 | 
 70 |     % Compute XDX and XLX and make sure these are symmetric
 71 |     disp('Computing low-dimensional embedding...');
 72 |     DP = X' * D * X;
 73 |     LP = X' * L * X;
 74 |     DP = (DP + DP') / 2;
 75 |     LP = (LP + LP') / 2;
 76 | 
 77 |     % Perform eigenanalysis of generalized eigenproblem LP*v = lambda*DP*v (as in LEM)
 78 |     if size(X, 1) > 200 && no_dims < (size(X, 1) / 2)
 79 |         if strcmp(eig_impl, 'JDQR')
 80 |             options.Disp = 0;
 81 |             options.LSolver = 'bicgstab';
 82 |             [eigvector, eigvalue] = jdqz(LP, DP, no_dims, 'SA', options);
 83 |         else
 84 |             options.disp = 0;
 85 |             options.issym = 1;
 86 |             options.isreal = 1;
 87 |             [eigvector, eigvalue] = eigs(LP, DP, no_dims, 'SA', options);
 88 |         end
 89 |     else
 90 |         [eigvector, eigvalue] = eig(LP, DP);
 91 |     end
 92 |     
 93 |     % Sort eigenvalues in ascending order and keep the eigenvectors of the no_dims smallest
 94 |     [eigvalue, ind] = sort(diag(eigvalue), 'ascend');
 95 |     eigvector = eigvector(:,ind(1:no_dims));
 96 |     
 97 |     % Compute final linear basis and map data (X is projected as given; the mean is only stored)
 98 |     mappedX = X * eigvector;
 99 |     mapping.M = eigvector;
100 |     mapping.mean = mean(X, 1);
101 | 


--------------------------------------------------------------------------------
/ltsa.m:
--------------------------------------------------------------------------------
  1 | function mappedX = ltsa(X, no_dims, k, eig_impl)
  2 | %LTSA Runs the local tangent space alignment algorithm
  3 | %
  4 | %   mappedX = ltsa(X, no_dims, k, eig_impl)
  5 | %
  6 | % The function runs the local tangent space alignment algorithm on dataset
  7 | % X (one datapoint per row), reducing the data to dimensionality no_dims
  8 | % (default = 2). The number of neighbors is specified by k (default = 12).
  9 | % eig_impl selects the eigensolver: 'JDQR' uses jdqr, any other value
 10 | % (default = 'Matlab') uses eigs.
 11 | 
 12 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
 13 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49
 14 | % You are free to use, change, or redistribute this code in any way you
 15 | % want for non-commercial purposes. However, it is appreciated if you 
 16 | % maintain the name of the original author.
 17 | %
 18 | % (C) Laurens van der Maaten, 2010
 19 | % University California, San Diego / Delft University of Technology
 20 | 
 21 |     if ~exist('no_dims', 'var')
 22 |         no_dims = 2;
 23 |     end
 24 |     if ~exist('k', 'var')
 25 |         k = 12;
 26 |     end
 27 |     if ~exist('eig_impl', 'var')
 28 |         eig_impl = 'Matlab';
 29 |     end
 30 |  
 31 |     % Compute neighborhood indices (row i of ni lists the neighbors of point i)
 32 |     disp('Find nearest neighbors...');
 33 |     n = size(X, 1);
 34 |     [D, ni] = find_nn(X, k);
 35 | 
 36 |     % Compute local information matrix for all datapoints
 37 |     disp('Compute local information matrices for all datapoints...');
 38 |     Bi = cell(1, n); 
 39 |     for i=1:n
 40 |         % Gram matrix W of the mean-centered neighbors of point i (zero entries of ni are skipped)
 41 |         Ii = ni(i,:);
 42 |         Ii = Ii(Ii ~= 0);
 43 |         kt = numel(Ii);
 44 |         Xi = X(Ii,:) - repmat(mean(X(Ii,:), 1), [kt 1]);
 45 |         W = Xi * Xi'; 
 46 |         W = (W + W') / 2;
 47 |         
 48 |         % Compute local information by computing the no_dims largest eigenvectors of W
 49 |         [Vi, Si] = schur(W);
 50 |         [s, Ji] = sort(-diag(Si));
 51 | 		if length(Ji) < no_dims
 52 | 			no_dims = length(Ji);
 53 | 			warning(['Target dimensionality reduced to ' num2str(no_dims) '...']);
 54 | 		end
 55 |         Vi = Vi(:,Ji(1:no_dims));  
 56 |         
 57 |         % Store eigenvectors in G (Vi is the space with the maximum variance, i.e. a good approximation of the tangent space at point Xi)
 58 | 		% The constant 1/sqrt(kt) serves as a centering matrix
 59 | 		Gi = double([repmat(1 / sqrt(kt), [kt 1]) Vi]);
 60 |         
 61 | 		% Compute Bi = I - Gi * Gi'
 62 | 		Bi{i} = eye(kt) - Gi * Gi';  
 63 |     end
 64 |     
 65 |     % Construct sparse matrix B (= alignment matrix)
 66 |     disp('Construct alignment matrix...');
 67 |     B = speye(n);
 68 |     for i=1:n
 69 |         Ii = ni(i,:);
 70 |         Ii = Ii(Ii ~= 0);
 71 |         B(Ii, Ii) = B(Ii, Ii) + Bi{i};							% sum Bi over all points
 72 | 		B(i, i) = B(i, i) - 1; % cancels the identity that B was initialized with
 73 |     end
 74 | 	B = (B + B') / 2;											% make sure B is symmetric
 75 | 	
 76 | 	% For sparse datasets, we might end up with NaNs in B. We just set them to zero for now...
 77 | 	B(isnan(B)) = 0;
 78 | 	B(isinf(B)) = 0;
 79 |     
 80 |     % Perform eigenanalysis of matrix B
 81 | 	disp('Perform eigenanalysis...');
 82 |     tol = 0; % handed to the solver as its third argument (for eigs a numeric value there is the shift sigma)
 83 | 	if strcmp(eig_impl, 'JDQR')
 84 |         options.Disp = 0;
 85 |         options.LSolver = 'bicgstab';
 86 |         [mappedX, D] = jdqr(B, no_dims + 1, tol, options);      % only need bottom (no_dims + 1) eigenvectors
 87 |     else
 88 |         options.disp = 0;
 89 |         options.isreal = 1;
 90 |         options.issym = 1;
 91 |         [mappedX, D] = eigs(B, no_dims + 1, tol, options);      % only need bottom (no_dims + 1) eigenvectors
 92 |     end
 93 | 
 94 |     % Sort eigenvalues and eigenvectors in ascending order of eigenvalue
 95 |     [D, ind] = sort(diag(D), 'ascend');
 96 |     mappedX = mappedX(:,ind);
 97 | 
 98 |     % Final embedding coordinates: skip the first eigenvector, keep the next no_dims
 99 | 	if size(mappedX, 2) < no_dims + 1, no_dims = size(mappedX, 2) - 1; end
100 |     mappedX = mappedX(:,2:no_dims + 1);
101 |     


--------------------------------------------------------------------------------
/fs_unsup_spfs_larnes.m:
--------------------------------------------------------------------------------
  1 | function [ fList, W ] = fs_unsup_spfs_larnes( X, Y, numF )
  2 | % FS_UNSUP_SPFS_LARNES Path-following feature selection with an L2,1 refit.
  3 | %
  4 | %   [fList, W] = fs_unsup_spfs_larnes(X, Y, numF)
  5 | %
  6 | %   X     - the data, each row is an instance
  7 | %   Y     - the response, nY columns
  8 | %   numF  - the number of features we want to select
  9 | %
 10 | %   fList - indices of the selected features
 11 | %   W     - nF-by-nY weight matrix; rows outside fList are zero
 12 | %
 13 | % Starting from the feature most correlated with Y, every pass picks the
 14 | % inactive feature with the smallest valid step length, moves W along the
 15 | % least-squares direction of the active set, and re-solves the L2,1
 16 | % problem on the enlarged set with mcLeastR (defined outside this file).
 17 | % The loop ends when numF features are active, when the active set
 18 | % reaches min(nF, nD), or when no feature yields a valid step.
 19 | 
 20 | [nD, nF] = size(X);
 21 | nY = size(Y,2);
 22 | 
 23 | W = zeros(nF, nY);
 24 | R = Y;
 25 | 
 26 | % find the most correlated one
 27 | nor = X'*R;
 28 | nor = sqrt(sum((nor.*nor),2));
 29 | [~, bestCor] = max(nor);
 30 | 
 31 | fList = bestCor; k = length(fList);
 32 | 
 33 | while k < numF && k < nF && k < nD
 34 |     % obtain the proceed direction (least squares on the active set)
 35 |     XA = X(:, fList);
 36 |     GA = XA\R;
 37 | 
 38 |     % compute how far can we go for every f to reduce lambda: s solves
 39 |     % p1*s^2 - 2*p2*s + p3 = 0, and the smallest root in (0,1] wins
 40 |     a = X(:,fList(1))'*R;
 41 |     bestCor = -1; bestNor = inf;
 42 |     for i = 1:nF
 43 |         if any(fList == i)
 44 |             continue;
 45 |         end
 46 |         c = X(:,i)'*R;
 47 |         d = X(:,i)'*XA*GA;
 48 |         p1 = a*a'-d*d'; p2 = a*a'-c*d'; p3 = a*a'-c*c';
 49 |         bb = p2^2-p1*p3;
 50 |         if bb < 0
 51 |             continue;               % no real root for this feature
 52 |         end
 53 |         bb = sqrt(bb);
 54 |         s1 = (p2+abs(bb))/p1;
 55 |         s2 = (p2-abs(bb))/p1;
 56 |         % 100 is the sentinel for a root outside (0,1]
 57 |         if (s1<=0 || s1>1)
 58 |             s1 = 100;
 59 |         end
 60 |         if (s2<=0 || s2>1)
 61 |             s2 = 100;
 62 |         end
 63 |         if s1==100 && s2==100
 64 |             continue;
 65 |         end
 66 |         s = min(s1,s2);
 67 |         if s < bestNor
 68 |             bestNor = s;
 69 |             bestCor = i;
 70 |         end
 71 |     end
 72 |     if bestCor == -1
 73 |         return
 74 |     end
 75 | 
 76 |     % reduce the size of lambda and update W with nes-L2,1
 77 |     W(fList,:) = W(fList,:) + bestNor*GA;
 78 |     R = Y - X*W;
 79 |     lam = norm(X(:,fList(1))'*R,2);
 80 | 
 81 |     % find the nes-L2,1 solution on the enlarged active set
 82 |     [ fList, WA ] = nes(X, Y, W, [fList bestCor], lam*0.995);
 83 |     % nes may drop features: clear W first so that dropped rows do not
 84 |     % keep stale weights that leak into the residual and the output
 85 |     W = zeros(nF, nY);
 86 |     W(fList,:) = WA;
 87 |     R = Y - X*W; k = length(fList);
 88 |     disp(' ');
 89 | end
 90 | 
 91 |     function [newfList, WAA] = nes(X, Y, W, fList, lam)
 92 |         % Solve the L2,1 problem restricted to fList with mcLeastR. The
 93 |         % working set grows by every outside feature whose residual
 94 |         % correlation norm exceeds lam by more than trd, and shrinks by
 95 |         % every feature whose weight row becomes exactly zero.
 96 |         trd = 10e-5;                        % violation tolerance (= 1e-4)
 97 | 
 98 |         WAA = W(fList,:); XAA = X(:,fList); newfList = fList;
 99 | 
100 |         opts.q=2;
101 |         opts.tol=1e-7;
102 |         opts.maxIter = 10000;
103 | 
104 |         stop = 0;
105 |         maxC = 1000; counterr = 1;
106 | 
107 |         % obtain a solution on XAA
108 |         while stop == 0 && counterr <= maxC
109 |             LC = setdiff(1:nF,newfList);    % nF is shared with the parent
110 | 
111 |             opts.x0=WAA;
112 |             WAA = mcLeastR(XAA, Y, lam, opts);
113 | 
114 |             keepIDX = find(sum(abs(WAA),2));
115 | 
116 |             newfList = newfList(keepIDX);
117 |             WAA = WAA(keepIDX,:);
118 |             XAA = XAA(:,keepIDX);
119 | 
120 |             RR = Y - XAA*WAA;
121 |             pp = X(:,LC)'*RR; pp = sqrt(sum(pp.*pp,2));
122 |             violated = LC((pp-lam) > trd);
123 | 
124 |             if isempty(violated)
125 |                 % also taken when LC is empty; the old max()-based test
126 |                 % then fell through and burned all maxC iterations
127 |                 if length(keepIDX) < length(fList)
128 |                     lam = lam*0.995;
129 |                 else
130 |                     stop = 1;
131 |                 end
132 |             else
133 |                 newfList = [newfList, violated]; %#ok<AGROW>
134 |                 WAA = [WAA; zeros(length(violated), size(WAA,2))]; %#ok<AGROW>
135 |                 XAA = X(:,newfList);
136 |                 counterr = counterr + 1;
137 |             end
138 |         end
139 |     end % end function nes
140 | end


--------------------------------------------------------------------------------
/fs_unsup_llcfs.m:
--------------------------------------------------------------------------------
  1 | function [Y, tao, objHistory] = fs_unsup_llcfs(X,param)
  2 | % FS_UNSUP_LLCFS Local-learning-based clustering with feature weighting.
  3 | %
  4 | % Input
  5 | %	X: nSmp * nDim
  6 | %	param, a struct of parameters
  7 | %		nClusters, the number of clusters (required)
  8 | %		k, the size of knn (default 30)
  9 | %		beta, the regularization parameter (default 1)
 10 | %		kType, neighbour search: 1 = SimGraph_NearestNeighbors (default),
 11 | %		       2 = knnsearch
 12 | %		maxiter, number of outer iterations (default 50); values <= 0
 13 | %		       run a single iteration
 14 | %		epsilon, tolerance on the objective change (default 1e-5); it is
 15 | %		       read, but the epsilon stopping rule is switched off below
 16 | % Output
 17 | %	Y: nSmp * nClusters
 18 | %	tao: nDim * 1, non-negative feature weights normalised to sum to one
 19 | %	objHistory: per iteration, the sum of the nClusters smallest eigenvalues
 20 | %
 21 | %
 22 | % [1] Feature Selection and Kernel Learning for Local Learning-Based Clustering, PAMI-2011
 23 | %
 24 | % Liang Du (csliangdu@gmail.com)
 25 | 
 26 | if ~isfield(param, 'nClusters')
 27 |     error('fs_unsup_llcfs:missingParam', 'param.nClusters is required.');
 28 | end
 29 | c = param.nClusters;
 30 | 
 31 | k = 30;
 32 | if isfield(param, 'k')
 33 |     k = param.k;
 34 | end
 35 | 
 36 | beta = 1;
 37 | if isfield(param, 'beta')
 38 |     beta = param.beta;
 39 | end
 40 | 
 41 | kType = 1;
 42 | if isfield(param, 'kType')
 43 |     kType = param.kType;
 44 | end
 45 | 
 46 | maxiter = 50;
 47 | if isfield(param, 'maxiter')
 48 |     maxiter = param.maxiter;
 49 | end
 50 | 
 51 | epsilon = 1e-5;
 52 | if isfield(param, 'epsilon')
 53 |     epsilon = param.epsilon;
 54 | end
 55 | 
 56 | % Stopping rules. Only the iteration cap is active: the epsilon and tao
 57 | % rules were never enabled by the original code, and they stay disabled
 58 | % here so that existing results do not change.
 59 | isEpsilon = false;
 60 | isTao = false;
 61 | epsilon_tao = 1e-5;
 62 | [n, d] = size(X);
 63 | 
 64 | tao = ones(d,1) / d;
 65 | 
 66 | objHistory = [];
 67 | iter = 0;
 68 | while true
 69 | 
 70 |     wX = bsxfun(@times, X, sqrt(max(tao, eps))' );
 71 |     wX2 = bsxfun(@times, X, max(tao, eps)' );
 72 |     wK = wX * wX';
 73 |     % k-mutual neighbors re-computation using weighted features
 74 |     switch kType
 75 |         case 1
 76 |             W = SimGraph_NearestNeighbors(wX', k, 2, 0);
 77 |             [idx, jdx, ~] = find(W);
 78 |             kIdx = cell(n, 1);
 79 |             nz = length(idx);
 80 |             for ii = 1:nz
 81 |             	kIdx{jdx(ii)} = [kIdx{jdx(ii)}, idx(ii)];
 82 |             end
 83 |         case 2
 84 |             % fail here with a clear message instead of crashing later on
 85 |             % an undefined kIdx
 86 |             if isempty(which('knnsearch'))
 87 |                 error('fs_unsup_llcfs:noKnnsearch', ...
 88 |                     'The function knnsearch in stat toolbox is not found');
 89 |             end
 90 |             [kIdx, ~] = knnsearch(wX, wX, 'k', min(n, k + 1) );
 91 |             kIdx = kIdx(:, 2:end);
 92 |             kIdx = mat2cell(kIdx, ones(n, 1), size(kIdx, 2));
 93 |         otherwise
 94 |             error('fs_unsup_llcfs:badKType', ...
 95 |                 'Unsupported param.kType (expected 1 or 2).');
 96 |     end
 97 | 
 98 |     % construct A for laplacian
 99 |     A = zeros(n);
100 |     wA = cell(n,1);% pre storage for w computation
101 |     for i = 1:n
102 |         lidx = kIdx{i};
103 |         ni = length(lidx);
104 |         if ni > 1
105 |             Ki = wK(lidx, lidx);
106 |             ki = wK(i, lidx);
107 |             Hi = eye(ni) - ones(ni, ni) / ni;
108 |             Ii = eye(ni);
109 |             Iib = Ii / beta;
110 |             Ai = Hi * Ki * Hi;
111 |             Ai = (Ai + Iib) \ Ai;
112 |             Ai = Hi - Hi * Ai;
113 |             Ai = Ai * beta;
114 |             wA{i} = wX2(lidx, :)' * Ai; % EQ 15
115 |             Ai = (ki - sum(Ki) / ni) * Ai;
116 |             Ai = Ai + ones(1, ni) / ni;
117 |             A(i, lidx) = Ai;
118 |         end
119 |     end
120 | 
121 |     % construct laplacian for local learning
122 |     M = eye(n) - A;
123 |     M = M' * M;
124 |     M(isnan(M)) = 0;
125 |     M(isinf(M)) = 0;
126 | 
127 |     % first c eigenvectors corresponding to the first c smallest eigenvalues
128 |     M = (M + M') / 2;
129 |     [Y, eigval] = eig(M);
130 |     [eigval, eigidx] = sort(diag(eigval), 'ascend');
131 |     % eigval is already sorted here; indexing it with eigidx again (as the
132 |     % old code did) would pick the wrong values
133 |     eigval = eigval(1:c);
134 |     Y = Y(:, eigidx(1:c));
135 | 
136 |     objHistory = [objHistory; sum(eigval)];%#ok
137 | 
138 |     % compute wc to compute tao
139 |     tao_old = tao;
140 | 
141 |     tao = zeros(d, 1);
142 |     for i = 1:n
143 |         lidx = kIdx{i};
144 |         ni = length(lidx);
145 |         if ni > 1
146 |             wi = wA{i} * Y(lidx,:);
147 |             tao = sum(wi.^2, 2) + tao;
148 |         end
149 |     end
150 |     tao = sqrt(tao);
151 |     tao = tao / sum(tao);
152 | 
153 |     % check the convergence
154 |     iter = iter + 1;
155 |     if isEpsilon && iter > 1
156 |         if abs(objHistory(end-1) - objHistory(end)) < epsilon
157 |             break;
158 |         end
159 |     end
160 |     if isTao && sum(abs(tao_old - tao)) < epsilon_tao
161 |         break;
162 |     end
163 |     % >= instead of ==, so that maxiter <= 0 cannot loop forever
164 |     if iter >= maxiter
165 |         break;
166 |     end
167 | end


--------------------------------------------------------------------------------
/evalUnSupFS.m:
--------------------------------------------------------------------------------
  1 | function fs_res = evalUnSupFS(X, Y, feaIdx, options)
  2 | % EVALUNSUPFS Score a selected feature subset with several criteria.
  3 | %
  4 | % Fields of the returned struct fs_res:
  5 | %   red    - redundancy of the selected features (compute_RED)
  6 | %   jac    - neighbourhood Jaccard score, options.jac_k neighbours (default 5)
  7 | %   loocv  - leave-one-out nearest-neighbour accuracy, options.knn_k (default 1)
  8 | %   plus every field produced by compute_Clustering over options.nKm
  9 | %   k-means runs (default 10): mean_*/std_*/best_obj_*/aio_* for acc,
 10 | %   nmi_max, nmi_sqrt, purity, prec, recall and f1
 11 | %
 12 | % Inputs
 13 | %   X       - data matrix, one sample per row
 14 | %   Y       - ground-truth labels
 15 | %   feaIdx  - indices of the selected columns of X
 16 | %   options - optional struct with jac_k, nKm, knn_k
 17 | %
 18 | % Reference
 19 | % [1] On Similarity Preserving Feature Selection, TKDE, 2011
 20 | %
 21 | % Liang Du (csliangdu@gmail.com)
 22 | 
 23 | if ~exist('options', 'var')
 24 |     options = [];
 25 | end
 26 | 
 27 | % fill in defaults for any option the caller left out
 28 | defaults = {'jac_k', 5; 'nKm', 10; 'knn_k', 1};
 29 | for iOpt = 1:size(defaults, 1)
 30 |     if ~isfield(options, defaults{iOpt, 1})
 31 |         options.(defaults{iOpt, 1}) = defaults{iOpt, 2};
 32 |     end
 33 | end
 34 | 
 35 | Xsub = X(:, feaIdx);
 36 | 
 37 | % criteria that do not need clustering (evaluated left to right)
 38 | fs_res = struct('red', compute_RED(Xsub), ...
 39 |     'jac', compute_JAC(X, Xsub, options.jac_k), ...
 40 |     'loocv', compute_loocv(Xsub, Y, options.knn_k));
 41 | 
 42 | % append the clustering metrics as additional fields, in their own order
 43 | fs_cluster = compute_Clustering(Xsub, Y, options.nKm);
 44 | cNames = fieldnames(fs_cluster);
 45 | for iF = 1:numel(cNames)
 46 |     fs_res.(cNames{iF}) = fs_cluster.(cNames{iF});
 47 | end
 48 | end
 49 | 
 50 | function fs_red = compute_RED(Xsub)
 51 | % Redundancy score: sum of the pairwise Pearson correlations of the
 52 | % columns of Xsub (each pair once) divided by nDim * (nDim - 1).
 53 | [nSmp, nDim] = size(Xsub);
 54 | % which() really probes for corr; the old ~isempty('corr') was always true
 55 | if ~isempty(which('corr')) && nDim < 2000
 56 |     sum_corr = sum(sum(tril(corr(Xsub), -1)));
 57 | else
 58 |     % accumulate the lower triangle row by row, no nDim x nDim matrix
 59 |     Xc = bsxfun(@minus, Xsub, mean(Xsub, 1));
 60 |     stdX = std(Xsub, 0, 1);
 61 |     sum_corr = 0;
 62 |     for i1 = 2:nDim
 63 |         % dot / (nSmp - 1) is the covariance; the old code skipped this factor
 64 |         sum_corr = sum_corr + sum((Xc(:, i1)' * Xc(:, 1:i1-1)) ./ ((nSmp - 1) * (stdX(i1) * stdX(1:i1-1) + eps)));
 65 |     end
 66 | end
 67 | fs_red = sum_corr / (nDim * (nDim -1) + eps);
 68 | end
 69 | 
 70 | function fs_jac = compute_JAC(X, Xsub, k)
 71 | % Mean Jaccard overlap between each sample's k nearest neighbours in the
 72 | % full feature space (X) and in the selected-feature space (Xsub).
 73 | spaces = {X, Xsub}; nbrs = cell(1, 2);
 74 | for iSpace = 1:2
 75 |     [~, order] = sort(EuDist2(spaces{iSpace}, spaces{iSpace}, 0), 2, 'ascend');
 76 |     nbrs{iSpace} = order(:, 2:k+1);     % skip column 1, presumably the sample itself
 77 | end
 78 | overlap = zeros(size(X, 1), 1);
 79 | for iSmp = 1:size(X, 1)
 80 |     rowA = nbrs{1}(iSmp, :); rowB = nbrs{2}(iSmp, :);
 81 |     overlap(iSmp) = numel(intersect(rowA, rowB)) / numel(union(rowA, rowB));
 82 | end
 83 | fs_jac = mean(overlap);
 84 | end
 85 | 
 86 | function fs_cluster = compute_Clustering(Xsub, Y, nKm)
 87 | % Run k-means nKm times (default 20) on Xsub and summarise agreement with Y:
 88 | % mean_*/std_* over the runs, best_obj_* for the lowest-objective run, aio_* raw lists.
 89 | if ~exist('nKm', 'var'), nKm = 20; end
 90 | nClass = length(unique(Y));
 91 | acc_list = zeros(nKm, 1);
 92 | nmi_max_list = zeros(nKm, 1);
 93 | nmi_sqrt_list = zeros(nKm, 1);
 94 | purity_list = zeros(nKm, 1);
 95 | obj_list = zeros(nKm, 1);
 96 | prec_list = zeros(nKm, 1);
 97 | recall_list = zeros(nKm, 1);
 98 | f1_list = zeros(nKm, 1);
 99 | rand('twister',5489); %#ok  reseed the legacy generator with a fixed seed
100 | for iKm = 1:nKm
101 |     [label, ~, ~, sumD] = litekmeans(Xsub, nClass,'Replicates',1);
102 |     tmp_res = evalClustering(Y, label);
103 |     acc_list(iKm) = tmp_res.acc;
104 |     nmi_max_list(iKm) = tmp_res.nmi_max;
105 |     nmi_sqrt_list(iKm) = tmp_res.nmi_sqrt;
106 |     purity_list(iKm) = tmp_res.purity;
107 |     obj_list(iKm) = sum(sumD);          % total within-cluster distance of this run
108 |     prec_list(iKm) = mean(tmp_res.precision);
109 |     recall_list(iKm) = mean(tmp_res.recall);
110 |     f1_list(iKm) = mean(tmp_res.f1);
111 | end
112 | [~, idx] = min(obj_list);               % run with the smallest objective
113 | fs_cluster = struct('mean_acc', mean(acc_list), 'std_acc', std(acc_list), ...
114 |     'mean_nmi_max', mean(nmi_max_list), 'std_nmi_max', std(nmi_max_list), ...
115 |     'mean_nmi_sqrt', mean(nmi_sqrt_list), 'std_nmi_sqrt', std(nmi_sqrt_list), ...
116 |     'mean_purity', mean(purity_list), 'std_purity', std(purity_list), ...
117 |     'mean_prec', mean(prec_list), 'std_prec', std(prec_list), ...
118 |     'mean_recall', mean(recall_list), 'std_recall', std(recall_list), ...
119 |     'mean_f1', mean(f1_list), 'std_f1', std(f1_list), ... spread of f1 (was taken from prec_list)
120 |     'best_obj_acc', acc_list(idx(1)), 'best_obj_nmi_max', nmi_max_list(idx(1)),...
121 |     'best_obj_nmi_sqrt', nmi_sqrt_list(idx(1)), 'best_obj_purity', purity_list(idx(1)), ...
122 |     'best_obj_prec', prec_list(idx(1)), 'best_obj_recall', recall_list(idx(1)),...
123 |     'best_obj_f1', f1_list(idx(1)), ...
124 |     'aio_acc', acc_list, 'aio_nmi_max', nmi_max_list, 'aio_nmi_sqrt', nmi_sqrt_list, 'aio_purity', purity_list,...
125 |     'aio_prec', prec_list, 'aio_recall', recall_list, 'aio_f1', f1_list);
126 | end
127 | 
128 | function fs_loocv = compute_loocv(Xsub, Y, k)
129 | % Leave-one-out k-NN accuracy on Xsub; k was previously ignored (always 1-NN).
130 | if ~exist('k', 'var'), k = 1; end
131 | k = max(1, min(k, size(Xsub, 1) - 1));          % cannot use more neighbours than exist
132 | [~, Idx] = sort(EuDist2(Xsub,Xsub,0), 2, 'ascend');
133 | votes = reshape(Y(Idx(:, 2:k+1)), [], k);       % column 1 is presumably the sample itself
134 | label = mode(votes, 2);                         % majority vote; ties go to the smallest label
135 | Y = reshape(Y, [], 1);
136 | fs_loocv = mean(label == Y);
137 | end
138 | 


--------------------------------------------------------------------------------
/scale_dist3_knn.m:
--------------------------------------------------------------------------------
  1 | function A = scale_dist3_knn(D, nn, knn, useSparse)
  2 | %SCALE_DIST3_KNN Self-tuned k-nearest-neighbour affinity matrix.
  3 | %
  4 | % A = scale_dist3_knn(D, nn, knn) builds a self-tuned affinity matrix A
  5 | % from the distance matrix D. Every observation is linked only to its
  6 | % 'knn' nearest neighbours:
  7 | %     A_ii = 0, for all i
  8 | %     A_ij = exp(-D_ij / (sigma_i * sigma_j)),
  9 | %           if i ~= j and observation i is among the 'knn' neighbours
 10 | %           of observation j, or the other way round
 11 | %     A_ij = 0, otherwise
 12 | % where sigma_i is the Euclidean distance from observation i to its
 13 | % nn-th neighbour. A is returned as a sparse matrix.
 14 | %
 15 | % A = scale_dist3_knn(D, nn, knn, useSparse) returns a sparse matrix when
 16 | % useSparse is true and a dense matrix otherwise.
 17 | %
 18 | % What D must contain:
 19 | %   Dense D:
 20 | %     D_ij is the squared Euclidean distance between observations i and j.
 21 | %   Sparse D (e.g. built for image segmentation):
 22 | %     A nonzero D_ij is the squared Euclidean distance between
 23 | %       observations i and j.
 24 | %     A zero D_ij (i~=j) means the two observations are infinitely far
 25 | %       apart (the affinity is 0).
 26 | %     D_ii=0 for all i by definition.
 27 | %     Every column needs at least one nonzero entry; an empty column
 28 | %       makes the sigma lookup fail with an indexing error.
 29 | %
 30 | % The method comes from:
 31 | %     L. Zelnik-Manor, P. Perona,
 32 | %     Self-tuning spectral clustering.
 33 | %     Advances in Neural Information Processing Systems 17 (NIPS '04), pp. 1601--1608.
 34 | % Their Matlab code is posted at:
 35 | %     http://webee.technion.ac.il/~lihi/Demos/SelfTuningClustering.html
 36 | % That implementation departs from the paper's definition of A: its
 37 | % 'scale_dist' function uses
 38 | %     A_ij = exp(-D_ij / max((sigma_i*sigma_j), 0.004))
 39 | %
 40 | % This function follows the paper's definition as given at the top of
 41 | % this help text.
 42 | %
 43 | % It was used for the experiments in:
 44 | %     Da Kuang, Chris Ding, Haesun Park,
 45 | %     Symmetric Nonnegative Matrix Factorization for Graph Clustering,
 46 | %     The 12th SIAM International Conference on Data Mining (SDM '12), pp. 106--117.
 47 | % Please cite this paper if you find this code useful.
 48 | %
 49 | 
 50 | if nargin < 4
 51 |     useSparse = true;
 52 | end
 53 | 
 54 | n = size(D, 1);
 55 | 
 56 | if issparse(D)
 57 |     % no column stores more than maxPerCol distances, so cap knn there
 58 |     maxPerCol = full(max(sum(D~=0)));
 59 |     if knn > maxPerCol
 60 |         knn = maxPerCol;
 61 |     end
 62 | 
 63 |     ls = zeros(1, n);
 64 |     nbrRow = zeros(knn, n);             % 0 marks an unused slot
 65 |     nbrVal = zeros(knn, n);
 66 |     for col = 1 : n
 67 |         rowsNz = find(D(:, col) ~= 0);
 68 |         [valsSorted, order] = sort(full(D(rowsNz, col)));
 69 |         cnt = numel(valsSorted);
 70 |         % nn-th smallest stored distance, or the largest one if the
 71 |         % column has fewer than nn entries
 72 |         ls(col) = valsSorted(min(nn, cnt));
 73 |         take = min(knn, cnt);
 74 |         nbrRow(1:take, col) = rowsNz(order(1:take));
 75 |         nbrVal(1:take, col) = valsSorted(1:take);
 76 |     end
 77 |     ls = sqrt(ls)';
 78 | 
 79 |     colOf = repmat(1:n, knn, 1);
 80 |     used = find(nbrRow(:) ~= 0);
 81 |     i = nbrRow(used);
 82 |     j = colOf(used);
 83 |     s = nbrVal(used);
 84 | else
 85 |     % an observation has at most n-1 neighbours besides itself
 86 |     if nn > n-1
 87 |         nn = n-1;
 88 |     end
 89 |     if knn > n-1
 90 |         knn = n-1;
 91 |     end
 92 |     [colSorted, colOrder] = sort(D);
 93 |     ls = sqrt(colSorted(nn+1, :))';
 94 | 
 95 |     % take knn+1 entries per column and drop the observation itself
 96 |     rowsTop = colOrder(1:knn+1, :);
 97 |     valsTop = colSorted(1:knn+1, :);
 98 |     colOf = repmat(1:n, knn+1, 1);
 99 |     notSelf = find(rowsTop(:) ~= colOf(:));
100 |     i = rowsTop(notSelf);
101 |     j = colOf(notSelf);
102 |     s = valsTop(notSelf);
103 | end
104 | 
105 | % list every link in both directions so that A comes out symmetric
106 | index = [i, j; j, i];
107 | s = [s; s];
108 | 
109 | A_s = exp( -s ./ (ls(index(:,1)).*ls(index(:,2))) );
110 | 
111 | if useSparse
112 |     % sparse() adds up repeated (row, col) pairs, so keep one copy of each
113 |     [index, firstOf] = unique(index, 'rows');
114 |     A = sparse(index(:,1), index(:,2), A_s(firstOf), n, n);
115 | else
116 |     A = zeros(n);
117 |     A((index(:,2) - 1) * n + index(:, 1)) = A_s;
118 | end


--------------------------------------------------------------------------------
/lle.m:
--------------------------------------------------------------------------------
  1 | function [mappedX, mapping] = lle(X, no_dims, k, eig_impl)
  2 | %LLE Runs the locally linear embedding algorithm
  3 | %
  4 | %   [mappedX, mapping] = lle(X, no_dims, k, eig_impl)
  5 | %
  6 | % Runs the locally linear embedding algorithm on dataset X (one sample per
  7 | % row) to reduce its dimensionality to no_dims (default 2). The number of
  8 | % neighbors is given by k (default 12). eig_impl selects the eigensolver:
  9 | % 'JDQR' calls jdqr, anything else (default 'Matlab') calls eigs.
 10 | %
 11 | % Only the largest connected component of the neighborhood graph is
 12 | % embedded. mappedX holds its embedded coordinates; mapping stores k, the
 13 | % retained samples (X), the eigenvectors (vec) and eigenvalues (val), the
 14 | % indices of the retained samples (conn_comp) and the neighborhood graph
 15 | % returned by find_nn (nbhd).
 16 | %
 17 | 
 18 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
 19 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49
 20 | % You are free to use, change, or redistribute this code in any way you
 21 | % want for non-commercial purposes. However, it is appreciated if you 
 22 | % maintain the name of the original author.
 23 | %
 24 | % (C) Laurens van der Maaten, 2010
 25 | % University California, San Diego / Delft University of Technology
 26 | 
 27 |     if ~exist('no_dims', 'var')
 28 |         no_dims = 2;
 29 |     end
 30 |     if ~exist('k', 'var')
 31 |         k = 12;
 32 |     end
 33 |     if ~exist('eig_impl', 'var')
 34 |         eig_impl = 'Matlab';
 35 |     end
 36 | 
 37 |     % Get number of samples and dimensionality
 38 |     [n, d] = size(X);
 39 | 
 40 |     % Compute pairwise distances and find nearest neighbors (vectorized implementation)
 41 |     disp('Finding nearest neighbors...');    
 42 |     if ~ischar(k)
 43 |         [distance, neighborhood] = find_nn(X, k + 1);
 44 |     else
 45 |         [distance, neighborhood] = find_nn(X, k);
 46 |     end
 47 |     
 48 |     % Identify largest connected component of the neighborhood graph
 49 |     blocks = components(distance)';
 50 |     count = zeros(1, max(blocks));
 51 |     for i=1:max(blocks)
 52 |         count(i) = sum(blocks == i);
 53 |     end
 54 |     [~, block_no] = max(count);
 55 |     conn_comp = find(blocks == block_no); 
 56 |     
 57 |     % Update the neighborhood relations. new_ind maps an original sample
 58 |     % index to its position inside conn_comp (0 if the sample is dropped).
 59 |     % One vectorized lookup replaces the former pair of O(n^2) loops.
 60 |     new_ind = zeros(n, 1);
 61 |     new_ind(conn_comp) = 1:numel(conn_comp);
 62 |     neighborhood = neighborhood(conn_comp, 2:k+1)';
 63 |     linked = neighborhood ~= 0;                 % 0 marks "no neighbor"
 64 |     neighborhood(linked) = new_ind(neighborhood(linked));
 65 |     n = numel(conn_comp);
 66 |     X = X(conn_comp,:)';    
 67 |     max_k = size(neighborhood, 1);
 68 |         
 69 |     % Find reconstruction weights for all points by solving the MSE problem 
 70 |     % of reconstructing a point from each neighbours. A used constraint is 
 71 |     % that the sum of the reconstruction weights for a point should be 1.
 72 |     disp('Compute reconstruction weights...');
 73 |     if k > d 
 74 |         tol = 1e-5;
 75 |     else
 76 |         tol = 0;
 77 |     end
 78 | 
 79 |     % Construct reconstruction weight matrix
 80 |     W = zeros(max_k, n);
 81 |     for i=1:n
 82 |         nbhd = neighborhood(:,i);
 83 |         nbhd = nbhd(nbhd ~= 0);
 84 |         kt = numel(nbhd);
 85 |         z = bsxfun(@minus, X(:,nbhd), X(:,i));                  % Shift point to origin
 86 |         C = z' * z;                                             % Compute local covariance
 87 |         C = C + eye(kt, kt) * tol * trace(C);                   % Regularization of covariance (if K > D)
 88 |         wi = C \ ones(kt, 1);                                   % Solve linear system
 89 |         wi = wi / sum(wi);                                      % Make sure that sum is 1
 90 |         W(:,i) = [wi; nan(max_k - kt, 1)];                      % NaN pads the unused slots
 91 |     end
 92 | 
 93 |     % Now that we have the reconstruction weights matrix, we define the 
 94 |     % sparse cost matrix M = (I-W)'*(I-W).
 95 |     M = sparse(1:n, 1:n, ones(1, n), n, n, 4 * max_k * n);
 96 |     for i=1:n
 97 |        w = W(:,i);
 98 |        j = neighborhood(:,i);
 99 |        indices = find(j ~= 0 & ~isnan(w));
100 |        j = j(indices);
101 |        w = w(indices);
102 |        M(i, j) = M(i, j) - w';
103 |        M(j, i) = M(j, i) - w;
104 |        M(j, j) = M(j, j) + w * w';
105 |     end
106 |     
107 |     % For sparse datasets, we might end up with NaNs or Infs in M. We just set them to zero for now...
108 |     M(isnan(M)) = 0;
109 |     M(isinf(M)) = 0;
110 |     
111 |     % The embedding is computed from the bottom eigenvectors of this cost matrix
112 |     disp('Compute embedding (solve eigenproblem)...');
113 |     tol = 0;
114 |     if strcmp(eig_impl, 'JDQR')
115 |         options.Disp = 0;
116 |         options.LSolver = 'bicgstab';
117 |         [mappedX, eigenvals] = jdqr(M, no_dims + 1, tol, options);
118 |     else
119 |         options.disp = 0;
120 |         options.isreal = 1;
121 |         options.issym = 1;
122 |         [mappedX, eigenvals] = eigs(M, no_dims + 1, tol, options);          % only need bottom (no_dims + 1) eigenvectors
123 |     end
124 |     [eigenvals, ind] = sort(diag(eigenvals), 'ascend');
125 |     if size(mappedX, 2) < no_dims + 1
126 |         no_dims = size(mappedX, 2) - 1;
127 |         warning(['Target dimensionality reduced to ' num2str(no_dims) '...']);
128 |     end
129 |     eigenvals = eigenvals(2:no_dims + 1);
130 |     mappedX = mappedX(:,ind(2:no_dims + 1));                                % throw away zero eigenvector/value
131 |     
132 |     % Save information on the mapping
133 |     mapping.k = k;
134 |     mapping.X = X';
135 |     mapping.vec = mappedX;
136 |     mapping.val = eigenvals;
137 |     mapping.conn_comp = conn_comp;
138 |     mapping.nbhd = distance;


--------------------------------------------------------------------------------
/computeLocalStructure.m:
--------------------------------------------------------------------------------
  1 | function [Lap, S] = computeLocalStructure(X, type, k, sigma, emb_dim)
  2 | % COMPUTELOCALSTRUCTURE Build a local-structure matrix for the rows of X.
  3 | %
  4 | % Input
  5 | %     X, n * nDim
  6 | %     type, 'LPP', 'LLE', 'LTSA' (case-insensitive), 'LPP' by default
  7 | %     k, neighborhood size, needed by all the three types, 5 by default;
  8 | %     sigma, gaussian kernel bandwidth, optSigma(X), by default, only used by LPP
  9 | %     emb_dim, embedding dimension, 2 by default, only used by LTSA
 10 | % Output
 11 | %     Lap, n * n matrix: D - G for LPP, the LLE cost matrix for LLE, the
 12 | %          alignment matrix for LTSA; [] for an unsupported type
 13 | %     S,   LPP: the heat-kernel affinity G; LLE: sparse n * n matrix with
 14 | %          S(i, j) = reconstruction weight of neighbor j for sample i;
 15 | %          [] otherwise
 16 | %
 17 | 
 18 | if ~exist('k', 'var')
 19 |     k = 5;
 20 | end
 21 | 
 22 | if ~exist('type', 'var')
 23 |     type = 'LPP';
 24 | end
 25 | 
 26 | % strcmpi, because the switch below also ignores case; with strcmp a
 27 | % lower-case type skipped the default and crashed on an undefined variable
 28 | if strcmpi(type, 'LPP') && (~exist('sigma', 'var') || isempty(sigma))
 29 |     sigma = optSigma(X);
 30 | end
 31 | 
 32 | if strcmpi(type, 'LTSA') && (~exist('emb_dim', 'var') || isempty(emb_dim))
 33 |     emb_dim = 2;
 34 | end
 35 | 
 36 | [n, d] = size(X);
 37 | switch lower(type)
 38 |     case lower('LPP')
 39 |         % Construct neighborhood graph
 40 |         % disp('Constructing neighborhood graph...');
 41 |         if size(X, 1) < 4000
 42 |             G = L2_distance(X', X');
 43 |             % Compute neighbourhood graph: keep the k+1 smallest entries
 44 |             % per sample (presumably the sample itself plus k neighbors)
 45 |             [~, ind] = sort(G);
 46 |             for i=1:size(G, 1)
 47 |                 G(i, ind((2 + k):end, i)) = 0;
 48 |             end
 49 |             G = sparse(double(G));
 50 |             G = max(G, G');             % Make sure distance matrix is symmetric
 51 |         else
 52 |             G = find_nn(X, k);
 53 |         end
 54 |         G = G .^ 2;
 55 |         G = G ./ max(max(G));
 56 |         
 57 |         % Compute weights (W = G)
 58 |         % disp('Computing weight matrices...');
 59 |         
 60 |         % Compute Gaussian kernel (heat kernel-based weights)
 61 |         G(G ~= 0) = exp(-G(G ~= 0) / (sigma ^ 2));
 62 |         
 63 |         % Construct diagonal weight matrix
 64 |         D = diag(sum(G, 2));
 65 |         
 66 |         % Compute Laplacian
 67 |         L = D - G;
 68 |         L(isnan(L)) = 0;
 69 |         L(isinf(L)) = 0;
 70 |         Lap = L;
 71 |         S = G;
 72 |     case lower('LLE')
 73 |         Dist = EuDist2(X);
 74 | 
 75 |         [~, neighborhood] = sort(Dist, 2, 'ascend');
 76 |         neighborhood = neighborhood(:,2:k+1);
 77 |         if(k > d)
 78 |             tol=1e-3; % regularlizer in case constrained fits are ill conditioned
 79 |         else
 80 |             tol=1e-12;
 81 |         end
 82 |         
 83 |         W = zeros(k,n);
 84 |         for ii=1:n
 85 |             z = X(neighborhood(ii,:),:)-repmat(X(ii,:),k,1); % shift ith pt to origin
 86 |             C = z*z';                                        % local covariance
 87 |             C = C + eye(size(C))*tol*trace(C);               % regularlization
 88 |             W(:,ii) = C\ones(k,1);                           % solve Cw=1
 89 |             W(:,ii) = W(:,ii)/sum(W(:,ii));                  % enforce sum(w)=1
 90 |         end
 91 |         
 92 |         M = sparse(1:n,1:n,ones(1,n),n,n,4*k*n);
 93 |         for ii=1:n
 94 |             w = W(:,ii);
 95 |             jj = neighborhood(ii,:)';
 96 |             M(ii,jj) = M(ii,jj) - w'; %#ok
 97 |             M(jj,ii) = M(jj,ii) - w;%#ok
 98 |             M(jj,jj) = M(jj,jj) + w*w';%#ok
 99 |         end
100 |         M = max(M,M');
101 |         M = sparse(M);
102 |         % For sparse datasets, we might end up with NaNs or Infs in M. We just set them to zero for now...
103 |         M(isnan(M)) = 0;
104 |         M(isinf(M)) = 0;
105 |         Lap = M;
106 |         % W is k * n (one column per sample) while neighborhood is n * k,
107 |         % so transpose neighborhood before flattening; otherwise the
108 |         % weights get attached to the wrong neighbors
109 |         rowOf = repmat(1:n, k, 1);
110 |         nbrT = neighborhood';
111 |         S = sparse(rowOf(:), nbrT(:), W(:), n, n, n*k);
112 |     case lower('LTSA')
113 |         % Compute neighborhood indices
114 |         % disp('Find nearest neighbors...');
115 |         [~, ni] = find_nn(X, k);
116 |         
117 |         % Compute local information matrix for all datapoints
118 |         % disp('Compute local information matrices for all datapoints...');
119 |         Bi = cell(1, n);
120 |         for i=1:n
121 |             % Compute correlation matrix W
122 |             Ii = ni(i,:);
123 |             Ii = Ii(Ii ~= 0);
124 |             kt = numel(Ii);
125 |             Xi = X(Ii,:) - repmat(mean(X(Ii,:), 1), [kt 1]);
126 |             W = Xi * Xi';
127 |             W = (W + W') / 2;
128 |             
129 |             % Compute local information by computing d largest eigenvectors of W
130 |             [Vi, Si] = schur(full(W));
131 |             [~, Ji] = sort(-diag(Si));
132 |             if length(Ji) < emb_dim
133 |                 % a smaller neighborhood lowers emb_dim for this and all
134 |                 % following samples
135 |                 emb_dim = length(Ji);
136 |             end
137 |             Vi = Vi(:,Ji(1:emb_dim));
138 |             
139 |             % Store eigenvectors in G (Vi is the space with the maximum variance, i.e. a good approximation of the tangent space at point Xi)
140 |             % The constant 1/sqrt(kt) serves as a centering matrix
141 |             Gi = double([repmat(1 / sqrt(kt), [kt 1]) Vi]);
142 |             
143 |             % Compute Bi = I - Gi * Gi'
144 |             Bi{i} = eye(kt) - Gi * Gi';
145 |         end
146 |         
147 |         % Construct sparse matrix B (= alignment matrix)
148 |         % disp('Construct alignment matrix...');
149 |         B = speye(n);
150 |         for i=1:n
151 |             Ii = ni(i,:);
152 |             Ii = Ii(Ii ~= 0);
153 |             B(Ii, Ii) = B(Ii, Ii) + Bi{i};                          % sum Bi over all points
154 |             B(i, i) = B(i, i) - 1;
155 |         end
156 |         B = (B + B') / 2;                                           % make sure B is symmetric
157 |         
158 |         % For sparse datasets, we might end up with NaNs in B. We just set them to zero for now...
159 |         B(isnan(B)) = 0;
160 |         B(isinf(B)) = 0;
161 |         Lap = B;
162 |         S = [];
163 |     otherwise
164 |         Lap = [];
165 |         S = [];
166 |         disp('not supported yet!');
167 | end


--------------------------------------------------------------------------------
/sll_opts.m:
--------------------------------------------------------------------------------
  1 | function opts = sll_opts(opts)
  2 | 
  3 | % Options for Sparse Learning Library
  4 | %
  5 | % Notice:
  6 | % If one or several (even all) fields are empty, sll_opts shall assign the
  7 | % default settings.
  8 | %
  9 | % If some fields of opts have been defined, sll_opts shall check the fields
 10 | % for possible errors.
 11 | %
 12 | % sll_opts may also be called without an argument (or with []); a struct
 13 | % holding only the defaults assigned below is then returned.
 14 | %
 15 | % Table of Options.  * * indicates default value.
 16 | %
 17 | %% FIELD            DESCRIPTION
 18 | %% Starting point
 19 | %
 20 | % .x0               Starting point of x.
 21 | %                   Initialized according to .init.
 22 | %
 23 | % .c0               Starting point for the intercept c (for Logistic Loss)
 24 | %                   Initialized according to .init.
 25 | %
 26 | % .init             .init specifies how to initialize x.
 27 | %                       * 0 => .x0 is set by the function initFactor *
 28 | %                         1 => .x0 and .c0 are defined
 29 | %                         2 => .x0= zeros(n,1), .c0=0
 30 | %                   Any other value, and .init=1 without .x0, is reset to 0.
 31 | %
 32 | %% Termination
 33 | %
 34 | % .maxIter          Maximum number of iterations.
 35 | %                       *1e4*  (also used when .maxIter < 1)
 36 | %
 37 | % .tol              Tolerance parameter.
 38 | %                       *1e-3*
 39 | %
 40 | % .tFlag            Flag for termination.
 41 | %                       * 0 => abs( funVal(i)- funVal(i-1) ) <= .tol *
 42 | %                         1 => abs( funVal(i)- funVal(i-1) )
 43 | %                              <= .tol max( funVal(i-1), 1)
 44 | %                         2 => funVal(i) <= .tol
 45 | %                         3 => norm( x_i - x_{i-1}, 2) <= .tol
 46 | %                         4 => norm( x_i - x_{i-1}, 2)
 47 | %                              <= .tol max( norm( x_{i-1}, 2), 1 )
 48 | %                         5 => Run the code for .maxIter iterations
 49 | %                   Values are clamped to [0, 5] and rounded down.
 50 | %
 51 | %% Normalization
 52 | %
 53 | % .nFlag            Flag for implicit normalization of A.
 54 | %                       * 0 => Do not normalize A *
 55 | %                         1 => A=(A-repmat(mu, m, 1))*diag(nu)^{-1}
 56 | %                         2 => A=diag(nu)^{-1}*(A-repmat(mu,m,1))
 57 | %
 58 | % .mu               Row vector to be subtracted from each sample.
 59 | %                           (.mu is used when .nFlag=1 or 2)
 60 | %                       If .mu is not specified, then
 61 | %                            * .mu=mean(A,1) *
 62 | %
 63 | % .nu               Weight (column) vector for normalization
 64 | %                           (.nu is used when .nFlag=1 or 2)
 65 | %                       If .nu is not specified, then
 66 | %                       * .nFlag=1 => .nu=(sum(A.^2, 1)'/m).^{0.5} *
 67 | %                       * .nFlag=2 => .nu=(sum(A.^2, 2)/n).^{0.5} *
 68 | %
 69 | %% Regularization
 70 | %
 71 | % .rFlag            Flag for regularization
 72 | %                           (.rFlag is used for the functions with "R")
 73 | %                        * 0 => lambda is the regularization parameter *
 74 | %                          1 => lambda = lambda * lambda_{max}
 75 | %                               where lambda_{max} is the maximum lambda
 76 | %                               that yields the zero solution
 77 | % .rsL2              Regularization parameter value of the squared L2 norm
 78 | %                           (.rsL2 is used only for l1 regularization)
 79 | %                        *.rsL2=0*
 80 | %                    If .rFlag=0, .rsL2 is used without scaling
 81 | %                       .rFlag=1, .rsL2=.rsL2 * lambda_{max}
 82 | %                    (.rsL2 is not touched by sll_opts itself.)
 83 | %
 84 | %% Method & Line Search
 85 | % .lFlag            Binary flag; any value other than 1 is reset to *0*.
 86 | % .mFlag            Binary flag; any value other than 1 is reset to *0*.
 87 | %
 88 | %% Group & Others
 89 | %
 90 | % .ind              Indices for k groups (a k+1 row vector)
 91 | %                   For group lasso only
 92 | %                   Indices for the i-th group are (ind(i)+1):ind(i+1)
 93 | %
 94 | % .q                Value of q in L1/Lq regularization
 95 | %                      *.q=2*
 96 | %
 97 | % .sWeight          The sample (positive and negative) weight
 98 | %                   For the Logistic Loss only
 99 | %                   Positive sample: .sWeight(1)
100 | %                   Negative sample: .sWeight(2)
101 | %                   *1/m for both positive and negative samples*
102 | %
103 | % .gWeight          The weight for different groups
104 | %                      *.gWeight=1*
105 | %
106 | % .fName            The name of the function
107 | %
108 | % Only .init, .maxIter, .tol, .tFlag, .nFlag, .rFlag, .lFlag and .mFlag are
109 | % checked or defaulted here; every other field is returned unchanged.
110 | %
111 | %% Copyright (C) 2009-2010 Jun Liu, and Jieping Ye
112 | %
113 | % You are suggested to first read the Manual.
114 | %
115 | % For any problem, please contact with Jun Liu via j.liu@asu.edu
116 | %
117 | % Last modified 7 August 2009.
118 | 
119 | % Allow a call without arguments: isfield([], ...) is false, so every
120 | % default below is applied and the result becomes a struct.
121 | if nargin < 1
122 |     opts = [];
123 | end
124 | 
125 | %% Starting point
126 | 
127 | if ~isfield(opts,'init')
128 |     opts.init = 0;      % .init not specified: use the default
129 | elseif (opts.init~=0) && (opts.init~=1) && (opts.init~=2)
130 |     opts.init = 0;      % .init is not 0, 1, or 2: use the default
131 | elseif (opts.init==1) && ~isfield(opts,'x0')
132 |     opts.init = 0;      % .init=1 requires a user-supplied .x0
133 | end
134 | 
135 | %% Termination
136 | 
137 | if ~isfield(opts,'maxIter')
138 |     opts.maxIter = 10000;
139 | elseif (opts.maxIter<1)
140 |     opts.maxIter = 10000;
141 | end
142 | 
143 | if ~isfield(opts,'tol')
144 |     opts.tol = 1e-3;
145 | end
146 | 
147 | if ~isfield(opts,'tFlag')
148 |     opts.tFlag = 0;
149 | elseif opts.tFlag<0
150 |     opts.tFlag = 0;
151 | elseif opts.tFlag>5
152 |     opts.tFlag = 5;
153 | else
154 |     opts.tFlag = floor(opts.tFlag);
155 | end
156 | 
157 | %% Normalization
158 | 
159 | if ~isfield(opts,'nFlag')
160 |     opts.nFlag = 0;
161 | elseif (opts.nFlag~=1) && (opts.nFlag~=2)
162 |     opts.nFlag = 0;
163 | end
164 | 
165 | %% Regularization, Method (Line Search)
166 | 
167 | % .rFlag, .lFlag and .mFlag are binary: anything other than 1 becomes 0.
168 | binaryFlags = {'rFlag', 'lFlag', 'mFlag'};
169 | for iFlag = 1:numel(binaryFlags)
170 |     flagName = binaryFlags{iFlag};
171 |     if ~isfield(opts, flagName)
172 |         opts.(flagName) = 0;
173 |     elseif (opts.(flagName)~=1)
174 |         opts.(flagName) = 0;
175 |     end
176 | end


--------------------------------------------------------------------------------
/run_exp1_func.m:
--------------------------------------------------------------------------------
  1 | function [FeaNumCandi_aio, res_gs_aio, res_aio_aio, res_gs_ps_aio] = run_exp1_func(datasets, candiAlgs, username, password)
  2 | % RUN_EXP1_FUNC  Run the selected feature-selection drivers on each dataset.
  3 | %
  4 | %   datasets  - dataset name or cell array of names (a built-in list is
  5 | %               used when missing or empty)
  6 | %   candiAlgs - algorithm name or cell array of names (a built-in list is
  7 | %               used when missing or empty); see lookup_fs_func for the
  8 | %               names that are recognised (matched case-insensitively)
  9 | %   username, password - forwarded to mdcs_check and email_notify
 10 | %
 11 | %   Each output is a length(datasets)-by-length(candiAlgs) cell array. Cell
 12 | %   (id, iAlg) holds the corresponding output of the fs_unsup_*_single_func
 13 | %   driver and stays empty for algorithms that are not supported.
 14 | %
 15 | %   For every dataset the function changes into [prefix, filesep, dataset]
 16 | %   to call plot_result, then changes back to the starting directory.
 17 | 
 18 | [~, ~, prefix] = mdcs_check(username, password);
 19 | 
 20 | if ~exist('datasets', 'var') || isempty(datasets)
 21 |     % datasets = {'test'};
 22 |     datasets = {'medical_706n_1449d_17c', 'PIE_Pose27_1428n_1024d_68c', 'USPS49_1673n_256d_2c', 'mfeat_pix_2000n_240d_10c'};
 23 | end
 24 | if ischar(datasets); datasets = {datasets}; end
 25 | 
 26 | if ~exist('candiAlgs', 'var') || isempty(candiAlgs)
 27 |     % Every default name must be one that lookup_fs_func recognises; the
 28 |     % bare names 'JELSR' and 'FSSL' are not, so concrete variants are used.
 29 |     candiAlgs = {'AllFea', 'MaxVar', 'LapScore', 'TraceRatio', 'SPEC', 'LLCFS', 'SPFS', 'MCFS', 'UDFS', 'NDFS', 'RUFS',  'JELSR_lpp', 'GLSPFS', 'FSSL_11_11_5'};
 30 | end
 31 | if ischar(candiAlgs); candiAlgs = {candiAlgs}; end
 32 | 
 33 | % exp_settings is not an input of this function, so it always starts from
 34 | % these values.
 35 | exp_settings = [];
 36 | exp_settings.FeaNumCandi = [[5:5:50],[10:10:150],[50:50:300]];
 37 | exp_settings.nKmeans = 20;
 38 | exp_settings.prefix_mdcs = prefix;
 39 | 
 40 | FeaNumCandi = exp_settings.FeaNumCandi;
 41 | FeaNumCandi_aio = cell(length(datasets), length(candiAlgs));
 42 | res_gs_aio = cell(length(datasets), length(candiAlgs));
 43 | res_aio_aio = cell(length(datasets), length(candiAlgs));
 44 | res_gs_ps_aio = cell(length(datasets), length(candiAlgs));
 45 | 
 46 | root_dir = pwd;
 47 | addpath(root_dir);
 48 | % Remove root_dir from the path when the function exits, including on error.
 49 | pathCleanup = onCleanup(@() rmpath(root_dir));
 50 | 
 51 | for id = 1:length(datasets)
 52 |     dataset = datasets{id};
 53 |     X = extractXY(dataset);
 54 |     % Only feature counts below the dimensionality of the data are kept.
 55 |     exp_settings.FeaNumCandi = FeaNumCandi(FeaNumCandi < size(X, 2));
 56 |     clear X;
 57 | 
 58 |     disp(['data = ', dataset, ' ...']);
 59 |     dataset_dir = [prefix, filesep, dataset];
 60 |     try
 61 |         if ~exist(dataset_dir, 'dir')
 62 |             mkdir(dataset_dir);
 63 |         end
 64 |         exp_settings.prefix_mdcs = dataset_dir;
 65 |     catch
 66 |         disp(['create dir: ', dataset_dir, ' failed, check the authorization']);
 67 |         % Fall back to the base prefix rather than silently reusing the
 68 |         % directory of the previous dataset.
 69 |         exp_settings.prefix_mdcs = prefix;
 70 |     end
 71 | 
 72 |     for iAlg = 1:length(candiAlgs)
 73 |         algo = candiAlgs{iAlg};
 74 |         disp(['algo = ', algo, ' ...']);
 75 |         fs_func = lookup_fs_func(algo);
 76 |         if isempty(fs_func)
 77 |             disp('not supported yet');
 78 |         else
 79 |             [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_func(dataset, exp_settings);
 80 |         end
 81 |         disp(['algo = ', algo, ' done']);
 82 |         email_notify(username, password, [username, '@ios.ac.cn'], [algo, ' on ', dataset, ' done']);
 83 |     end
 84 | 
 85 |     cd (exp_settings.prefix_mdcs);
 86 |     try
 87 |         plot_result(dataset, candiAlgs, 0);
 88 |         email_notify(username, password, [username, '@ios.ac.cn'], ['all algo on ', dataset, ' done'], [dataset, '.tex']);
 89 |     catch err
 90 |         cd(root_dir);   % do not leave the caller in the dataset directory
 91 |         rethrow(err);
 92 |     end
 93 |     cd(root_dir);
 94 |     disp(['data = ', dataset, ' done']);
 95 | end
 96 | 
 97 | function fs_func = lookup_fs_func(algo)
 98 | % Return the handle of the *_single_func driver for ALGO (matched
 99 | % case-insensitively), or [] when the algorithm is not supported.
100 | % Explicit handles keep every driver visible to static dependency analysis.
101 | switch lower(algo)
102 |     case 'allfea',          fs_func = @fs_unsup_allfea_single_func;
103 |     case 'maxvar',          fs_func = @fs_unsup_maxvar_single_func;
104 |     case 'lapscore',        fs_func = @fs_unsup_lapscore_single_func;
105 |     case 'spec',            fs_func = @fs_unsup_spec_single_func;
106 |     case 'traceratio',      fs_func = @fs_unsup_traceratio_single_func;
107 |     case 'llcfs',           fs_func = @fs_unsup_llcfs_single_func;
108 |     case 'udfs',            fs_func = @fs_unsup_udfs_single_func;
109 |     case 'spfs',            fs_func = @fs_unsup_spfs_single_func;
110 |     case 'mcfs',            fs_func = @fs_unsup_mcfs_single_func;
111 |     case 'ndfs',            fs_func = @fs_unsup_ndfs_single_func;
112 |     case 'rufs',            fs_func = @fs_unsup_rufs_single_func;
113 |     case 'jelsr_lpp',       fs_func = @fs_unsup_jelsr_lpp_single_func;
114 |     case 'jelsr_lle',       fs_func = @fs_unsup_jelsr_lle_single_func;
115 |     case 'jelsr_liang_lpp', fs_func = @fs_unsup_jelsr_liang_lpp_single_func;
116 |     case 'jelsr_liang_lle', fs_func = @fs_unsup_jelsr_liang_lle_single_func;
117 |     case 'glspfs',          fs_func = @fs_unsup_glspfs_single_func;
118 |     case 'fssl_11_11_1',    fs_func = @fs_unsup_fsasl_11_11_1_single_func;
119 |     case 'fssl_11_11_5',    fs_func = @fs_unsup_fsasl_11_11_5_single_func;
120 |     case 'fssl_11_5_5',     fs_func = @fs_unsup_fsasl_11_5_5_single_func;
121 |     otherwise,              fs_func = [];   % includes 'CGSSL'
122 | end


--------------------------------------------------------------------------------
/fs_unsup_mcfs.m:
--------------------------------------------------------------------------------
  1 | function [FeaIndex,FeaNumCandi] = fs_unsup_mcfs(fea,FeaNumCandi,options)
  2 | % MCFS: Feature Selection for Multi Class/Cluster data
  3 | %
  4 | %       [FeaIndex,FeaNumCandi] = fs_unsup_mcfs(fea,FeaNumCandi,options)
  5 | % 
  6 | %             Input:
  7 | %               fea     - data matrix. Each row vector of data is a
  8 | %                         sample vector. 
  9 | %        FeaNumCandi    - The number of features to be selected. May be a
 10 | %                         vector; duplicates and entries larger than the
 11 | %                         number of columns of fea are removed.
 12 | %
 13 | %           options     - Struct value in Matlab (optional; defaults are
 14 | %                         used when omitted). The fields in options
 15 | %                         that can be set:
 16 | %
 17 | %                    gnd       -  The label of the data. You can provide
 18 | %                                 gnd if it is a supervised feature
 19 | %                                 selection problem. 
 20 | %                      W       -  Affinity matrix. You can either call
 21 | %                                 "constructW" to construct the W, or
 22 | %                                 construct it by yourself.
 23 | %                                 If W is not provided, a k-NN graph is
 24 | %                                 built with "constructW", where k is a
 25 | %                                 parameter and t is the mean pairwise
 26 | %                                 distance (estimated on 3000 random
 27 | %                                 samples when there are more than 3000).
 28 | %                                 (If gnd is provided, W is ignored)
 29 | %                      k       -  The parameter for k-NN graph (Default is 5)
 30 | %                                 If gnd or W is provided, this parameter will be
 31 | %                                 ignored. 
 32 | %         nUseEigenfunction    -  Indicate how many eigen functions will be
 33 | %                                 used. If gnd is provided, this parameter
 34 | %                                 will be ignored. (Default is 5)
 35 | %
 36 | %                     Method   -  Method used to select features. Choices
 37 | %                            are:
 38 | %                               {'LASSO_LARs'}  -  (the default)
 39 | %                               'LASSO_SLEP'       (not implemented, errors)
 40 | %                               'GROUPLASSO_SLEP'  (not implemented, errors)
 41 | %
 42 | %                         Other fields are:  
 43 | %                    * ratio: [default 1] when trying to select M features,
 44 | %                         keep ratio*M non-zero entries in each eigenvector
 45 | %                         (dimension).
 46 | %                    * NotEnoughNonZero: strategy when non-zero entries are
 47 | %                         not enough to select the required number of
 48 | %                         features. It can be the following values:
 49 | %                       * 0: fire an error and exit
 50 | %                       * 1: ignore
 51 | %                       * 2: try to find more non-zero entries, fire error
 52 | %                            when fail
 53 | %                       * 3: [default] try to find more non-zero entries,
 54 | %                            ignore when fail
 55 | %
 56 | %
 57 | %             Output:
 58 | %               FeaIndex -  cell variable. Each element in FeaIndex is the
 59 | %                           index of the selected features (the number of
 60 | %                           feature is specified in FeaNumCandi). 
 61 | %                            length(FeaIndex) == length(FeaNumCandi)       
 62 | %            FeaNumCandi -  the candidate list actually used (sorted,
 63 | %                           unique, entries above size(fea,2) removed)
 64 | %
 65 | %===================================================================
 66 | %    Examples:
 67 | %           
 68 | %-------------------------------------------------------------------
 69 | %    (Supervised feature selection)
 70 | %
 71 | %       fea = rand(50,70);
 72 | %       gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4];
 73 | %
 74 | %       options = [];
 75 | %       options.gnd = gnd;
 76 | %       FeaNumCandi = [10:5:60];
 77 | %
 78 | %       [FeaIndex,FeaNumCandi] = fs_unsup_mcfs(fea, FeaNumCandi,options);
 79 | %       
 80 | %       for i = 1:length(FeaNumCandi)
 81 | %           SelectFeaIdx = FeaIndex{i};  
 82 | %           feaNew = fea(:,SelectFeaIdx);
 83 | %       end
 84 | %
 85 | %-------------------------------------------------------------------
 86 | %    (Unsupervised feature selection)
 87 | %
 88 | %       fea = rand(50,70);
 89 | %
 90 | %       options = [];
 91 | %       options.k = 5; %For unsupervised feature selection, you should tune
 92 | %                      %this parameter k, the default k is 5.
 93 | %       options.nUseEigenfunction = 4;  %You should tune this parameter.
 94 | %
 95 | %       FeaNumCandi = [10:5:60];
 96 | %
 97 | %       [FeaIndex,FeaNumCandi] = fs_unsup_mcfs(fea,FeaNumCandi,options);
 98 | %
 99 | %       for i = 1:length(FeaNumCandi)
100 | %           SelectFeaIdx = FeaIndex{i};  
101 | %           feaNew = fea(:,SelectFeaIdx);
102 | %       end
103 | %
104 | %===================================================================
105 | %
106 | %Reference:
107 | %
108 | %   Deng Cai, Chiyuan Zhang, Xiaofei He, "Unsupervised Feature Selection
109 | %   for Multi-cluster Data",16th ACM SIGKDD Conference on Knowledge
110 | %   Discovery and Data Mining (KDD'10), July 2010. 
111 | %
112 | %   version 1.1 --Dec/2011 
113 | %   version 1.0 --Dec/2009 
114 | %
115 | %   Written by Deng Cai (dengcai AT gmail.com)
116 | %              Chiyuan Zhang (pluskid AT gmail.com)
117 | %
118 | 
119 | % options is optional: isfield([], ...) is false, so all defaults apply.
120 | if nargin < 3
121 |     options = [];
122 | end
123 | 
124 | [nSmp,mFea] = size(fea);
125 | FeaNumCandi = unique(FeaNumCandi);
126 | FeaNumCandi(FeaNumCandi > mFea) = [];
127 | 
128 | nUseEigenfunction = 5;
129 | if isfield(options,'nUseEigenfunction')
130 |    nUseEigenfunction = options.nUseEigenfunction;
131 | end
132 | 
133 | k = 5;
134 | if isfield(options,'k')
135 |    k = options.k;
136 | end
137 | 
138 | ratio = 1;
139 | if isfield(options,'ratio')
140 |     ratio = options.ratio;
141 | end
142 | 
143 | NotEnoughNonZero = 3;
144 | if isfield(options, 'NotEnoughNonZero')
145 |     NotEnoughNonZero = options.NotEnoughNonZero;
146 | end
147 | 
148 | if isfield(options,'gnd')
149 |     % Supervised case: build the regression targets Y from the labels.
150 |     if length(options.gnd) ~= nSmp
151 |         error('gnd does not match!');
152 |     else
153 |         gnd = options.gnd;
154 |     end
155 |     ClassLabel = unique(gnd);
156 |     nClass = length(ClassLabel);
157 |     
158 |     % Fixed seed so the random class codes are reproducible.
159 |     rand('state',0);
160 |     Y = rand(nClass,nClass);
161 |     Z = zeros(nSmp,nClass);
162 |     for i=1:nClass
163 |         idx = find(gnd==ClassLabel(i));
164 |         Z(idx,:) = repmat(Y(i,:),length(idx),1);
165 |     end
166 |     % Orthogonalise against the constant vector, then drop that column.
167 |     Z(:,1) = ones(nSmp,1);
168 |     [Y,~] = qr(Z,0);
169 |     Y(:,1) = [];
170 | else
171 |     % Unsupervised case: Y comes from Eigenmap applied to the affinity graph.
172 |     if isfield(options,'W')
173 |         W = options.W;
174 |     else
175 |         Woptions.k = k;
176 |         if nSmp > 3000
177 |             tmpD = EuDist2(fea(randsample(nSmp,3000),:));
178 |         else
179 |             tmpD = EuDist2(fea);
180 |         end
181 |         Woptions.t = mean(mean(tmpD));
182 |         W = constructW(fea,Woptions);
183 |     end
184 |     
185 |     Y = Eigenmap(W,nUseEigenfunction);
186 | end
187 | 
188 | options.ReguType = 'RidgeLasso';
189 | if ~isfield(options,'Method')
190 |     options.Method = 'LASSO_LARs';
191 | end
192 | 
193 | switch lower(options.Method)
194 |     case {lower('LASSO_LARs')}
195 |         options.LASSOway = 'LARs';
196 |         options.LassoCardi = ceil(FeaNumCandi*ratio);
197 |         eigvectorAll = SR(options, Y, fea);
198 |         
199 |         FeaIndex = cell(1,length(FeaNumCandi));
200 |         for i = 1:length(FeaNumCandi)
201 |             nFea = FeaNumCandi(i);
202 |             % Score of a feature: its largest absolute coefficient.
203 |             [score,idx] = sort(max(abs(eigvectorAll{i}),[],2),'descend');
204 |             
205 |             if score(nFea) == 0
206 |                 if NotEnoughNonZero == 0       % fire error
207 |                     error('Not enough fea!');
208 |                 elseif NotEnoughNonZero == 1   % ignore
209 |                     warning('Not enough fea!');
210 |                 else
211 |                     % Try the solutions computed for the larger candidates.
212 |                     % idxAlt starts as the current ranking so that it is
213 |                     % defined even when i is the last candidate and the
214 |                     % loop below has nothing to try.
215 |                     found = false;
216 |                     idxAlt = idx;
217 |                     for j = i+1:length(FeaNumCandi)
218 |                         [scoreAlt,idxAlt] = sort(max(abs(eigvectorAll{j}),[],2),'descend');
219 |                         if (scoreAlt(nFea) > 0)
220 |                             found = true;
221 |                             break;
222 |                         end
223 |                     end
224 |                     if ~found
225 |                         if (NotEnoughNonZero == 2)
226 |                             error('Not enough fea, tried to find more but failed!');
227 |                         else
228 |                             warning('Not enough fea, tried to find more but failed!');
229 |                         end
230 |                     end
231 |                     idx = idxAlt;
232 |                 end
233 |             end
234 |             FeaIndex{i} = idx(1:nFea);
235 |         end
236 |     case {lower('LASSO_SLEP')}
237 |         error('Comming soon!');
238 |     case {lower('GROUPLASSO_SLEP')}
239 |         error('Comming soon!');
240 |     otherwise
241 |         error('method does not exist!');
242 | end


--------------------------------------------------------------------------------
/plot_result.m:
--------------------------------------------------------------------------------
  1 | function plot_result(dataset, candiAlgs, plot_flag)
  2 | % Gather [dataset, '_best_result_', alg, '.mat'] for every algorithm in
  3 | % candiAlgs, save the merged results plus a LaTeX summary, and (when
  4 | % plot_flag is nonzero, the default) draw one figure per metric.
  5 | %==========================setup=======================================
  6 | if ~exist('plot_flag', 'var')
  7 |     plot_flag = 1;
  8 | end
  9 | 
 10 | if ~exist('candiAlgs', 'var') || isempty(candiAlgs)
 11 |     candiAlgs = { 'LapScore', 'MCFS',  'LLCFS', 'UDFS', 'NDFS',  'SPFS', 'RUFS',  'JELSR_lpp', 'GLSPFS', 'FSSL_11_11_5'};
 12 | end
 13 | candiLineStyles = {'-', '-.',    '-', '-.',    '-', '-.',    '-', '-.',     '-', '--',   '-', '--',    '-', '--',    '-', '--',    '-', '--',    '-', '--',    '-', '--'};
 14 | candiMarkers = {'o', '+', 's', 'd',               'o', '+', 's', 'd',        'o', '+', 's', 'd',        'o', '+', 's', 'd',  };
 15 | candiColors = [0 0 0; 1 0 1; 0 1 1; 1 0 0; 0 1 0; 0 0 1;           0 0 0; 1 0 1; 0 1 1; 1 0 0; 0 1 0; 0 0 1;           0 0 0; 1 0 1; 0 1 1; 1 0 0; 0 1 0; 0 0 1];
 16 | candiMarkerSpacing = [5,5;5,5;5,5;5,5;5,5;5,5;         5,5;5,5;5,5;5,5;5,5;5,5;  5,5;5,5;5,5;5,5;5,5;5,5; ];
 17 | 
 18 | %=====================================================================
 19 | res_algs = [];
 20 | FeaNumCandi = [];   % filled from the result files; stays empty if none stores it
 21 | algs = {};
 22 | lineStyles = {};
 23 | markers = {};
 24 | colors = [];
 25 | markerSpacing = [];
 26 | 
 27 | ii = 0;
 28 | for idx = 1:length(candiAlgs )
 29 |     res_file = [dataset, '_best_result_', candiAlgs{idx}, '.mat'];
 30 |     if ~exist(res_file, 'file')
 31 |         continue;
 32 |     end
 33 |     ii = ii + 1;
 34 |     data = load(res_file);   % struct load: file contents cannot clobber locals
 35 |     if isfield(data, 'FeaNumCandi')
 36 |         FeaNumCandi = data.FeaNumCandi;
 37 |     end
 38 |     if ~isfield(data, 'res_gs')
 39 |         continue;
 40 |     end
 41 |     res_gs = data.res_gs;
 42 |     if ii == 1
 43 |         res_algs = res_gs;
 44 |     else
 45 |         fn = fieldnames(res_gs);
 46 |         for i2 = 1:length(fn)
 47 |             if ~isfield(res_algs, (fn{i2}))
 48 |                 res_algs.(fn{i2}) = []; %place holder, should be removed, some algos did not record time2
 49 |             end
 50 |             res_algs.(fn{i2}) = [res_algs.(fn{i2}); res_gs.(fn{i2})];
 51 |         end
 52 |     end
 53 |     % The style tables are finite, so wrap around instead of running off the end.
 54 |     algs{end+1} = candiAlgs{idx};
 55 |     lineStyles{end+1} = candiLineStyles{1 + mod(ii - 1, numel(candiLineStyles))};
 56 |     markers{end+1} = candiMarkers{1 + mod(ii - 1, numel(candiMarkers))};
 57 |     colors = [colors; candiColors(1 + mod(ii - 1, size(candiColors, 1)), :)];
 58 |     markerSpacing = [markerSpacing; candiMarkerSpacing(1 + mod(ii - 1, size(candiMarkerSpacing, 1)), :)];
 59 | end
 60 | 
 61 | if ~isempty(res_algs)
 62 |     res_gs_tt = [];
 63 |     % res_gs_tt = compute_ttest(dataset, algs, length(FeaNumCandi));
 64 |     if isvector(FeaNumCandi ) && length(FeaNumCandi) > 10
 65 |         % A drop in FeaNumCandi ends one sweep; sweeps become disjoint ranges.
 66 |         drops = find(FeaNumCandi(1:end-1) - FeaNumCandi(2:end) > 0);
 67 |         firstIdx = [1; drops(:) + 1];
 68 |         lastIdx = [drops(:); length(FeaNumCandi)];
 69 |         ids = cell(length(firstIdx), 1);
 70 |         for i1 = 1:length(ids)
 71 |             ids{i1} = firstIdx(i1):lastIdx(i1);
 72 |         end
 73 |         message = compute_message(algs, res_algs, dataset, ids);
 74 |     else
 75 |         message = [];
 76 |     end
 77 |     save(['res_algs_', dataset, '.mat'], 'algs', 'res_algs', 'res_gs_tt', 'message');
 78 |     fns = {'mean_acc', 'mean_nmi_sqrt', 'mean_nmi_max', 'mean_purity', 'mean_prec', 'mean_recall', 'mean_f1', ...
 79 |         'best_obj_acc', 'best_obj_nmi_sqrt', 'best_obj_nmi_max', 'best_obj_purity', 'best_obj_prec', 'best_obj_recall', 'best_obj_f1',...
 80 |         'jac', 'red','loocv'};
 81 |     if plot_flag
 82 |         xData = (1:length(FeaNumCandi));
 83 |         for i1 = 1:length(fns)
 84 |             figure;
 85 |             my_prettyPlot(xData, res_algs.(fns{i1}), colors, lineStyles, markers, markerSpacing, dataset, '# of features', fns{i1}, algs, 'SouthWest');
 86 |             if strcmp(fns{i1},'loocv')
 87 |                 my_prettyPlot(xData, 1-res_algs.(fns{i1}), colors, lineStyles, markers, markerSpacing, dataset, '# of features', fns{i1}, algs, 'SouthWest');
 88 |             end
 89 |         end
 90 |     end
 91 | end
 92 | end
 93 | 
 94 | function my_prettyPlot(xData, yData, colors, lineStyles, markers, markerSpacing, title, xlabel, ylabel, legends, legendLoc)
 95 | % Collect the styling arguments into an options struct and call prettyPlot.
 96 | % markerSpacing is accepted but not forwarded; xlimits is [1, length(xData)].
 97 | % Each value is wrapped in {} so struct() stores cell arrays as one field value.
 98 | options = struct( ...
 99 |     'colors',     {colors}, ...
100 |     'lineStyles', {lineStyles}, ...
101 |     'markers',    {markers}, ...
102 |     'title',      {title}, ...
103 |     'xlabel',     {xlabel}, ...
104 |     'ylabel',     {ylabel}, ...
105 |     'legendStr',  {legends}, ...
106 |     'legend',     {legends}, ...
107 |     'legendLoc',  {legendLoc}, ...
108 |     'xlimits',    {[1, length(xData)]});
109 | prettyPlot(xData, yData, options);
110 | hold off;
111 | end
112 | 
113 | 
114 | function res_gs_tt = compute_ttest(dataset, candiAlgs, nFeaNumCandi)
115 | % Pairwise t-tests between algorithms, one result struct per feature count.
116 | %
117 | %   dataset      - dataset name, used to build the result file names
118 | %   candiAlgs    - cell array of algorithm names
119 | %   nFeaNumCandi - number of feature-count settings
120 | %
121 | % res_gs_tt{1, k}.<metric>_tt and .<metric>_tt_p are square matrices of
122 | % size length(candiAlgs). Entry (a, b) with a < b receives the two outputs
123 | % of ttest for that pair of algorithms; every entry that is not computed
124 | % (missing file, missing res_aio, diagonal, lower triangle) stays at -1.
125 | %
126 | % NOTE(review): the file name pattern used here differs from the one in
127 | % plot_result ([dataset, '_best_result_', alg, '.mat']) -- confirm which
128 | % one matches the files actually written.
129 | res_gs_tt = cell(1, nFeaNumCandi);
130 | fns = {'aio_acc', 'aio_nmi_max', 'aio_nmi_sqrt', 'aio_purity', 'aio_prec', 'aio_recall', 'aio_f1'};
131 | fns2 = {'mean_acc', 'mean_nmi_max', 'mean_nmi_sqrt', 'mean_purity', 'mean_prec', 'mean_recall', 'mean_f1'};
132 | nAlgs = length(candiAlgs);
133 | for i1 = 1:nFeaNumCandi
134 |     for i2 = 1:length(fns)
135 |         res_gs_tt{1, i1}.([fns{i2}, '_tt']) = ones(nAlgs) * -1;
136 |         res_gs_tt{1, i1}.([fns{i2}, '_tt_p']) = ones(nAlgs) * -1;
137 |     end
138 | end
139 | 
140 | for i1 = 1:nAlgs
141 |     file1 = [dataset, 'best_result_', dataset, '_', candiAlgs{i1}, '.mat'];
142 |     if ~exist(file1, 'file'); continue; end
143 |     d1 = load(file1);   % struct load keeps the two result sets separate
144 |     if ~isfield(d1, 'res_aio'); continue; end
145 | 
146 |     for i2 = i1+1:nAlgs
147 |         % Load the OTHER algorithm of the pair (index i2, not i1 again).
148 |         file2 = [dataset, 'best_result_', dataset, '_', candiAlgs{i2}, '.mat'];
149 |         if ~exist(file2, 'file'); continue; end
150 |         d2 = load(file2);
151 |         if ~isfield(d2, 'res_aio'); continue; end
152 | 
153 |         for i3 = 1:nFeaNumCandi
154 |             for i4 = 1:length(fns)
155 |                 % res_gs_ps.<mean metric>(i3) is used as the row index into res_aio.
156 |                 b1_idx = d1.res_gs_ps.(fns2{i4})(i3);
157 |                 b2_idx = d2.res_gs_ps.(fns2{i4})(i3);
158 |                 r1 = d1.res_aio{b1_idx, i3}.(fns{i4});
159 |                 r2 = d2.res_aio{b2_idx, i3}.(fns{i4});
160 |                 [t1, t2] = ttest(r1, r2);
161 |                 res_gs_tt{1, i3}.([fns{i4},'_tt'])(i1, i2) = t1;
162 |                 res_gs_tt{1, i3}.([fns{i4},'_tt_p'])(i1, i2) = t2;
163 |             end
164 |         end
165 |     end
166 | end
167 | end
168 | 
169 | function message = compute_message(algs, res_algs, dataset, ids)
170 | % Build a LaTeX table summarising res_algs and write it to [dataset, '.tex'].
171 | %
172 | %   algs     - cell array of algorithm names (column titles)
173 | %   res_algs - struct; each metric field is indexed as (row, column) and
174 | %              the columns are selected through ids
175 | %   dataset  - dataset name (caption, row labels and output file name)
176 | %   ids      - cell array of column-index vectors; one table row is
177 | %              produced per entry of ids and per metric
178 | %
179 | % Returns the text that was written. Every '_' in the text is replaced by
180 | % '-', and lines are separated with char(13).
181 | fns = {'best_obj_acc', 'best_obj_nmi_sqrt', 'mean_acc', 'mean_nmi_sqrt', 'loocv', 'jac', 'red'};
182 | % Per metric: 1 => values are reported as they are and the row with the
183 | % largest mean is the reference; 0 => 1 - value is reported and the row
184 | % with the smallest mean of 1 - value is the reference.
185 | ismax = [1, 1, 1, 1, 0, 1, 0];
186 | 
187 | tex_header = '\begin{table*}';
188 | tex_header = [tex_header, char(13),'\caption{', dataset, 'all results}'];
189 | tex_header = [tex_header, char(13),'\tiny \centering \label{table:res_aio}'];
190 | 
191 | tex_align = '| c ';
192 | tex_title = 'Data Sets';
193 | for i1 = 1:length(algs)
194 |     tex_title = [tex_title,  ' & ', algs{i1}];
195 |     tex_align = [tex_align,  ' | ', 'c'];
196 | end
197 | tex_title = [tex_title '\\ \hline'];
198 | tex_header = [tex_header, char(13),'\begin{tabular}{', tex_align, '| }'];
199 | tex_header = [tex_header, char(13),'\toprule'];
200 | 
201 | % Row labels use at most the first five characters of the dataset name;
202 | % min(5, end) keeps names shorter than five characters from erroring.
203 | tag = dataset(1:min(5, end));
204 | 
205 | message = [];
206 | for i1 = 1:length(ids)
207 |     cols = ids{i1};
208 |     for i2 = 1:length(fns)
209 |         vals = res_algs.(fns{i2})(:, cols);
210 |         if ismax(i2)
211 |             shown = vals;
212 |             [~, best_id] = max(mean(shown, 2));
213 |         else
214 |             shown = 1 - vals;
215 |             [~, best_id] = min(mean(shown, 2));
216 |         end
217 |         % Test every row against the reference row on the raw values.
218 |         sigs = zeros(size(vals, 1), 1);
219 |         sigs2 = sigs;
220 |         for i3 = 1:length(sigs)
221 |             [sigs(i3), sigs2(i3)] = ttest(vals(i3, :), vals(best_id, :));
222 |         end
223 |         label = [tag, '_', fns{i2}, '_', num2str(cols(1)), '_', num2str(cols(end)) ];
224 |         message = [message, char(13), ms2tex(mean(shown, 2), std(shown, 0, 2), ismax(i2), sigs, sigs2, label)];
225 |     end
226 | end
227 | message = [tex_title, char(13), message ];
228 | 
229 | tex_end = [];
230 | tex_end = [tex_end, char(13), '\bottomrule' ];
231 | tex_end = [tex_end, char(13), '\end{tabular}' ];
232 | tex_end = [tex_end, char(13), '\end{table*}' ];
233 | 
234 | message = [tex_header, char(13), message, char(13), tex_end];
235 | message = strrep(message, '_', '-');
236 | 
237 | tex_file = [dataset, '.tex'];
238 | fid = fopen(tex_file, 'w+');
239 | if fid == -1
240 |     error('compute_message:fopen', 'cannot open %s for writing', tex_file);
241 | end
242 | fprintf(fid,  '%s', message);
243 | fclose(fid);
244 | end


--------------------------------------------------------------------------------
/FSASL.m:
--------------------------------------------------------------------------------
  1 | function [W, S, A, objHistory] = FSASL(X, nClass, options)
  2 | if ~exist('options', 'var')
  3 |     options = [];
  4 | end
  5 | 
  6 | % Optios for global structure learning
  7 | if ~isfield(options, 'lambda1')
  8 |     options.lambda1 = 1; % [need to search]
  9 | end
 10 | 
 11 | if ~isfield(options, 'LassoType')
 12 |     options.LassoType = 'SLEP';
 13 | end
 14 | 
 15 | if ~isfield(options, 'SLEPrFlag')
 16 |     options.SLEPrFlag = 1; % the input parameter 'ReguAlpha' is a ratio in (0, 1)
 17 | end
 18 | 
 19 | if ~isfield(options, 'SLEPreg')
 20 |     options.SLEPreg = 0.01; % [need to search, and fix it]
 21 | end
 22 | 
 23 | if ~isfield(options, 'LARSk')
 24 |     options.LARSk = 5; % [need to search, and fix it]
 25 | end
 26 | 
 27 | if ~isfield(options, 'LARSratio')
 28 |     options.LARSratio = 2;
 29 | end
 30 | 
 31 | % Optios for local structure learning
 32 | if ~isfield(options, 'lambda2')
 33 |     options.lambda2 = 1; % [need to search] aim to show local structure is helpful
 34 | end
 35 | 
 36 | if ~isfield(options, 'Localk')
 37 |     options.Localk = 5; % [need to search, and fix it]
 38 | end
 39 | 
 40 | if ~isfield(options, 'LocalReg')
 41 |     options.LocalReg = estimateReg(X, options.Localk); % aim to avoid search
 42 | end
 43 | 
 44 | % Optios for subspace learning
 45 | if ~isfield(options, 'GroupLassoType')
 46 |     options.GroupLassoType = 'LS21';
 47 | end
 48 | 
 49 | if ~isfield(options, 'lambda3')
 50 |     options.lambda3 = 1; % [need to search
 51 | end
 52 | 
 53 | if ~isfield(options, 'maxiter')
 54 |     options.maxiter = 1; % [need to search
 55 | end
 56 | % options.lambda1 = 1 - options.lambda2;
 57 | [~, nSmp] = size(X);
 58 | X2 = X;
 59 | objHistory = [];
 60 | for iter = 1:options.maxiter
 61 |     
 62 |     S = zeros(nSmp);
 63 |     if options.lambda1 > 0 && ( options.maxiter < 5 || iter > 1)
 64 |         % update global structure LG
 65 |         for iSmp = 1:nSmp
 66 |             candIdx = ones(nSmp, 1);
 67 |             candIdx(iSmp) = 0;
 68 |             candIdx = candIdx > 0;
 69 |             switch lower(options.LassoType)
 70 |                 case lower('SLEP')
 71 |                     S(candIdx, iSmp) = LeastR(X2(:, candIdx), X2(:, iSmp), options.SLEPreg, struct('rFlag', options.SLEPrFlag, 'rsL2', 0));
 72 |                 case lower('LARS')
 73 |                     S(candIdx, iSmp) = LassoLARS(X2(:, candIdx), X2(:, iSmp), options.LARSk * options.LARSratio, 'verbose', 0);
 74 |                 case lower('lars2')
 75 |                     Gram = X2(:, candIdx) * X2(:, candIdx)';
 76 |                     Gram = max(Gram,Gram');
 77 |                     S(candIdx, iSmp) = lars(X2(:, candIdx), X2(:, iSmp),'lasso', -(max(options.LARSk)+5),1,Gram,options.LARSk);
 78 |                 case lower('lars3')
 79 |                     S(candIdx, iSmp) = lars(X2(:, candIdx), X2(:, iSmp),'lasso', -(max(options.LARSk)+5),0,[],options.LARSk);
 80 |                 otherwise
 81 |                     error('method does not exist!');
 82 |             end
 83 |         end
 84 |         LG = (eye(nSmp) - S);
 85 |         LG = LG * LG';
 86 |         LG = (LG + LG') / 2;
 87 |     else
 88 |         LG = 0;
 89 |     end
 90 |     
 91 |     A = zeros(nSmp);
 92 |     if options.lambda2 > 0
 93 |         if iter > 1
 94 |             % update local structure LL
 95 |             distx = L2_distance_1(X2, X2);
 96 |             if iter>0
 97 |                 [~, idx] = sort(distx,2);
 98 |             end;
 99 |             
100 |             for iSmp = 1 : nSmp
101 |                 if options.Localk < nSmp
102 |                     idxa0 = idx(iSmp, 2: options.Localk + 1);
103 |                 else
104 |                     idxa0 = 1 : nSmp;
105 |                 end;
106 |                 dxi = distx(iSmp, idxa0);
107 |                 ad = - (dxi) / (2 * options.LocalReg);
108 |                 A(iSmp, idxa0) = EProjSimplex_new(ad);
109 |             end;
110 |         else
111 |             A = constructW(X2', struct('k', options.Localk));
112 |         end
113 |         A = (A+A')/2;
114 |         LL = diag(sum(A)) - A;
115 |         LL = (LL + LL') / 2;
116 |     else
117 |         LL = 0;
118 |     end
119 |     
120 |     L = options.lambda1 * LG + options.lambda2 * LL;
121 |     
122 |     % update embedding
123 |     
124 |     switch lower(options.GroupLassoType)
125 |         case lower('JFSSL')
126 |             Y = eig1(L, nClass, 0);
127 |             W = FSSL_subspace(X, Y, options.lambda3);
128 |         case lower('LS21')
129 |             Y = eig1(L, nClass, 0);
130 |             W = LS21(X', Y, options.lambda3);
131 |         case lower('NDFS') % d^3
132 |             tmp = X * L * X';
133 |             tmp = (tmp + tmp') / 2;
134 |             if exist('W', 'var')
135 |                 W = LquadR21_reg(tmp, nClass, options.lambda3, W);
136 |             else
137 |                 W = LquadR21_reg(tmp, nClass, options.lambda3);
138 |             end
139 |         case lower('UDFS')
140 |             tmp = X * L * X';
141 |             tmp = (tmp + tmp') / 2;
142 |             W = LquadR21_reg(tmp, nClass, options.lambda3);
143 |         case lower('MCLEASTR')
144 |             Y = eig1(L, nClass, 0);
145 |             W = mcLeastR(X', Y, options.lambda3, struct('rFlag', 1, 'rsL2', 0));
146 |         otherwise
147 |             error('method does not exist!');
148 |     end
149 |     X2 = W' * X;
150 |     obj = trace(X2 * L * X2');
151 |     if options.lambda1 > 0 && strcmpi(options.LassoType, 'SLEP')
152 |         obj = obj + options.lambda1 * options.SLEPreg * sum(sum(abs(S)));
153 |     end
154 |     
155 |     if options.lambda2 > 0
156 |         obj = obj + options.lambda2 * options.LocalReg * sum(sum(A.^2));
157 |     end
158 |     
159 |     obj = obj + options.lambda3 * sum(sqrt(sum(W.^2, 2)));
160 |     objHistory = [objHistory; obj]; %#ok
161 |     %
162 | end
163 | end
164 | 
function A = FSSL_subspace(X, Y, regu)
% FSSL_SUBSPACE  Iteratively reweighted solve for a row-sparse A with X'*A ~ Y.
%
%   X    - d-by-n data matrix.
%   Y    - n-by-nClass target matrix.
%   regu - regularisation weight; only used when X'*A = Y is not an
%          under-determined consistent system.
%
%   A    - d-by-nClass coefficient matrix after 20 reweighting iterations.
%
% The reweighting matrix of the original formulation is G = diag(1 ./ rowNorm(A)).
% Only inv(G) is ever needed, so inv(G) = diag(rowNorm(A)) is kept directly.
% This gives the same iterates when every row norm is non-zero, and it
% handles rows of A that become exactly zero: such rows get weight 0 and
% stay zero, instead of triggering a size-mismatch error.
d = size(X, 1);
[n, nClass] = size(Y);

% X'*A = Y has infinitely many solutions iff rank(X') == rank([X', Y]) < d
r1 = rank(X');
r2 = rank([X', Y]);
nSolutionCheck = (r1 == r2 && r1 < d);

A = zeros(d, nClass);
nIter = 20;
Gi = eye(d); % inv(G), starting from G = I
for iter = 1:nIter
    K = X' * Gi * X;
    if ~nSolutionCheck
        % Situation 2: single or no solution -> regularised system
        K = K + 0.5/regu*eye(n);
    end
    % Situation 1 (infinitely many solutions) uses K unmodified
    A = Gi * X * (K \ Y);
    Gi = diag(sqrt(sum(A.^2, 2)));
end
end
206 | 
function [x, ft] = EProjSimplex_new(v, k)
% EPROJSIMPLEX_NEW  Euclidean projection of v onto the scaled simplex.
%
%   Solves
%       min  1/2 || x - v ||^2
%       s.t. x >= 0, sum(x) = k
%   with k = 1 when it is not supplied.
%
%   x  - the projected vector.
%   ft - 1 plus the number of Newton iterations performed.

if nargin < 2
    k = 1;
end

ft = 1;
nElem = length(v);

% Shift v so that its entries already sum to k.
shifted = v - mean(v) + k/nElem;
if min(shifted) < 0
    % Some entries are negative: find the offset for which the positive
    % part sums to k by Newton's method on the residual.
    residual = 1;
    offset = 0;
    while abs(residual) > 10^-10
        trial = shifted - offset;
        active = trial > 0;
        slope = -sum(active);
        residual = sum(trial(active)) - k;
        offset = offset - residual/slope;
        ft = ft + 1;
        if ft > 100
            % give up after 100 iterations and use the current iterate
            break;
        end
    end
    x = max(trial, 0);
else
    % already feasible
    x = shifted;
end
end
247 | 
% Squared Euclidean distances between the columns of two matrices, using
% ||a-b||^2 = ||a||^2 + ||b||^2 - 2*a'*b
function d = L2_distance_1(a,b)
% a, b : matrices whose columns are data points
% d    : size(a,2)-by-size(b,2) matrix of squared distances, clamped at 0

% A single-row input gets a zero row appended to both matrices so the
% column-wise sums below run over the rows.
if size(a,1) == 1
    a(end+1, :) = 0;
    b(end+1, :) = 0;
end

sqNormA = sum(a.*a);
sqNormB = sum(b.*b);
cross = a'*b;
nA = size(sqNormA, 2);
nB = size(sqNormB, 2);
d = repmat(sqNormA', [1 nB]) + repmat(sqNormB, [nA 1]) - 2*cross;

% drop imaginary parts and round-off negatives
d = max(real(d), 0);

% % force 0 on the diagonal?
% if (df==1)
%   d = d.*(1-eye(size(d)));
% end

end
273 | 
function r = estimateReg(X, k)
% ESTIMATEREG  Estimate a regularisation value from k-nearest-neighbour distances.
%
%   X - data matrix with one sample per column.
%   k - neighbourhood size; at least k+2 samples are required.
%
%   r - mean over all samples of 0.5*(k*d_{k+1} - sum(d_1..d_k)), where
%       d_1 <= ... <= d_{k+1} are the 2nd to (k+2)-th smallest squared
%       distances from the sample (the smallest one, normally the sample
%       itself, is skipped).
%
% The previous version also filled an nSmp-by-nSmp matrix that was never
% used; it is no longer built.
nSmp = size(X, 2);
if nSmp < k + 2
    error('estimateReg:tooFewSamples', ...
        'estimateReg needs at least k+2 = %d samples, but X has %d.', k + 2, nSmp);
end

distX = L2_distance_1(X,X);
%distX = sqrt(distX);
sortedDist = sort(distX,2);
rr = zeros(nSmp,1);
for i = 1:nSmp
    di = sortedDist(i,2:k+2);
    rr(i) = 0.5*(k*di(k+1)-sum(di(1:k)));
end
r = mean(rr);
end
289 | 
290 | 
function [X, obj]=LquadR21_reg(A, k, r, X0)
% LQUADR21_REG  Quadratic loss with 2,1-norm regularisation:
%     min_{X'*X=I}  Tr(X'*A*X) + r * ||X||_21
%
%   A   - square matrix of the quadratic term.
%   k   - number of columns of X (eigenvectors kept).
%   r   - regularisation weight.
%   X0  - optional starting point used to initialise the row weights.
%
%   X   - eigenvectors of the reweighted matrix for the k smallest
%         eigenvalues, after 36 reweighting sweeps.
%   obj - objective value after each sweep.

numSweeps = 36;
dim = size(A, 2);
if nargin >= 4
    rowWeight = 0.5 ./ sqrt(sum(X0.*X0,2)+eps);
else
    rowWeight = ones(dim,1);
end

for sweep = 1:numSweeps
    M = A + r*diag(rowWeight);
    M = max(M, M');
    [vecs, vals] = eig(M);
    [~, order] = sort(diag(vals));
    X = vecs(:, order(1:k));

    % refresh the weights from the current row norms (eps avoids 1/0)
    rowNorm = sqrt(sum(X.*X,2)+eps);
    rowWeight = 0.5 ./ rowNorm;

    obj(sweep) = trace(X'*A*X) + r*sum(rowNorm); %#ok
end
end
320 | 
function [eigvec, eigval, eigval_full] = eig1(A, c, isMax, isSym)
% EIG1  Leading or trailing eigenpairs of a matrix.
%
%   A     - square matrix.
%   c     - number of eigenpairs to return (default and upper limit:
%           size(A,1)).
%   isMax - 1 (default): largest eigenvalues first; 0: smallest first.
%   isSym - 1 (default): replace A by max(A, A') before decomposing.
%
%   eigvec      - eigenvectors, one per column, ordered like eigval.
%   eigval      - column vector with the c selected eigenvalues.
%   eigval_full - all computed eigenvalues in the same order. When eig
%                 fails and eigs is used instead, only c values exist.

if nargin < 2
    c = size(A,1);
elseif c > size(A,1)
    c = size(A,1);
end

if nargin < 3
    isMax = 1;
end

if nargin < 4
    isSym = 1;
end

if isSym == 1
    A = max(A,A');
end
try
    [v, d] = eig(A);
    d = diag(d);
    %d = real(d);
catch
    % eig failed (e.g. sparse input): fall back to eigs
    if isMax == 0
        [v, d] = eigs(sparse(A), c, 'sa', struct('tol', 1e-5));
    else
        [v, d] = eigs(sparse(A), c, 'la', struct('tol', 1e-5));
    end
    % eigs also returns a diagonal matrix; without this the sort below
    % would run column-wise on a matrix and pick wrong entries.
    d = diag(d);
end

if isMax == 0
    [~, idx] = sort(d);
else
    [~, idx] = sort(d,'descend');
end
idx1 = idx(1:c);
eigval = d(idx1);
eigvec = v(:,idx1);

eigval_full = d(idx);
end
366 | 
367 | 
function W = LS21(X, Y, r, W0)
% LS21  Least squares with 2,1-norm regularisation by iterative reweighting.
%
%   Each of the 10 iterations solves (X'*X + r*diag(d)) * W = X'*Y and then
%   updates d = 0.5 ./ sqrt(rowNorm(W).^2 + eps).
%
%   X  - n-by-m matrix (n rows, m variables).
%   Y  - n-by-c target matrix.
%   r  - regularisation weight.
%   W0 - optional m-by-c starting point used to initialise d.
%
%   W  - m-by-c coefficient matrix.
%
% When n < m the m-by-m system is solved through the Woodbury identity, so
% only an n-by-n system is factorised. The original test `n < d` compared n
% with the weight vector d, so this branch was effectively never taken.
[n, m] = size(X);
if nargin < 4
    d = ones(m,1);
else
    Wi = sqrt(sum(W0.^2,2)+eps);
    d = 0.5./(Wi);
end

maxiter = 10;
XY = X' * Y;
if n < m % n^3
    In = eye(n);
    for iter = 1:maxiter
        rd = 1 ./ (r * d);                 % diagonal of inv(r*D)
        Xrd = bsxfun(@times, X, rd');      % X * inv(r*D)
        XrdX = Xrd * X';
        % inv(r*D + X'X)*XY = inv(rD)*XY - inv(rD)*X' * ((I + X*inv(rD)*X') \ (X*inv(rD)*XY))
        W = bsxfun(@times, rd, XY) - Xrd' * ((In + XrdX) \ (Xrd * XY));
        Wi = sqrt(sum(W.^2,2)+eps);
        d = 0.5./(Wi);
    end
else % m^3
    XX = X' * X;
    for iter = 1:maxiter
        W = (XX + r * diag(d)) \ XY;
        Wi = sqrt(sum(W.^2,2)+eps);
        d = 0.5./(Wi);
    end
end
end
399 | 


--------------------------------------------------------------------------------
/lars.m:
--------------------------------------------------------------------------------
  1 | function beta = lars(X, y, method, stop, useGram, Gram, Cardi, bSparse, trace)
  2 | % This function is provided at
  3 | % http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3897
  4 | % I have made some small modifications  -- Deng Cai, Feb/2008
  5 | 
  6 | % LARS  The LARS algorithm for performing LAR or LASSO.
  7 | %    BETA = LARS(X, Y) performs least angle regression on the variables in
  8 | %    X to approximate the response Y. Variables X are assumed to be
  9 | %    normalized (zero mean, unit length), the response Y is assumed to be
 10 | %    centered.
 11 | %    BETA = LARS(X, Y, METHOD), where METHOD is either 'LARS' or 'LASSO'
 12 | %    determines whether least angle regression or lasso regression should
 13 | %    be performed.
 14 | %    BETA = LARS(X, Y, METHOD, STOP) with nonzero STOP will perform least
 15 | %    angle or lasso regression with early stopping. If STOP is negative,
 16 | %    STOP is an integer that determines the desired number of variables. If
 17 | %    STOP is positive, it corresponds to an upper bound on the L1-norm of
 18 | %    the BETA coefficients.
 19 | %    BETA = LARS(X, Y, METHOD, STOP, USEGRAM) specifies whether the Gram
 20 | %    matrix X'X should be calculated (USEGRAM = 1) or not (USEGRAM = 0).
 21 | %    Calculation of the Gram matrix is suitable for low-dimensional
 22 | %    problems. By default, the Gram matrix is calculated.
 23 | %    BETA = LARS(X, Y, METHOD, STOP, USEGRAM, GRAM) makes it possible to
 24 | %    supply a pre-computed Gram matrix. Set USEGRAM to 1 to enable. If no
 25 | %    Gram matrix is available, exclude argument or set GRAM = [].
 26 | %    BETA = LARS(X, Y, METHOD, STOP, USEGRAM, GRAM, TRACE) with nonzero
 27 | %    TRACE will print the adding and subtracting of variables as all
 28 | %    LARS/lasso solutions are found.
 29 | %    Returns BETA where each row contains the predictor coefficients of
 30 | %    one iteration. A suitable row is chosen using e.g. cross-validation,
 31 | %    possibly including interpolation to achieve sub-iteration accuracy.
 32 | %
 33 | % Author: Karl Skoglund, IMM, DTU, kas@imm.dtu.dk
 34 | % Reference: 'Least Angle Regression' by Bradley Efron et al, 2003.
 35 | 
 36 | %% Input checking
 37 | % Set default values.
 38 | if nargin < 9
 39 |     trace = 0;
 40 | end
 41 | if nargin < 8
 42 |     bSparse = 1;
 43 | end
 44 | if nargin < 7
 45 |     Cardi = [];
 46 | end
 47 | if nargin < 6
 48 |     Gram = [];
 49 | end
 50 | if nargin < 5
 51 |     useGram = 0;
 52 | end
 53 | if nargin < 4
 54 |     stop = 0;
 55 | end
 56 | if nargin < 3
 57 |     method = 'lasso';
 58 | end
 59 | if strcmpi(method, 'lasso')
 60 |     lasso = 1;
 61 | else
 62 |     lasso = 0;
 63 | end
 64 | 
 65 | if isempty(X)
 66 |     error('The code has been updated. Please input the X');
 67 | end
 68 | 
 69 | 
 70 | %% LARS variable setup
 71 | [n p] = size(X);
 72 | % nvars = min(n-1,p); %
 73 | nvars = p; %
 74 | 
 75 | maxk = 512*nvars; % Maximum number of iterations
 76 | 
 77 | if isempty(Cardi)
 78 |     if stop == 0
 79 |         if bSparse
 80 |             beta = sparse(p,2*nvars);
 81 |         else
 82 |             beta = zeros(p,2*nvars);
 83 |         end
 84 |     elseif stop < 0
 85 |         if bSparse
 86 |             beta = sparse(p,2*round(-stop));
 87 |         else
 88 |             beta = zeros(p,2*round(-stop));
 89 |         end
 90 |     else
 91 |         if bSparse
 92 |             beta = sparse(p,100);
 93 |         else
 94 |             beta = zeros(p,100);
 95 |         end
 96 |     end
 97 | else
 98 |     Cardi = unique(Cardi);
 99 |     Cardi(Cardi>nvars) = [];
100 |     stop = -max(Cardi);
101 |     if bSparse
102 |         beta = sparse(p,length(Cardi));
103 |     else
104 |         beta = zeros(p,length(Cardi));
105 |     end
106 |     betak = zeros(p,1);
107 | end
108 | 
109 | mu = zeros(n, 1); % current "position" as LARS travels towards lsq solution
110 | I = 1:p; % inactive set
111 | A = []; % active set
112 | 
113 | % Calculate Gram matrix if necessary
114 | if isempty(Gram) && useGram
115 |     error('The code has been updated. Please input the Gram');
116 | %     clear Gram;
117 | %     global Gram;
118 |     %   Gram = X'*X; % Precomputation of the Gram matrix. Fast but memory consuming.
119 | end
120 | 
121 | if ~useGram
122 |     R = []; % Cholesky factorization R'R = X'X where R is upper triangular
123 | end
124 | 
125 | 
126 | lassocond = 0; % LASSO condition boolean
127 | stopcond = 0; % Early stopping condition boolean
128 | k = 0; % Iteration count
129 | vars = 0; % Current number of variables
130 | 
131 | if trace
132 |     disp(sprintf('Step\tAdded\tDropped\t\tActive set size'));
133 | end
134 | 
135 | % TimeLoop = zeros(2*nvars,1);
136 | tmpT = cputime;
137 | 
138 | %% LARS main loop
139 | while vars < nvars && ~stopcond && k < maxk
140 |     k = k + 1;
141 |     c = X'*(y - mu);
142 |     [C j] = max(abs(c(I)));
143 |     j = I(j);
144 | 
145 |     if ~lassocond % if a variable has been dropped, do one iteration with this configuration (don't add new one right away)
146 |         if ~useGram
147 |             diag_k = X(:,j)'*X(:,j); % diagonal element k in X'X matrix
148 |             if isempty(R)
149 |                 R = sqrt(diag_k);
150 |             else
151 |                 col_k = X(:,j)'*X(:,A); % elements of column k in X'X matrix
152 |                 R_k = R'\col_k'; % R'R_k = (X'X)_k, solve for R_k
153 |                 R_kk = sqrt(diag_k - R_k'*R_k); % norm(x'x) = norm(R'*R), find last element by exclusion
154 |                 R = [R R_k; [zeros(1,size(R,2)) R_kk]]; % update R
155 |             end
156 |         end
157 |         A = [A j];
158 |         I(I == j) = [];
159 |         vars = vars + 1;
160 |         if trace
161 |             disp(sprintf('%d\t\t%d\t\t\t\t\t%d', k, j, vars));
162 |         end
163 |     end
164 | 
165 |     s = sign(c(A)); % get the signs of the correlations
166 | 
167 |     if useGram
168 |         if vars <= 200
169 |             R = chol(Gram(A,A));
170 |         elseif lassocond
171 |             if (rJ <= 200) & vars <= 1000
172 |                 R = chol(Gram(A,A));
173 |             else
174 |                 R(:,rJ) = []; % remove column j
175 |                 tmpn = size(R,2);
176 |                 for tmpk = rJ:tmpn
177 |                     tmpp = tmpk:tmpk+1;
178 |                     [G,R(tmpp,tmpk)] = planerot(R(tmpp,tmpk)); % remove extra element in column
179 |                     if tmpk < tmpn
180 |                         R(tmpp,tmpk+1:tmpn) = G*R(tmpp,tmpk+1:tmpn); % adjust rest of row
181 |                     end
182 |                 end
183 |                 R(end,:) = []; % remove zero'ed out row
184 |             end
185 |         else
186 |             R_k = R'\Gram(A(1:end-1),j);
187 |             R_kk = sqrt(Gram(j,j)-R_k'*R_k);
188 |             R = [R R_k; [zeros(1,size(R,2)) R_kk]]; % update R
189 |         end
190 |         GA1 = R\(R'\s);
191 |         AA = 1/sqrt(sum(GA1.*s));
192 |         w = AA*GA1;
193 |     else
194 |         GA1 = R\(R'\s);
195 |         AA = 1/sqrt(sum(GA1.*s));
196 |         w = AA*GA1;
197 |     end
198 |     u = X(:,A)*w; % equiangular direction (unit vector)
199 | 
200 |     if vars == nvars % if all variables active, go all the way to the lsq solution
201 |         gamma = C/AA;
202 |     else
203 |         a = X'*u; % correlation between each variable and eqiangular vector
204 |         temp = [(C - c(I))./(AA - a(I)); (C + c(I))./(AA + a(I))];
205 |         gamma = min([temp(temp > 0); C/AA]);
206 |     end
207 | 
208 |     % LASSO modification
209 |     if lasso
210 |         lassocond = 0;
211 |         if isempty(Cardi)
212 |             temp = -beta(A,k)./w;
213 |         else
214 |             temp = -betak(A)./w;
215 |         end
216 |         [gamma_tilde] = min([temp(temp > 0); gamma]);
217 |         j = find(temp == gamma_tilde);
218 |         if gamma_tilde < gamma,
219 |             gamma = gamma_tilde;
220 |             lassocond = 1;
221 |         end
222 |     end
223 | 
224 |     mu = mu + gamma*u;
225 |     if isempty(Cardi)
226 |         if size(beta,2) < k+1
227 |             if bSparse
228 |                 beta = [beta sparse(p,size(beta,1))];
229 |             else
230 |                 beta = [beta zeros(p,size(beta,1))];
231 |             end
232 |         end
233 |         beta(A,k+1) = beta(A,k) + gamma*w;
234 |     else
235 |         tmpbetak = betak(A) + gamma*w;
236 |         betak = zeros(p,1);
237 |         betak(A) = tmpbetak;
238 |         idx = find(Cardi==vars);
239 |         if ~isempty(idx)
240 |             beta(:,idx) = betak;
241 |         end
242 |     end
243 | 
244 |     % Early stopping at specified bound on L1 norm of beta
245 |     if isempty(Cardi)
246 |         if stop > 0
247 |             t2 = sum(abs(beta(:,k+1)));
248 |             if t2 >= stop
249 |                 t1 = sum(abs(beta(:,k)));
250 |                 s = (stop - t1)/(t2 - t1); % interpolation factor 0 < s < 1
251 |                 beta(:,k+1) = beta(:,k) + s*(beta(:,k+1) - beta(:,k));
252 |                 stopcond = 1;
253 |             end
254 |         end
255 |     end
256 | 
257 |     % If LASSO condition satisfied, drop variable from active set
258 |     if lassocond == 1
259 |         if ~useGram
260 |             R(:,j) = []; % remove column j
261 |             tmpn = size(R,2);
262 |             for tmpk = j:tmpn
263 |                 tmpp = tmpk:tmpk+1;
264 |                 [G,R(tmpp,tmpk)] = planerot(R(tmpp,tmpk)); % remove extra element in column
265 |                 if tmpk < tmpn
266 |                     R(tmpp,tmpk+1:tmpn) = G*R(tmpp,tmpk+1:tmpn); % adjust rest of row
267 |                 end
268 |             end
269 |             R(end,:) = []; % remove zero'ed out row
270 |         end
271 |         rJ = j;
272 |         I = [I A(j)];
273 |         A(j) = [];
274 |         vars = vars - 1;
275 |         if trace
276 |             disp(sprintf('%d\t\t\t\t%d\t\t\t%d', k, j, vars));
277 |         end
278 |     end
279 | 
280 |     % Early stopping at specified number of variables
281 |     if stop < 0
282 |         stopcond = vars >= -stop;
283 |     end
284 |     
285 | %     TimeLoop(k) = cputime - tmpT;
286 | %     tmpT = cputime;
287 |     
288 | %     if vars < 1000
289 | %         if mod(vars,500) == 0
290 | %             tmpT = cputime - tmpT;
291 | %             disp(['LARS: ',num2str(vars),' features selected. Time: ',num2str(tmpT)]);
292 | %             tmpT = cputime;
293 | %         end
294 | %     elseif vars < 2000
295 | %         if mod(vars,200) == 0
296 | %             tmpT = cputime - tmpT;
297 | %             disp(['LARS: ',num2str(vars),' features selected. Time: ',num2str(tmpT)]);
298 | %             tmpT = cputime;
299 | %         end
300 | %     elseif vars < 3000
301 | %         if mod(vars,100) == 0
302 | %             tmpT = cputime - tmpT;
303 | %             disp(['LARS: ',num2str(vars),' features selected. Time: ',num2str(tmpT)]);
304 | %             tmpT = cputime;
305 | %         end
306 | %     else
307 | %         if mod(vars,50) == 0
308 | %             tmpT = cputime - tmpT;
309 | %             disp(['LARS: ',num2str(vars),' features selected. Time: ',num2str(tmpT)]);
310 | %             tmpT = cputime;
311 | %         end
312 | %     end        
313 | end
314 | 
315 | if isempty(Cardi)
316 |     % trim beta
317 |     if size(beta,2) > k+1
318 |         beta(:,k+2:end) = [];
319 |     end
320 | end
321 | 
322 | if k == maxk
323 |     disp('LARS warning: Forced exit. Maximum number of iteration reached.');
324 | end
325 | 
326 | %% To do
327 | %
328 | % There is a modification that turns least angle regression into stagewise
329 | % (epsilon) regression. This has not been implemented.
330 | 


--------------------------------------------------------------------------------