├── email_notify.m ├── compute_SbSw_sup.m ├── fs_unsup_maxvar.m ├── optSigma.m ├── LabelFormat.m ├── funG.m ├── get_mdcs_ip_dir.m ├── README.md ├── extractXY.m ├── fs_unsup_udfs_build_param.m ├── compute_SbSw_unsup.m ├── compute_W.m ├── fs_unsup_jelsr_liang_build_param.m ├── fs_unsup_udfs.m ├── prettyPlotProcessOptions.m ├── fs_unsup_mcfs_build_param.m ├── LocalDisAna.m ├── fs_unsup_lapscore_build_param.m ├── exp1_aio.m ├── fs_unsup_rufs_build_param.m ├── localLearnMx_KRR.m ├── fs_unsup_allfea_single_func.m ├── fs_unsup_spfs_sfs.m ├── fs_unsup_traceratio.m ├── ms2tex.m ├── fs_unsup_jelsr_build_param.m ├── eval_fsasl_param.m ├── tfidf.m ├── fs_unsup_ndfs_build_param.m ├── EuDist2.m ├── NormalizeFea.m ├── fs_unsup_spfs_nes.m ├── fs_unsup_llcfs_build_param.m ├── fs_unsup_spec_build_param.m ├── fs_unsup_maxvar_single_func.m ├── fs_unsup_glspfs.m ├── LocalReconstructLap.m ├── compute_accuracy_F.m ├── components.m ├── fs_unsup_jelsr.m ├── fs_unsup_spfs.m ├── compute_Y.m ├── fs_unsup_glspfs_build_param.m ├── fs_unsup_fsasl_build_param.m ├── fs_unsup_traceratio_single_func.m ├── fs_unsup_spfs_lar.m ├── fs_unsup_llcfs_single_func.m ├── fs_unsup_ndfs_single_func.m ├── fs_unsup_udfs_single_func.m ├── fs_unsup_lapscore_single_func.m ├── fs_unsup_spec_single_func.m ├── fs_unsup_fsasl_11_11_1_single_func.m ├── fs_unsup_jelsr_liang_lle_single_func.m ├── fs_unsup_jelsr_liang_lpp_single_func.m ├── fs_unsup_fsasl_11_11_5_single_func.m ├── fs_unsup_jelsr_lle_single_func.m ├── fs_unsup_jelsr_lpp_single_func.m ├── fs_unsup_spfs_single_func.m ├── fs_unsup_fsasl_11_5_5_single_func.m ├── fs_unsup_ndfs.m ├── fs_unsup_rufs_single_func.m ├── L2_distance.m ├── SimGraph_NearestNeighbors.m ├── fs_unsup_glspfs_single_func.m ├── mdcs_check.m ├── fs_unsup_mcfs_single_func.m ├── initFactor.m ├── constructKernel.m ├── find_nn.m ├── grid_search_fs.m ├── fs_unsup_jelsr_liang.m ├── fs_unsup_lapscore.m ├── Eigenmap.m ├── fs_unsup_spec.m ├── lpp.m ├── ltsa.m ├── fs_unsup_spfs_larnes.m ├── 
fs_unsup_llcfs.m ├── evalUnSupFS.m ├── scale_dist3_knn.m ├── lle.m ├── computeLocalStructure.m ├── sll_opts.m ├── run_exp1_func.m ├── fs_unsup_mcfs.m ├── plot_result.m ├── FSASL.m └── lars.m
/email_notify.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csliangdu/FSASL/HEAD/email_notify.m
--------------------------------------------------------------------------------
/compute_SbSw_sup.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csliangdu/FSASL/HEAD/compute_SbSw_sup.m
--------------------------------------------------------------------------------
/fs_unsup_maxvar.m:
--------------------------------------------------------------------------------
function FeaScore = fs_unsup_maxvar(X)
%FS_UNSUP_MAXVAR Score every feature by its sample variance.
%   X        : nSmp * nDim data matrix.
%   FeaScore : var(X), i.e. one variance per column of X.

FeaScore = var(X);
--------------------------------------------------------------------------------
/optSigma.m:
--------------------------------------------------------------------------------
function sigma = optSigma(X)
%OPTSIGMA Median of all N*N pairwise distances between the rows of X.
%   The distances come from EuDist2(X,X); the zero self-distances on the
%   diagonal are part of the sample the median is taken over.
N = size(X,1);
dist = EuDist2(X,X);
dist = reshape(dist,1,N*N);
sigma = median(dist);
--------------------------------------------------------------------------------
/LabelFormat.m:
--------------------------------------------------------------------------------
function Y = LabelFormat(y)
%LABELFORMAT Convert a label vector into a 0/1 indicator matrix.
%   y : labels; they are first mapped to 1..nClass by unique().
%   Y : numel(y) * nClass matrix with Y(i, c) = 1 iff sample i has the c-th
%       smallest distinct label.
[~,~,y] = unique(y);
% Force a row so that it matches 1:length(y) in sub2ind regardless of the
% orientation unique() returns for its index output.
y = y(:)';
nClass = max(y);
Y = zeros(length(y), nClass);
Y(sub2ind(size(Y), 1:length(y), y)) = 1;
--------------------------------------------------------------------------------
/funG.m:
--------------------------------------------------------------------------------
function [ V, D ] = funG( G, t )
%function [ V, D ] = funG( G, t )
% modify the eigenvalue of a matrix by t order.
%   G : square matrix (converted to full storage before eig).
%   t : exponent applied element-wise to the eigenvalues.
%   V : eigenvectors, columns ordered by ascending original eigenvalue.
%   D : diagonal matrix of the eigenvalues raised to t; entries that become
%       NaN or Inf are replaced by 0.

G = full(G);
[V,D] = eig(G);
d = diag(D);
% it is important to calculate G first
[d, orderIDX] = sort(d);
V = V(:,orderIDX);
d = d.^t;
d(isnan(d)) = 0;
d(isinf(d)) = 0;
D = diag(d);
--------------------------------------------------------------------------------
/get_mdcs_ip_dir.m:
--------------------------------------------------------------------------------
function [mdcs_ips, mdcs_dirs] = get_mdcs_ip_dir(n)
%GET_MDCS_IP_DIR Collect the distinct host addresses and working directories
%   reported by the iterations of a parfor loop.
%   n         : the loop runs n*1000 iterations.
%   mdcs_ips  : unique host addresses seen (cell array of char).
%   mdcs_dirs : unique values of pwd seen (cell array of char).
nProbe = n*1000;
% Preallocate with the number of iterations that actually write a cell.
mdcs_ips = cell(nProbe,1);
mdcs_dirs = cell(nProbe,1);
parfor i=1:nProbe
    localhost = java.net.InetAddress.getLocalHost();
    ip = localhost.getHostAddress();
    mdcs_ips{i} = char(ip);
    mdcs_dirs{i} = pwd;
end
mdcs_ips = unique(mdcs_ips);
mdcs_dirs = unique(mdcs_dirs);
end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
FSASL
=====

An unsupervised feature selection algorithm with adaptive structure learning.

The code is used to generate fully reproducible experimental results in [1].

[1]Liang Du and Yi-Dong Shen. Unsupervised Feature Selection with Adaptive Structure Learning. in Proceedings of the 21th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD), pages 209-218, Sydney, Australia, August 10–13, 2015.
--------------------------------------------------------------------------------
/extractXY.m:
--------------------------------------------------------------------------------
function [X, Y] = extractXY(dataset)
% For single view data : 'X', 'y'
%   dataset : name of the MAT-file handed to load(); its variables are
%             loaded straight into this function's workspace.
%   X       : the loaded 'X', or 'fea' when 'X' is absent.
%   Y       : the loaded 'Y' (transposed when it has more columns than rows
%             and passed through LabelFormat when it is still a matrix),
%             otherwise 'gnd' or 'y' reshaped to a column.
load(dataset);
if ~exist('X', 'var') && exist('fea', 'var')
    X = fea;
end

if exist('Y', 'var') && size(Y,1) < size(Y,2)
    Y = Y';
end

if exist('Y', 'var') && min(size(Y)) > 1
    Y = LabelFormat(Y);
end

if ~exist('Y', 'var') && exist('gnd', 'var')
    Y = gnd(:);
end

if ~exist('Y', 'var') && exist('y', 'var')
    Y = y(:);
end
end
--------------------------------------------------------------------------------
/fs_unsup_udfs_build_param.m:
--------------------------------------------------------------------------------
function paramCell = fs_unsup_udfs_build_param(knnCandi, gammaCandi, lamdaCandi)
%FS_UNSUP_UDFS_BUILD_PARAM Enumerate every combination of the candidates.
%   Returns an nP * 1 cell array of structs with fields k, gamma and lamda;
%   lamdaCandi varies fastest and knnCandi slowest.
n1 = length(knnCandi);
n2 = length(gammaCandi);
n3 = length(lamdaCandi);
nP = n1 * n2 * n3;
paramCell = cell(nP, 1);
idx = 0;
for i1 = 1:n1
    for i2 = 1:n2
        for i3 = 1:n3
            param = [];
            param.k = knnCandi(i1);
            param.gamma = gammaCandi(i2);
            param.lamda = lamdaCandi(i3);
            idx = idx + 1;
            paramCell{idx} = param;
        end
    end
end
--------------------------------------------------------------------------------
/compute_SbSw_unsup.m:
--------------------------------------------------------------------------------
function [Sb, Sw] = compute_SbSw_unsup(X, nK)
% X: training data each row is a data;
% calculate L_b and L_w defined in Laplacian score
% Sb = X'*L_b*X;
% Sw = X'*L_w*X;
% nK: value passed to constructW as option 'k' (default 5).
if nargin < 2
    nK = 5;
end
W = constructW(X, struct('k', nK));
Dw = sum(W,2);
L_w = diag(Dw) - W;
L_b = (Dw * Dw') / sum(Dw);

% symmetrize before forming the scatter matrices
L_w = (L_w + L_w')/2;
L_b = (L_b + L_b')/2;

Sb = X'*L_b*X;
Sw = X'*L_w*X;

% very important!
Sb = (Sb + Sb')/2;
Sw = (Sw + Sw')/2;
--------------------------------------------------------------------------------
/compute_W.m:
--------------------------------------------------------------------------------
function [W] = compute_W(W,data,D_mhalf)
%COMPUTE_W Scale W(i,j) by D_mhalf(i)*D_mhalf(j) and symmetrize the result.
%   W       : affinity matrix.
%   data    : only its number of rows (nSmp) is used, to pick the code path.
%   D_mhalf : scaling vector; the dense path replicates it as a column of
%             length nSmp.

[nSmp,nFea] = size(data); %#ok<ASGLU>

%%%%%%%%%%%%%%%%%%%% Normalize W
if nSmp < 5000
    tmpD_mhalf = repmat(D_mhalf,1,nSmp);
    W = (tmpD_mhalf.*W).*tmpD_mhalf';
    clear tmpD_mhalf;
else
    % Work on the non-zeros only. The size of W is passed back to sparse()
    % explicitly: without it, trailing all-zero rows/columns would be
    % dropped and W+W' below could fail on a non-square matrix.
    [nRow,nCol] = size(W);
    [i_idx,j_idx,v_idx] = find(W);
    D_mhalf = D_mhalf(:);
    v1_idx = v_idx(:).*D_mhalf(i_idx(:)).*D_mhalf(j_idx(:));
    W = sparse(i_idx,j_idx,v1_idx,nRow,nCol);
    clear i_idx j_idx v_idx v1_idx
end
W = (W+W')/2;
--------------------------------------------------------------------------------
/fs_unsup_jelsr_liang_build_param.m:
--------------------------------------------------------------------------------
function paramCell = fs_unsup_jelsr_liang_build_param(r1Candi, r2Candi, knnCandi)
%FS_UNSUP_JELSR_LIANG_BUILD_PARAM Enumerate every combination of the candidates.
%   Returns an nP * 1 cell array of structs with fields r1, r2 and r3, where
%   r3 holds the entry taken from knnCandi; knnCandi varies fastest.
n1 = length(r1Candi);
n2 = length(r2Candi);
n3 = length(knnCandi);
nP = n1 * n2 * n3;
paramCell = cell(nP, 1);
idx = 0;
for i1 = 1:n1
    for i2 = 1:n2
        for i3 = 1:n3
            param = [];
            param.r1 = r1Candi(i1);
            param.r2 = r2Candi(i2);
            param.r3 = knnCandi(i3);
            idx = idx + 1;
            paramCell{idx} = param;
        end
    end
end
end
--------------------------------------------------------------------------------
/fs_unsup_udfs.m:
--------------------------------------------------------------------------------
function [X, obj]=fs_unsup_udfs(A, k, r, X0)
% quadratic loss with 21-norm regularization
%
% min_{X'*X=I} Tr(X'*A*X) + r * ||X||_21
%
%   A   : matrix of the quadratic term.
%   k   : number of columns of X (eigenvectors kept per iteration).
%   r   : weight of the 21-norm term.
%   X0  : optional starting point used to initialise the row weights.
%   X   : eigenvectors of the k smallest eigenvalues in the last iteration.
%   obj : objective value after each of the 20 iterations.

NIter = 20;
[m, n] = size(A); %#ok
if nargin < 4
    d = ones(n,1);
else
    Xi = sqrt(sum(X0.*X0,2)+eps);
    d = 0.5./(Xi);
end;

for iter = 1:NIter
    D = diag(d);
    M = A+r*D;
    M = max(M,M'); % make M exactly symmetric before eig
    [evec, eval] = eig(M);
    eval = diag(eval);
    [~, idx] = sort(eval);
    X = evec(:,idx(1:k));

    % reweighting: d_i = 1 / (2 * ||row i of X||), eps keeps it finite
    Xi = sqrt(sum(X.*X,2)+eps);
    d = 0.5./(Xi);

    obj(iter) = trace(X'*A*X) + r*sum(Xi); %#ok
end;
--------------------------------------------------------------------------------
/prettyPlotProcessOptions.m:
--------------------------------------------------------------------------------
function [varargout] = prettyPlotProcessOptions(options,varargin)
% Similar to processOptions, but case insensitive and
% using a struct instead of a variable length list
%   varargin is a list of name/default pairs; output j is the value of the
%   j-th name in options when that field exists and is non-empty, and the
%   j-th default otherwise.

options = toUpper(options);

for i = 1:2:length(varargin)
    if isfield(options,upper(varargin{i}))
        v = getfield(options,upper(varargin{i}));
        if isempty(v)
            varargout{(i+1)/2}=varargin{i+1};
        else
            varargout{(i+1)/2}=v;
        end
    else
        varargout{(i+1)/2}=varargin{i+1};
    end
end

end

function [o] = toUpper(o)
% Add an upper-case copy of every field of o (the original fields remain).
if ~isempty(o)
    fn = fieldnames(o);
    for i = 1:length(fn)
        o = setfield(o,upper(fn{i}),getfield(o,fn{i}));
    end
end
end
--------------------------------------------------------------------------------
/fs_unsup_mcfs_build_param.m:
--------------------------------------------------------------------------------
function paramCell = fs_unsup_mcfs_build_param(knnCandi, weightCandi, weight_param_Candi)
%FS_UNSUP_MCFS_BUILD_PARAM Enumerate (k, weightMode, t) parameter settings.
%   knnCandi           : candidate values for field k.
%   weightCandi        : cell array of candidate weight modes.
%   weight_param_Candi : cell array, one entry per weight mode, holding the
%                        candidate values of t for that mode; an empty entry
%                        yields a single setting with t = 1.
%   paramCell          : nP * 1 cell array of structs (k, weightMode, t).
n1 = length(knnCandi);
n2 = length(weightCandi);
n3 = zeros(n2, 1);
for i1 = 1:length(weightCandi)
    n3(i1) = max(1, length(weight_param_Candi{i1}));
end
nP = n1 * max(sum(n3), 1) ;
paramCell = cell(nP, 1);
idx = 0;
for i1 = 1:n1
    for i2 = 1:n2
        for i3 = 1:max(n3(i2), 1)
            param = [];
            param.k = knnCandi(i1);
            param.weightMode = weightCandi{i2};
            if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2})
                tmp = weight_param_Candi{i2};
                param.t = tmp(i3);
            else
                param.t = 1; % place holder
            end
            idx = idx + 1;
            paramCell{idx} = param;
        end
    end
end
--------------------------------------------------------------------------------
/LocalDisAna.m:
--------------------------------------------------------------------------------
function L = LocalDisAna(X, para)
% unsupervised local discriminative analysis
% each column is a data
%   para.k     : neighbourhood size; k = para.k + 1 points are taken per
%                sample (16 when the field is absent).
%   para.lamda : regularisation weight (1000 when the field is absent).
%   L          : n * n matrix S*A*S' assembled from the per-sample blocks.

[D, n] = size(X);

if isfield(para, 'k')
    k = para.k+1;
else
    k = 16;
end;
if isfield(para, 'lamda')
    lamda = para.lamda;
else
    lamda = 1000;
end;

Lc = eye(k) - 1/k*ones(k); % k * k centering matrix
A = spalloc(n*k,n*k,5*n*k);
S = spalloc(n,n*k,5*n*k);
for i = 1:n
    % squared distances from sample i to every sample, nearest first
    dis = repmat(X(:,i),1,n) - X;
    dis = sum(dis.*dis);
    [dumb, nnidx] = sort(dis);
    Xi = X(:,nnidx(1:k));
    Xi = Xi*Lc;
    if D > k
        Ai = inv(lamda*eye(k) + Xi'*Xi);
        Ai = Lc*Ai*Lc;
    else
        Ai = Lc - lamda*Xi'*inv(eye(D) + lamda*Xi*Xi')*Xi;
    end;
    lidx = (i-1)*k+1:(i-1)*k+k;
    A(lidx, lidx) = Ai;
    S(nnidx(1:k),lidx) = eye(k);
end;

L = S*A*S';
--------------------------------------------------------------------------------
/fs_unsup_lapscore_build_param.m:
--------------------------------------------------------------------------------
function paramCell = fs_unsup_lapscore_build_param(knnCandi, weightCandi, weight_param_Candi)
%FS_UNSUP_LAPSCORE_BUILD_PARAM Enumerate (k, weightMode, t) parameter settings.
%   Same inputs and output layout as fs_unsup_mcfs_build_param: one struct
%   per combination, with t = 1 as a place holder for weight modes whose
%   entry in weight_param_Candi is empty.
n1 = length(knnCandi);
n2 = length(weightCandi);
n3 = zeros(n2, 1);
for i1 = 1:length(weightCandi)
    n3(i1) = max(1, length(weight_param_Candi{i1}));
end

nP = n1 * max(sum(n3), 1);
paramCell = cell(nP, 1);
idx = 0;
for i1 = 1:n1
    for i2 = 1:n2
        for i3 = 1:max(n3(i2), 1)

            param = [];
            param.k = knnCandi(i1);
            param.weightMode = weightCandi{i2};
            if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2})
                tmp = weight_param_Candi{i2};
                param.t = tmp(i3);
            else
                param.t = 1; % place holder
            end

            idx = idx + 1;
            paramCell{idx} = param;
        end
    end
end

--------------------------------------------------------------------------------
/exp1_aio.m:
--------------------------------------------------------------------------------
% Script: submit run_exp1_func as a batch job for each selected data set.
algs = {'AllFea', 'LapScore', 'SPFS', 'UDFS', 'LLCFS', 'MCFS', 'NDFS', 'RUFS', 'JELSR_lpp', 'GLSPFS', 'FSSL_11_11_5'};
lab_cluster = 'local'; % the matlab distributed computing server (MDCS) name, you may use 'local' as default
lab_cluster_size = 11; % number of node
lab_email_username = '';% the email notification service provided by our lab, you can also use other public email configuration.
lab_email_password = '';
ds = {'USPS_9298n_256d_10c', 'wap_1560n_8460d_20c_tfidf', ...
    'webbb_texas_814n_4029d_7c_binary', 'webkb_washington_1166n_4165d_7c_binary', ...
    'Carcinom_174n_9182d_11c', 'binaryalphadigs_1404n_320d_36c'};
ds = {'JAFFE_213n_676d_10c'}; % demo data
for i1 = 1:1%length(ds)
    dataset = ds{i1};
    % Pass the credential variables configured above, not their names as
    % literal strings.
    job = batch(@run_exp1_func, 4, {dataset, algs, lab_email_username, lab_email_password},...
        'Profile', lab_cluster, 'pool', lab_cluster_size, ...
        'AttachedFiles', {[dataset, '.mat'], 'eppMatrix.mexa64', 'eppMatrix.mexglx'},...
        'CaptureDiary',true, 'CurrentDirectory', '.');
end
--------------------------------------------------------------------------------
/fs_unsup_rufs_build_param.m:
--------------------------------------------------------------------------------
function paramCell = fs_unsup_rufs_build_param(llkrrParamCell, alphaCandi, betaCandi, nuCandi)
%FS_UNSUP_RUFS_BUILD_PARAM Enumerate every combination of the candidates.
%   Each struct carries alpha, beta, nu, the matching entry of
%   llkrrParamCell (field llkrrParam) and the fixed settings
%   epsilon = 1e-2 and verbose = 0. MaxIter is 20, lowered to 5 when
%   alpha + beta + nu exceeds 1e4.
n1 = length(alphaCandi);
n2 = length(betaCandi);
n3 = length(nuCandi);
n4 = length(llkrrParamCell);
nP = n1 * n2 * n3 * n4;
paramCell = cell(nP, 1);
idx = 0;
for i1 = 1:n1
    for i2 = 1:n2
        for i3 = 1:n3
            for i4 = 1:n4
                param = [];
                param.alpha = alphaCandi(i1);
                param.beta = betaCandi(i2);
                param.nu = nuCandi(i3);
                param.MaxIter = 20;
                if param.alpha + param.beta + param.nu > 1e4
                    param.MaxIter = 5; % large parameter is costly for convergence
                end
                param.epsilon = 1e-2;
                param.verbose = 0;

                param.llkrrParam = llkrrParamCell{i4};
                idx = idx + 1;
                paramCell{idx} = param;
            end
        end
    end
end
--------------------------------------------------------------------------------
/localLearnMx_KRR.m:
--------------------------------------------------------------------------------
function T = localLearnMx_KRR( X, param)
%LOCALLEARNMX_KRR Local-learning matrix from kernel ridge regression.
%   X                : nSmp * nDim data matrix.
%   param.nNeighbors : neighbourhood size passed to constructW as 'k'.
%   param.rLambda    : ridge regularisation weight.
%   T                : nSmp * nSmp matrix of the form M' * M.

% compute K via constructW in 'HeatKernel' mode
% NOTE(review): the original comment says the kernel width is self-tuned -
% that happens inside constructW and is not visible here; confirm.

K = constructW(X, struct('WeightMode', 'HeatKernel', 'k', param.nNeighbors));

[nSmp, nDim] = size(X);

% locate neighbors for each data
W = 1*(K>0);

% compute the local learning matrices
if param.nNeighbors < nSmp - 1 && param.nNeighbors > 0
    % compute A by local regularized kernel ridge regression
    A = zeros( nSmp, nSmp );
    for n = 1 : nSmp
        idxV = find( W( n, : ) > 0 );
        A( n, idxV ) = K( n, idxV )*inv( K(idxV, idxV) + param.rLambda * eye( length( idxV ) ) );
    end

    % matrix T
    T = eye( nSmp ) - A;
    T = T' * T;

else % all the data are neighboring to each other
    A = []; % A can not be computed directly
    I = eye( nSmp );

    % deformed kernel
    T = K * inv( K + param.rLambda * I );

    T = I - T;
    T = inv( diag( diag( T ) ) ) * T;
    T = T' * T;
end
--------------------------------------------------------------------------------
/fs_unsup_allfea_single_func.m:
--------------------------------------------------------------------------------
function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_allfea_single_func(dataset, exp_settings, algo_settings)
%Unsupervised feature selection using AllFea
%   Evaluates the full feature set once with evalUnSupFS, copies that result
%   to every entry of FeaNumCandi, runs grid_search_fs on the copies and
%   saves everything to '<dataset>_best_result_AllFea.mat' under
%   exp_settings.prefix_mdcs (when given). algo_settings is not used.

%======================setup===========================
FeaNumCandi = exp_settings.FeaNumCandi;
nKmeans = exp_settings.nKmeans;
prefix_mdcs = [];
if isfield(exp_settings, 'prefix_mdcs')
    prefix_mdcs = exp_settings.prefix_mdcs;
end
%===============================================
[X, Y] = extractXY(dataset);
[nSmp,nDim] = size(X);

t_start = clock;
disp('get AllFea ...');
fs_res = evalUnSupFS(X, Y, [1:nDim], struct('nKm', nKmeans));
res_aio = cell(1, length(FeaNumCandi));
parfor feaIdx = 1:length(FeaNumCandi)
    res_aio{1, feaIdx} = fs_res;
end
[res_gs, res_gs_ps] = grid_search_fs(res_aio);
res_gs.feaset = FeaNumCandi;
t_end = clock;
t1 = etime(t_end,t_start);
disp(['exe time: ',num2str(t1)]);
res_gs.time = t1;
res_gs.time2 = t1;

save(fullfile(prefix_mdcs, [dataset, '_best_result_AllFea.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
end
--------------------------------------------------------------------------------
/fs_unsup_spfs_sfs.m:
--------------------------------------------------------------------------------
function [ fList ] = fs_unsup_spfs_sfs(X, K, numF)
% function [ fList ] = spfs_sfs(X, K, numF)
% X - data, each row is an instance
% K - the similarity matrix of instances
% numF - the number of features to be selected
% fList - numF * 1 indices of the selected features in selection order;
%         entries that were never filled stay 0.

nF = size(X,2);
fList = zeros(numF,1);
R = K;
count = 1;
while count <= numF && count <= nF
    % fprintf('%i,',count);
    % if mod(count,10)==0
    %     fprintf('\n');
    % end

    [R, selF] = find_best_match(X, fList, R);
    if selF == -1
        return;
    else
        fList(count) = selF;
    end
    count = count + 1;
end
end

function [ newR, selF ] = find_best_match(X, fList, R)
% Pick the not-yet-selected feature f minimising (f'*f)^2 - 2*f'*R*f and
% return the residual R - f*f'. selF is -1 when no feature is left.
nF = size(X,2);
newR = R;
selF = -1;
% smallestErr = norm(newR,'fro'); modified
smallestErr = inf;
for i = 1:nF
    if sum(fList == i)>0
        continue;
    end
    curF = X(:,i);
    curErr = (curF'*curF)^2-2*curF'*R*curF;
    if smallestErr >= curErr
        newR = R - curF*curF';
        smallestErr = curErr;
        selF = i;
    end
end

end
--------------------------------------------------------------------------------
/fs_unsup_traceratio.m:
--------------------------------------------------------------------------------
function [feature_idx, feature_score, subset_score] = fs_unsup_traceratio(Sb, Sw, feature_num)
% Sb: a matrix to reflects the between-class or global affinity
% relationship encoded on Graph, Sb = X*Lb*X'
% Sw: a matrix to reflects the within-class or local affinity relationship
% encoded on Graph, Sw = X*Lw*X'
% feature_num: size of the subset whose trace ratio is optimised; values
% larger than the number of retained features are clamped to it
% feature_idx: the ranked feature index based on subset-level score
% feature_score: the feature-level score
% subset_score: the subset-level score


sb = abs(diag(Sb));
sw = abs(diag(Sw));
sw(sw == 0) = 0.000000000000001; % avoid division by zero

% preprocessing: keep the top 90% of features by individual ratio sb/sw
t_fnum = length(sb);
[fs, fs_idx] = sort(sb./sw,'descend'); %#ok

para = 0.9;

u_fnum = floor(para*t_fnum);
sb = sb(fs_idx(1:u_fnum));
sw = sw(fs_idx(1:u_fnum));

% Only u_fnum features remain; asking for more would index out of range.
feature_num = min(feature_num, u_fnum);

ind = 1:feature_num;
k = sum(sb(ind))/sum(sw(ind));
for i = 1: 20
    [score, I] = sort(sb - k*sw, 'descend');
    ind = I(1:feature_num);
    old_k = k;
    k = sum(sb(ind))/sum(sw(ind));
    if abs(k - old_k) < 0.000000000001
        break;
    end;
end
I = fs_idx(I); % map back to the original feature numbering

feature_idx = I;
feature_score = score;
subset_score = k;
--------------------------------------------------------------------------------
/ms2tex.m:
--------------------------------------------------------------------------------
function t = ms2tex(mean_val, std_val, ismax, sigs, sigs2, prefix)
%MS2TEX Format means and standard deviations as one LaTeX table row.
%   mean_val, std_val : values per column; printed multiplied by 100.
%   ismax  : non-zero (default 1) marks the largest mean as best, zero the
%            smallest.
%   sigs   : per-column flag; 0 or NaN prints the cell in bold. Only used
%            when sigs2 is non-empty. Default [].
%   sigs2  : per-column value printed as a third line of each cell; NaN is
%            shown as 1.00. When empty (default) only the best mean is bold.
%   prefix : text placed before the first cell. Default ''.
%   t      : the row, terminated by '\\ \hline'.
if ~exist('ismax', 'var')
    ismax = 1;
end
% The remaining arguments are optional as well.
if ~exist('sigs', 'var')
    sigs = [];
end
if ~exist('sigs2', 'var')
    sigs2 = [];
end
if ~exist('prefix', 'var')
    prefix = '';
end

sigs(isnan(sigs)) = 0; % failed to reject a=b, nan means a = b, of course sigs = 0
sigs2(isnan(sigs2)) = 1.0;

n = length(mean_val);
t = prefix;
if ismax
    [~, idx] = max(mean_val);
else
    [~, idx] = min(mean_val);
end
for i1 = 1:n
    if isempty(sigs2)
        if i1 == idx
            t = [t, '& \tabincell{c}{ \textbf{', num2str(mean_val(i1) * 100, '%4.2f'), '} \\ \textbf{$\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), '}} '];
        else
            t = [t, '& \tabincell{c}{ ', num2str(mean_val(i1) * 100, '%4.2f'), ' \\ $\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), '} '];
        end
    else
        if sigs(i1) == 0
            t = [t, '& \tabincell{c}{ \textbf{', num2str(mean_val(i1) * 100, '%4.2f'), '} \\ \textbf{$\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), ' } \\ \textbf{', num2str(sigs2(i1), '%4.2f'), ' }} '];
        else
            t = [t, '& \tabincell{c}{ ', num2str(mean_val(i1) * 100, '%4.2f'), ' \\ $\pm$ ', num2str(std_val(i1) * 100, '%4.2f'), ' \\', num2str(sigs2(i1), '%4.2f'), ' } '];
        end
    end

end
t = [t, '\\ \hline'];
-------------------------------------------------------------------------------- /fs_unsup_jelsr_build_param.m: -------------------------------------------------------------------------------- 1 | function paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, alphaCandi, betaCandi) 2 | n1 = length(knnCandi); 3 | n2 = length(weightCandi); 4 | n3 = zeros(n2, 1); 5 | for i1 = 1:length(weightCandi) 6 | n3(i1) = max(1, length(weight_param_Candi{i1})); 7 | end 8 | n4 = length(alphaCandi); 9 | n5 = length(betaCandi); 10 | 11 | nP = n1 * max(sum(n3), 1) * n4 * n5; 12 | paramCell = cell(nP, 1); 13 | idx = 0; 14 | for i1 = 1:n1 15 | for i2 = 1:n2 16 | for i3 = 1:max(n3(i2), 1) 17 | for i4 = 1:n4 18 | for i5 = 1:n5 19 | param = []; 20 | param.k = knnCandi(i1); 21 | param.weightMode = weightCandi{i2}; 22 | if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2}) 23 | tmp = weight_param_Candi{i2}; 24 | param.t = tmp(i3); 25 | else 26 | param.t = 1; % place holder 27 | end 28 | param.alpha = alphaCandi(i4); 29 | param.beta = betaCandi(i5); 30 | 31 | idx = idx + 1; 32 | paramCell{idx} = param; 33 | end 34 | end 35 | end 36 | end 37 | end -------------------------------------------------------------------------------- /eval_fsasl_param.m: -------------------------------------------------------------------------------- 1 | function r = eval_fsasl_param(p_name, param_candi, ids, fns, paramCell, res_aio) 2 | % 3 | % r1 = eval_fsasl_param('lambda3',10.^[-5:5], [11:25], {'mean_acc', 'mean_nmi_sqrt', 'loocv'}, paramCell, res_aio); 4 | % r2 = eval_fsasl_param('SLEPreg',[10.^-3, 0.005, 10.^-2, 0.05, 0.01], [11:25], {'mean_acc', 'mean_nmi_sqrt', 'loocv'}, paramCell, res_aio); 5 | % r3 = eval_fsasl_param('lambda1',[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], [11:25], {'mean_acc', 'mean_nmi_sqrt', 'loocv'}, paramCell, res_aio); 6 | % r1 7 | % r2 8 | % r3 9 | 10 | if isvector(param_candi) 11 | param_candi = num2cell(param_candi); 12 | end 13 | 14 | 15 | r = 
zeros(length(param_candi), length(fns)); 16 | for i1 = 1:length(param_candi) 17 | tmp = []; % nP_a * 3 18 | for i2 = 1:size(res_aio, 1); 19 | if isfield(paramCell{i2, 1}, p_name) && strcmp(num2str(paramCell{i2,1}.(p_name)), num2str(param_candi{i1})) 20 | 21 | tmp2 = zeros(length(fns), length(ids)); 22 | for i3 = 1:length(ids) 23 | tmp3 = zeros(length(fns),1); 24 | for i4 = 1:length(fns) 25 | tmp3(i4) = res_aio{i2, ids(i3)}.(fns{i4}); 26 | end 27 | tmp2(:,i3) = tmp3; 28 | end 29 | tmp = [tmp; mean(tmp2, 2)']; 30 | end 31 | 32 | end 33 | r(i1,:) = max(tmp, [], 1); 34 | end 35 | 36 | -------------------------------------------------------------------------------- /tfidf.m: -------------------------------------------------------------------------------- 1 | function fea = tfidf(fea,bNorm) 2 | % fea is a document-term frequency matrix, this function return the tfidf ([1+log(tf)]*log[N/df]) 3 | % weighted document-term matrix. 4 | % 5 | % If bNorm == 1, each document verctor will be further normalized to 6 | % have unit norm. (default) 7 | % 8 | % version 2.0 --Jan/2012 9 | % version 1.0 --Oct/2003 10 | % 11 | % Written by Deng Cai (dengcai AT gmail.com) 12 | % 13 | 14 | if ~exist('bNorm','var') 15 | bNorm = 1; 16 | end 17 | 18 | 19 | [nSmp,mFea] = size(fea); 20 | [idx,jdx,vv] = find(fea); 21 | df = full(sum(sparse(idx,jdx,1),1)); 22 | 23 | df(df==0) = 1; 24 | idf = log(nSmp./df); 25 | 26 | tffea = sparse(idx,jdx,log(vv)+1); 27 | 28 | fea2 = tffea'; 29 | idf = idf'; 30 | 31 | MAX_MATRIX_SIZE = 5000; % You can change this number based on your memory. 32 | nBlock = ceil(MAX_MATRIX_SIZE*MAX_MATRIX_SIZE/mFea); 33 | for i = 1:ceil(nSmp/nBlock) 34 | if i == ceil(nSmp/nBlock) 35 | smpIdx = (i-1)*nBlock+1:nSmp; 36 | else 37 | smpIdx = (i-1)*nBlock+1:i*nBlock; 38 | end 39 | fea2(:,smpIdx) = fea2(:,smpIdx) .* idf(:,ones(1,length(smpIdx))); 40 | end 41 | 42 | %Now each column of fea2 is the tf-idf vector. 
43 | %One can further normalize each vector to unit by using following codes: 44 | 45 | if bNorm 46 | fea = NormalizeFea(fea2,0)'; 47 | end 48 | 49 | % fea is the final document-term matrix. 50 | -------------------------------------------------------------------------------- /fs_unsup_ndfs_build_param.m: -------------------------------------------------------------------------------- 1 | function paramCell = fs_unsup_ndfs_build_param(knnCandi, weightCandi, weight_param_Candi, alphaCandi, betaCandi) 2 | n1 = length(knnCandi); 3 | n2 = length(weightCandi); 4 | n3 = zeros(n2, 1); 5 | for i1 = 1:length(weightCandi) 6 | n3(i1) = max(1, length(weight_param_Candi{i1})); 7 | end 8 | n4 = length(alphaCandi); 9 | n5 = length(betaCandi); 10 | 11 | nP = n1 * max(sum(n3), 1) * n4 * n5; 12 | paramCell = cell(nP, 1); 13 | idx = 0; 14 | for i1 = 1:n1 15 | for i2 = 1:n2 16 | for i3 = 1:max(n3(i2), 1) 17 | for i4 = 1:n4 18 | for i5 = 1:n5 19 | param = []; 20 | param.k = knnCandi(i1); 21 | param.weightMode = weightCandi{i2}; 22 | if ~isempty(weightCandi) && ~isempty(weight_param_Candi{i2}) 23 | tmp = weight_param_Candi{i2}; 24 | param.t = tmp(i3); 25 | else 26 | param.t = 1; % place holder 27 | end 28 | param.alpha = alphaCandi(i4); 29 | param.beta = betaCandi(i5); 30 | param.gamma = 10^8; 31 | param.maxiter = 100; 32 | 33 | idx = idx + 1; 34 | paramCell{idx} = param; 35 | end 36 | end 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /EuDist2.m: -------------------------------------------------------------------------------- 1 | function D = EuDist2(fea_a,fea_b,bSqrt) 2 | %EUDIST2 Efficiently Compute the Euclidean Distance Matrix by Exploring the 3 | %Matlab matrix operations. 
4 | % 5 | % D = EuDist(fea_a,fea_b) 6 | % fea_a: nSample_a * nFeature 7 | % fea_b: nSample_b * nFeature 8 | % D: nSample_a * nSample_a 9 | % or nSample_a * nSample_b 10 | % 11 | % Examples: 12 | % 13 | % a = rand(500,10); 14 | % b = rand(1000,10); 15 | % 16 | % A = EuDist2(a); % A: 500*500 17 | % D = EuDist2(a,b); % D: 500*1000 18 | % 19 | % version 2.1 --November/2011 20 | % version 2.0 --May/2009 21 | % version 1.0 --November/2005 22 | % 23 | % Written by Deng Cai (dengcai AT gmail.com) 24 | 25 | 26 | if ~exist('bSqrt','var') 27 | bSqrt = 1; 28 | end 29 | 30 | if (~exist('fea_b','var')) || isempty(fea_b) 31 | aa = sum(fea_a.*fea_a,2); 32 | ab = fea_a*fea_a'; 33 | 34 | if issparse(aa) 35 | aa = full(aa); 36 | end 37 | 38 | D = bsxfun(@plus,aa,aa') - 2*ab; 39 | D(D<0) = 0; 40 | if bSqrt 41 | D = sqrt(D); 42 | end 43 | D = max(D,D'); 44 | else 45 | aa = sum(fea_a.*fea_a,2); 46 | bb = sum(fea_b.*fea_b,2); 47 | ab = fea_a*fea_b'; 48 | 49 | if issparse(aa) 50 | aa = full(aa); 51 | bb = full(bb); 52 | end 53 | 54 | D = bsxfun(@plus,aa,bb') - 2*ab; 55 | D(D<0) = 0; 56 | if bSqrt 57 | D = sqrt(D); 58 | end 59 | end 60 | 61 | -------------------------------------------------------------------------------- /NormalizeFea.m: -------------------------------------------------------------------------------- 1 | function fea = NormalizeFea(fea,row) 2 | % if row == 1, normalize each row of fea to have unit norm; 3 | % if row == 0, normalize each column of fea to have unit norm; 4 | % 5 | % version 3.0 --Jan/2012 6 | % version 2.0 --Jan/2012 7 | % version 1.0 --Oct/2003 8 | % 9 | % Written by Deng Cai (dengcai AT gmail.com) 10 | % 11 | 12 | if ~exist('row','var') 13 | row = 1; 14 | end 15 | 16 | if row 17 | nSmp = size(fea,1); 18 | feaNorm = max(1e-14,full(sum(fea.^2,2))); 19 | fea = spdiags(feaNorm.^-.5,0,nSmp,nSmp)*fea; 20 | else 21 | nSmp = size(fea,2); 22 | feaNorm = max(1e-14,full(sum(fea.^2,1))'); 23 | fea = fea*spdiags(feaNorm.^-.5,0,nSmp,nSmp); 24 | end 25 | 26 | return; 27 
| 28 | 29 | 30 | 31 | 32 | 33 | 34 | if row 35 | [nSmp, mFea] = size(fea); 36 | if issparse(fea) 37 | fea2 = fea'; 38 | feaNorm = mynorm(fea2,1); 39 | for i = 1:nSmp 40 | fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); 41 | end 42 | fea = fea2'; 43 | else 44 | feaNorm = sum(fea.^2,2).^.5; 45 | fea = fea./feaNorm(:,ones(1,mFea)); 46 | end 47 | else 48 | [mFea, nSmp] = size(fea); 49 | if issparse(fea) 50 | feaNorm = mynorm(fea,1); 51 | for i = 1:nSmp 52 | fea(:,i) = fea(:,i) ./ max(1e-10,feaNorm(i)); 53 | end 54 | else 55 | feaNorm = sum(fea.^2,1).^.5; 56 | fea = fea./feaNorm(ones(1,mFea),:); 57 | end 58 | end 59 | 60 | 61 | -------------------------------------------------------------------------------- /fs_unsup_spfs_nes.m: -------------------------------------------------------------------------------- 1 | function [ W, lam ]= fs_unsup_spfs_nes( X, Y, k, err, starting ) 2 | %unsupervised feature selection by 2-1 norm regression 3 | % X - the training data, each row is an instance 4 | % Y - the class label 5 | 6 | if nargin < 5 7 | starting = 0.5; 8 | end 9 | % L2-1 norm 10 | opts.q=2; 11 | 12 | % lambda = lambda * lambda_{max} 13 | opts.rFlag=1; 14 | 15 | % norm( x_i - x_{i-1}, 2) <= .tol 16 | % opts.tFlag = 3; 17 | 18 | % Tolerance parameter. 
19 | % opts.tol=1e-4; 20 | 21 | % opts.init=2; 22 | 23 | % .x0= zeros(n,1), .c0=0 24 | % opts.init=2; 25 | opts.verbose = 0; 26 | opts.maxIter = 500; 27 | 28 | upL = 1; downL = 0; 29 | lam = starting; % the initial search point 30 | nZ = k + 2*err; 31 | count = 1; 32 | need = -1; 33 | 34 | while abs(nZ - k) > err && count <= 10 35 | oldNZ = nZ; 36 | oldNeed = need; 37 | 38 | % fprintf('need %i, iteration: %2i, lam: %f\n', k, count, lam); 39 | W = mcLeastR(X, Y, lam, opts); 40 | opts.x0=W; 41 | nZ = sum(sum(W.^2,2)>0); 42 | if nZ - k > err 43 | need = -1; 44 | downL = lam; lam = (downL + upL) / 2; 45 | elseif nZ - k < -err 46 | need = 1; 47 | upL = lam; lam = (downL + upL) / 2; 48 | end 49 | if nZ < oldNZ && oldNeed == 1 50 | opts = rmfield(opts, 'x0'); 51 | W = mcLeastR(X, Y, lam, opts); 52 | nZ = sum(sum(W.^2,2)>0); 53 | % fprintf('restart, %f, sel feat: %i\n-----\n', lam, nZ); 54 | end 55 | % fprintf('sel feat: %i\n-----\n', nZ); 56 | count = count + 1; 57 | end -------------------------------------------------------------------------------- /fs_unsup_llcfs_build_param.m: -------------------------------------------------------------------------------- 1 | function paramCell = fs_unsup_llcfs_build_param(nClusters, kCandidates, betaCandidates, kTypeCandidates, maxiterCandidates, epsilonCandidates ) 2 | if ~exist('kTypeCandidates', 'var') || isempty(kTypeCandidates) 3 | kTypeCandidates = [1]; 4 | end 5 | 6 | if ~exist('maxiterCandidates', 'var') || isempty(maxiterCandidates) 7 | maxiterCandidates = [20]; 8 | end 9 | 10 | if ~exist('epsilonCandidates', 'var') || isempty(epsilonCandidates) 11 | epsilonCandidates = [1e-4]; 12 | end 13 | 14 | 15 | n1 = length( kCandidates ); 16 | n2 = length( betaCandidates ); 17 | n3 = length( kTypeCandidates ); 18 | n4 = length( maxiterCandidates ); 19 | n5 = length( epsilonCandidates ); 20 | 21 | % number of parameter sets 22 | nP = n1 * n2 * n3 * n4 * n5; 23 | paramCell = cell( 1, nP ); 24 | 25 | idx = 0; 26 | for id1 = 1 : n1 27 | 
for id2 = 1 : n2 28 | for id3 = 1 : n3 29 | for id4 = 1 : n4 30 | for id5 = 1 : n5 31 | param = []; 32 | 33 | param.nClusters = nClusters; 34 | param.k = kCandidates( id1 ); 35 | param.beta = betaCandidates( id2 ); 36 | param.kType = kTypeCandidates( id3 ); 37 | param.maxiter = maxiterCandidates( id4 ); 38 | param.epsilon = epsilonCandidates( id5 ); 39 | 40 | idx = idx + 1; 41 | paramCell{idx} = param; 42 | end 43 | end 44 | end 45 | end 46 | end -------------------------------------------------------------------------------- /fs_unsup_spec_build_param.m: --------------------------------------------------------------------------------
function paramCell = fs_unsup_spec_build_param(kernelParamCell, styleCandi, expLamCandi, funcCandi)
% FS_UNSUP_SPEC_BUILD_PARAM  Enumerate every SPEC parameter combination.
%
%   paramCell = fs_unsup_spec_build_param(kernelParamCell, styleCandi, ...
%                                         expLamCandi, funcCandi)
%
% Inputs (each is optional; a missing or empty argument uses its default):
%   kernelParamCell - cell array of kernel options; entry i0 is copied to
%                     param.kernelOption. Default {} (no kernelOption field
%                     is written).
%   styleCandi      - candidates for param.style
%                     1: unsupervised feature selection
%                     2: supervised feature selection
%                     Default 1.
%   expLamCandi     - candidates for param.expLam, the exp order for the
%                     eigenvalue. Default [0.25, 1, 4].
%   funcCandi       - candidates for param.function
%                     1: f'Lf
%                     2: using all eigenvalues except the first one
%                     3: using the first k eigenvalues (in this case the
%                        bigger the weight the better)
%                     Default [1, 2, 3].
%
% Output:
%   paramCell - nP-by-1 cell array of parameter structs, where
%               nP = max(length(kernelParamCell), 1) * length(styleCandi)
%                    * length(expLamCandi) * length(funcCandi).
%               The kernel option varies slowest, funcCandi fastest.

% The guard must test kernelParamCell itself. Testing styleCandi here
% discarded the caller's kernel options whenever styleCandi was omitted.
if ~exist('kernelParamCell', 'var') || isempty(kernelParamCell)
    kernelParamCell = {};
end
if ~exist('styleCandi', 'var') || isempty(styleCandi)
    styleCandi = [1];
end

if ~exist('expLamCandi', 'var') || isempty(expLamCandi)
    expLamCandi = [0.25, 1, 4];
end

if ~exist('funcCandi', 'var') || isempty(funcCandi)
    funcCandi = [1, 2, 3];
end

% With no kernel options the outer loop still runs once, so the remaining
% three grids are enumerated without a kernelOption field.
n0 = max(length(kernelParamCell), 1);
n1 = length(styleCandi);
n2 = length(expLamCandi);
n3 = length(funcCandi);
nP = n0 * n1 * n2 * n3;
paramCell = cell(nP, 1);
idx = 0;
for i0 = 1:n0
    for i1 = 1:n1
        for i2 = 1:n2
            for i3 = 1:n3
                param = [];
                if ~isempty(kernelParamCell)
                    param.kernelOption = kernelParamCell{i0};
                end
                param.style = styleCandi(i1);
                param.expLam = expLamCandi(i2);
                param.function = funcCandi(i3);
                idx = idx + 1;
                paramCell{idx} = param;
            end
        end
    end
end
-------------------------------------------------------------------------------- /fs_unsup_maxvar_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_maxvar_single_func(dataset, exp_settings, algo_settings) 2 | %use laplacian score to select features.
3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | %================setup====================== 14 | 15 | %=========================================== 16 | disp(['dataset:',dataset]); 17 | [X, Y] = extractXY(dataset); 18 | [nSmp,nDim] = size(X); 19 | 20 | %get maxvar score 21 | disp('get maxvar score...'); 22 | t_start = clock; 23 | FeaScore = fs_unsup_maxvar(X); 24 | [~, index] = sort(FeaScore, 'descend'); 25 | % save([dataset, filesep,'feaIdx.mat'],'index'); 26 | t_end = clock; 27 | t1 = etime(t_end,t_start); 28 | disp(['exe time: ',num2str(t1)]); 29 | 30 | t_start = clock; 31 | disp('evaluation ...'); 32 | res_aio = cell(1, length(FeaNumCandi)); 33 | parfor feaIdx = 1:length(FeaNumCandi) 34 | res_aio{1, feaIdx} = evalUnSupFS(X, Y, index(1:FeaNumCandi(feaIdx)), struct('nKm', nKmeans)); 35 | end 36 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 37 | res_gs.feaset = FeaNumCandi; 38 | t_end = clock; 39 | t2 = etime(t_end,t_start); 40 | disp(['exe time: ',num2str(t2)]); 41 | res_gs.time = t1; 42 | res_gs.time2 = t2; 43 | 44 | save([prefix_mdcs, filesep, dataset, '_best_result_MaxVar.mat'],'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 45 | end -------------------------------------------------------------------------------- /fs_unsup_glspfs.m: -------------------------------------------------------------------------------- 1 | function [feaIndx,W,obj] = fs_unsup_glspfs(X, Kmatrix, L, r1, r2, numFea) %% 2 | [num, dim] = size(X); 3 | d = ones(dim,1); 4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 5 | % L = computeM(X,Kmatrix,options); 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 7 | [UY,VY] = eig(Kmatrix); 8 | diagVal = diag(VY); 9 | indxPos = find(diagVal>eps); 10 | UYpos = 
UY(:,indxPos); 11 | VYpos = diag(sqrt(diagVal(indxPos))); 12 | Ypos = UYpos*VYpos; 13 | 14 | NIter = 20; 15 | flag =1; 16 | objold = inf; 17 | iter = 0; 18 | if numNIter 30 | flag = 0; 31 | end 32 | objold = obj(iter); 33 | end 34 | else 35 | while flag 36 | iter = iter +1; 37 | D = spdiags(d,0,dim,dim); 38 | DX = D*X'; 39 | %%% Notive trick!!! 40 | W = (DX*(eye(num)+ r2*L)*X + r1*eye(dim) )\(DX*Ypos); 41 | Xi = sqrt(sum(W.*W,2)); 42 | d = 2*Xi; 43 | XW = X*W -Ypos; 44 | obj(iter) = trace(XW*XW') + r2*trace(L*((X*W)*(X*W)')) + r1*sum(Xi); 45 | if abs((objold-obj(iter))/obj(iter)) <1e-4 || iter>NIter 46 | flag = 0; 47 | end 48 | objold = obj(iter); 49 | end 50 | end 51 | Xi = sqrt(sum(W.*W,2)); 52 | [val0,indx0] = sort(Xi,'descend'); 53 | feaIndx = indx0(1:numFea); -------------------------------------------------------------------------------- /LocalReconstructLap.m: -------------------------------------------------------------------------------- 1 | function L = LocalReconstructLap(X, K) 2 | [D,N] = size(X); 3 | % fprintf(1,'- LLE running on %d points in %d dimensions\n',N,D); 4 | 5 | 6 | % STEP1: COMPUTE PAIRWISE DISTANCES & FIND NEIGHBORS 7 | % fprintf(1,'- Finding %d nearest neighbours.\n',K); 8 | 9 | X2 = sum(X.^2,1); 10 | distance = repmat(X2,N,1)+repmat(X2',1,N)-2*X'*X; 11 | 12 | [sorted,index] = sort(distance); 13 | neighborhood = index(2:(1+K),:); 14 | 15 | 16 | 17 | % STEP2: SOLVE FOR RECONSTRUCTION WEIGHTS 18 | % fprintf(1,'- Solving for reconstruction weights.\n'); 19 | 20 | if(K>D) 21 | fprintf(1,' [note: K>D; regularization will be used]\n'); 22 | tol=1e-3; % regularlizer in case constrained fits are ill conditioned 23 | else 24 | tol=0; 25 | end 26 | 27 | W = zeros(K,N); 28 | for ii=1:N 29 | z = X(:,neighborhood(:,ii))-repmat(X(:,ii),1,K); % shift ith pt to origin 30 | C = z'*z; % local covariance 31 | C = C + eye(K,K)*tol*trace(C); % regularlization (K>D) 32 | W(:,ii) = C\ones(K,1); % solve Cw=1 33 | W(:,ii) = W(:,ii)/sum(W(:,ii)); % enforce 
sum(w)=1 34 | end; 35 | 36 | % STEP 3: COMPUTE EMBEDDING FROM EIGENVECTS OF COST MATRIX M=(I-W)'(I-W) 37 | % fprintf(1,'- Computing embedding.\n'); 38 | 39 | % M=eye(N,N); % use a sparse matrix with storage for 4KN nonzero elements 40 | % M = sparse(1:N,1:N,ones(1,N),N,N,4*K*N); 41 | 42 | % for ii=1:N 43 | % w = W(:,ii); 44 | % jj = neighborhood(:,ii); 45 | % M(ii,jj) = M(ii,jj) - w'; 46 | % M(jj,ii) = M(jj,ii) - w; 47 | % M(jj,jj) = M(jj,jj) + w*w'; 48 | % end; 49 | 50 | M = zeros(N); 51 | for ii = 1:N 52 | M(ii,neighborhood(:,ii)) = W(:,ii)'; 53 | end 54 | L = (eye(N) - M)'*(eye(N) - M); 55 | end -------------------------------------------------------------------------------- /compute_accuracy_F.m: --------------------------------------------------------------------------------
function [confus,accuracy,numcorrect,precision,recall,F] = compute_accuracy_F (actual,pred,classes)
% COMPUTE_ACCURACY_F  Confusion matrix, accuracy and per-class precision,
% recall and F score.
%
%   [confus,accuracy,numcorrect,precision,recall,F] = ...
%       compute_accuracy_F(actual, pred, [classes])
%
% Inputs:
%   actual  - N-element vector of the actual classes
%   pred    - N-element vector of the predicted classes; it is transposed
%             when its row count differs from that of actual
%   classes - vector with the class labels to tabulate (by default 1:k,
%             where k is the largest value appearing in actual or pred)
%
% Outputs:
%   confus     - length(classes)-square matrix; confus(i,j) counts the
%                samples whose actual class is classes(i) and whose
%                predicted class is classes(j)
%   accuracy   - numcorrect / length(actual)
%   numcorrect - number of positions where actual == pred
%   precision  - precision(i) = confus(i,i) / sum(confus(:,i)), 0 when the
%                column is empty
%   recall     - recall(i) = confus(i,i) / sum(confus(i,:)), 0 when the row
%                is empty
%   F          - harmonic mean of precision(i) and recall(i), 0 when both
%                are 0
%
% precision, recall and F are only computed when the caller requests them
% (4th, 5th and 6th output respectively); otherwise they are left as [].

if size(actual,1) ~= size(pred,1)
    pred = pred';
end
if nargin < 3
    classes = 1:max(max(actual),max(pred));
end

numcorrect = sum(actual==pred);
accuracy = numcorrect/length(actual);

% Preallocate so that confus is always assigned, even for an empty class list.
nClass = length(classes);
confus = zeros(nClass, nClass);
for i = 1:nClass
    predOfClass = pred(actual == classes(i));   % predictions for samples of class i
    for j = 1:nClass
        confus(i,j) = nnz(predOfClass == classes(j));
    end
end

precision = [];
recall = [];
F = [];
for i = 1:nClass
    if nargout >= 4
        colTotal = sum(confus(:,i));            % everything predicted as class i
        if colTotal
            precision(i) = confus(i,i) / colTotal;
        else
            precision(i) = 0;
        end
    end
    if nargout >= 5
        rowTotal = sum(confus(i,:));            % everything that truly is class i
        if rowTotal
            recall(i) = confus(i,i) / rowTotal;
        else
            recall(i) = 0;
        end
    end
    if nargout >= 6
        if (precision(i)+recall(i))
            F(i) = 2 * (precision(i)*recall(i)) / (precision(i)+recall(i));
        else
            F(i) = 0;
        end
    end
end
-------------------------------------------------------------------------------- /components.m: -------------------------------------------------------------------------------- 1 | function blocks = components(A) 2 | %COMPONENTS Finds connected components in a graph defined by a adjacency matrix 3 | % 4 | % blocks = components(A) 5 | % 6 | % Finds connected components in a graph defined by the adjacency matrix A. 7 | % The function outputs an n-vector of integers 1:k in blocks, meaning that 8 | % A has k components. The vector blocks labels the vertices of A according 9 | % to component. 10 | % If the adjacency matrix A is undirected (i.e. symmetric), the blocks are 11 | % its connected components. If the adjacency matrix A is directed (i.e. 12 | % unsymmetric), the blocks are its strongly connected components. 13 | % 14 | % 15 | 16 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
17 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49 18 | % You are free to use, change, or redistribute this code in any way you 19 | % want for non-commercial purposes. However, it is appreciated if you 20 | % maintain the name of the original author. 21 | % 22 | % (C) Laurens van der Maaten, 2010 23 | % University California, San Diego / Delft University of Technology 24 | 25 | 26 | % Check size of adjacency matrix 27 | [n, m] = size(A); 28 | if n ~= m, error ('Adjacency matrix must be square'), end; 29 | 30 | % Compute Dulmage-Mendelsohn permutation on A 31 | if ~all(diag(A)) 32 | [foo, p, bar, r] = dmperm(A | speye(size(A))); 33 | else 34 | [foo, p, bar, r] = dmperm(A); 35 | end 36 | 37 | % Compute sizes and number of clusters 38 | sizes = diff(r); 39 | k = length(sizes); 40 | 41 | % Now compute the array blocks 42 | blocks = zeros(1, n); 43 | blocks(r(1:k)) = ones(1, k); 44 | blocks = cumsum(blocks); 45 | 46 | % Permute blocks so it maps vertices of A to components 47 | blocks(p) = blocks; 48 | -------------------------------------------------------------------------------- /fs_unsup_jelsr.m: -------------------------------------------------------------------------------- 1 | function [W_compute, Y, obj] = fs_unsup_jelsr(data, W_ori, ReducedDim,alpha,beta) 2 | 3 | %%%%%%%% Input: data: nSmp*nFea; 4 | %%% W_ori: The original local similarity matrix 5 | %%% ReducedDim: the dimensionality for low dimensionality 6 | %%% embedding $Y$ 7 | %%% alpha and beta ar two parameters 8 | 9 | [nSmp,nFea] = size(data); 10 | 11 | %%%%%%%%%%%%%%%%%%% Normalization of W_ori 12 | D_mhalf = full(sum(W_ori,2).^-.5); 13 | W = compute_W(W_ori,data,D_mhalf); 14 | %%%%%%%%%%%%%%%%%% Eigen_decomposition 15 | Y = compute_Y(data,W, ReducedDim, D_mhalf); 16 | if issparse(data) 17 | data = [data ones(size(data,1),1)]; 18 | [nSmp,nFea] = size(data); 19 | else 20 | sampleMean = mean(data); 21 | data = (data - repmat(sampleMean,nSmp,1)); 22 | end 23 | 24 | %%% To minimize 
squared loss with L21 normalization 25 | %%%%%%%%%%%% Initialization 26 | AA = data'*data; 27 | Ay = data'*Y; 28 | W_compute = (AA+alpha*eye(nFea))\Ay; 29 | d = sqrt(sum(W_compute.*W_compute,2)); 30 | 31 | itermax = 20; 32 | obj = zeros(itermax,1); 33 | feaK = data'*data; % modified by liang du 34 | for iter = 1:itermax 35 | %%%%%%%%%%%%%%%%%%% Fix D to updata W_compute, Y 36 | D = 2*spdiags(d,0,nFea,nFea); 37 | %%%%%%%%%%%%%%%% To updata Y 38 | A = (D*feaK+alpha*eye(nFea)); 39 | Temp = A\(D*data'); 40 | Temp = data*Temp; 41 | Temp = W_ori-beta*eye(nSmp)+beta*Temp; 42 | 43 | %%%%% Normalization 44 | Temp = compute_W(Temp,data,D_mhalf); 45 | %%%%% Eigen_decomposition 46 | Y = compute_Y(data,Temp, ReducedDim, D_mhalf); 47 | 48 | %%%%%%%%%%%%%%%%% To updata W 49 | B = D*data'*Y; 50 | W_compute = A\B; 51 | 52 | %%%%%%%%%%%%%%%%%% Fix W and update D 53 | d = sqrt(sum(W_compute.*W_compute,2)); 54 | 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /fs_unsup_spfs.m: -------------------------------------------------------------------------------- 1 | function fList = fs_unsup_spfs(X, K, Y, numF, options) 2 | % A wrapper function for different solvers of SPFS 3 | % 4 | % [ fList ] = spfs_sfs(X, K, numF); 5 | % 6 | % [ W, lam ]= spfs_nes( X, Y, k, err, starting ); 7 | % 8 | % [ fList, W ] = spfs_larnes( X, Y, numF ); 9 | % 10 | % [ fList, W ] = spfs_lar( X, Y, numF ) 11 | % 12 | % each solver is downloaded from the author Zheng Zhao 13 | % https://sites.google.com/site/alanzhao/ 14 | % 15 | % [1] Efficient Spectral Feature Selection with Minimum Redundancy, AAAI 2010 16 | % [2] On Similarity Preserving Feature Selection, TKDE, 2013 17 | 18 | if ~exist('options', 'var') || ~isfield(options, 'spfs_type') 19 | options.spfs_type = 'SFS'; 20 | end 21 | 22 | switch lower(options.spfs_type) 23 | case lower('SFS') 24 | [ fList ] = fs_unsup_spfs_sfs(X, K, numF); 25 | case lower('LAR') 26 | error('not supported yet!'); 27 | % the 
following code with LAR did not return enough features 28 | [eigvec, eigval] = eigs(K, options.nClass, 'LA'); 29 | Y = eigvec * diag(sqrt(max(diag(eigval), eps))); 30 | [ fList, W ] = fs_unsup_spfs_lar( X, Y, numF ); 31 | case lower('LARNES') 32 | error('not supported yet!'); 33 | % the following code with LARNES did not return enough features 34 | [eigvec, eigval] = eigs(K, options.nClass, 'LA'); 35 | Y = eigvec * diag(sqrt(max(diag(eigval), eps))); 36 | [ fList, W ] = fs_unsup_spfs_larnes( X, Y, numF ); 37 | case lower('NES') 38 | [eigvec, eigval] = eigs(K, options.nClass, 'LA'); 39 | Y = eigvec * diag(sqrt(max(diag(eigval), eps))); 40 | [ W, lam ]= fs_unsup_spfs_nes( X, Y, numF, 0.1*numF); 41 | fList = sum(W.^2,2); 42 | [~, fList] = sort(fList, 'descend'); 43 | fList = fList(1:numF); 44 | otherwise 45 | error('not supported yet!'); 46 | end -------------------------------------------------------------------------------- /compute_Y.m: -------------------------------------------------------------------------------- 1 | function Y = compute_Y(data, W, ReducedDim, D_mhalf) 2 | 3 | [nSmp,nFea] = size(data); 4 | 5 | dimMatrix = size(W,2); 6 | if (dimMatrix > 500 && ReducedDim < dimMatrix/10) 7 | option = struct('disp',0); 8 | [Y, eigvalue] = eigs(W,ReducedDim,'la',option); 9 | eigvalue = diag(eigvalue); 10 | else 11 | W = full(W); 12 | [Y, eigvalue] = eig(W); 13 | eigvalue = diag(eigvalue); 14 | 15 | [junk, index] = sort(-eigvalue); 16 | eigvalue = eigvalue(index); 17 | Y = Y(:,index); 18 | if ReducedDim < length(eigvalue) 19 | Y = Y(:, 1:ReducedDim); 20 | eigvalue = eigvalue(1:ReducedDim); 21 | end 22 | end 23 | 24 | eigIdx = find(abs(eigvalue) < 1e-6); 25 | eigvalue (eigIdx) = []; 26 | Y (:,eigIdx) = []; 27 | 28 | nGotDim = length(eigvalue); 29 | 30 | idx = 1; 31 | while(abs(eigvalue(idx)-1) < 1e-12) 32 | idx = idx + 1; 33 | if idx > nGotDim 34 | break; 35 | end 36 | end 37 | idx = idx - 1; 38 | 39 | if(idx > 1) 40 | % more than one eigenvector of 1 eigenvalue 41 | 
u = zeros(size(Y,1),idx); 42 | d_m = 1./D_mhalf; 43 | cc = 1/norm(d_m); 44 | u(:,1) = cc./D_mhalf; 45 | 46 | bDone = 0; 47 | for i = 1:idx 48 | if abs(Y(:,i)' * u(:,1) - 1) < 1e-14 49 | Y(:,i) = Y(:,1); 50 | Y(:,1) = u(:,1); 51 | bDone = 1; 52 | end 53 | end 54 | 55 | if ~bDone 56 | for i = 2:idx 57 | u(:,i) = Y(:,i); 58 | for j= 1:i-1 59 | u(:,i) = u(:,i) - (u(:,j)' * Y(:,i))*u(:,j); 60 | end 61 | u(:,i) = u(:,i)/norm(u(:,i)); 62 | end 63 | Y(:,1:idx) = u; 64 | end 65 | end 66 | 67 | if nGotDim < 5000 68 | Y = repmat(D_mhalf,1,nGotDim).*Y; 69 | else 70 | for k = 1:nGotDim 71 | Y(:,k) = Y(:,k).*D_mhalf; 72 | end 73 | end 74 | 75 | Y(:,1) = []; 76 | -------------------------------------------------------------------------------- /fs_unsup_glspfs_build_param.m: -------------------------------------------------------------------------------- 1 | function paramCell = fs_unsup_glspfs_build_param(local_type_candi, local_type_param_candi, knn_size_candi, ... 2 | lambda1_candi, lambda2_candi, global_kernel_cell_candi) 3 | n1 = length( local_type_candi ); 4 | n2 = zeros(n1, 1); 5 | for i1 = 1:length( local_type_candi ) 6 | n2(i1) = max(1, length(local_type_param_candi{i1})); 7 | end 8 | n3 = length( knn_size_candi ); 9 | n4 = length( lambda1_candi ); 10 | n5 = length( lambda2_candi ); 11 | n6 = length( global_kernel_cell_candi ); 12 | 13 | nP = max(sum(n2), 1) * n3 * n4 * n5 * n6; 14 | 15 | paramCell = cell(nP, 1); 16 | idx = 0; 17 | for i1 = 1:n1 18 | for i2 = 1:max(n2(i1), 1) 19 | for i3 = 1:n3 20 | for i4 = 1:n4 21 | for i5 = 1:n5 22 | for i6 = 1:n6 23 | param = []; 24 | param.local_type = local_type_candi{i1}; 25 | if ~isempty(local_type_candi) && ~isempty(local_type_param_candi{i1}) 26 | tmp = local_type_param_candi{i1}; 27 | param.local_lpp_sigma = tmp(i2); 28 | param.local_ltsa_embedded_dim = tmp(i2); 29 | else 30 | param.local_lpp_sigma = []; %place holder 31 | param.local_ltsa_embedded_dim = [];%place holder 32 | end 33 | param.local_k = knn_size_candi(i3); 34 | 
param.lambda1 = lambda1_candi(i4); 35 | param.lambda2 = lambda2_candi(i5); 36 | param.global_kernel_option = global_kernel_cell_candi{i6}; 37 | idx = idx + 1; 38 | paramCell{idx} = param; 39 | 40 | end 41 | end 42 | end 43 | end 44 | end 45 | end 46 | end -------------------------------------------------------------------------------- /fs_unsup_fsasl_build_param.m: --------------------------------------------------------------------------------
function paramCell = fs_unsup_fsasl_build_param(sr_solver_candi, sr_solver_param_candi, knn_size_candi, ...
    lambda2_candi, lambda3_candi, fs_solver_candi, iter_candi)
% FS_UNSUP_FSASL_BUILD_PARAM  Enumerate every FSASL parameter combination.
%
% Inputs:
%   sr_solver_candi       - cell array; entry s becomes param.LassoType
%   sr_solver_param_candi - cell array parallel to sr_solver_candi; entry s
%                           holds the candidate values for solver s and may
%                           be empty
%   knn_size_candi        - candidates for param.Localk
%   lambda2_candi         - candidates for param.lambda2
%   lambda3_candi         - candidates for param.lambda3
%   fs_solver_candi       - cell array; entry becomes param.GroupLassoType
%   iter_candi            - candidates for param.maxiter
%
% Output:
%   paramCell - column cell array of parameter structs. param.lambda1 is
%               always 1. param.SLEPreg and param.LARSk both receive the
%               same solver candidate value, and are only present when the
%               solver has a non-empty candidate list.
%
% Enumeration order: solver (slowest), solver value, knn size, lambda2,
% lambda3, group-lasso solver, iteration count (fastest).

nSolver = length( sr_solver_candi );

% A solver without candidate values still contributes one combination.
nSolverVal = zeros(nSolver, 1);
for s = 1:nSolver
    nSolverVal(s) = max(1, length(sr_solver_param_candi{s}));
end

nKnn      = length( knn_size_candi );
nLambda2  = length( lambda2_candi );
nLambda3  = length( lambda3_candi );
nFsSolver = length( fs_solver_candi );
nIter     = length( iter_candi );

nTotal = max(sum(nSolverVal), 1) * nKnn * nLambda2 * nLambda3 * nFsSolver * nIter;
paramCell = cell(nTotal, 1);

slot = 0;
for s = 1:nSolver
    solverVals = sr_solver_param_candi{s};
    for v = 1:nSolverVal(s)
        % Fields that depend only on the sparse-reconstruction solver are
        % assembled once and copied into every inner combination.
        head = [];
        head.LassoType = sr_solver_candi{s};
        if ~isempty(solverVals)
            head.SLEPreg = solverVals(v);
            head.LARSk = solverVals(v);
        end

        for iKnn = 1:nKnn
            for iL2 = 1:nLambda2
                for iL3 = 1:nLambda3
                    for iFs = 1:nFsSolver
                        for iIt = 1:nIter
                            entry = head;
                            entry.Localk = knn_size_candi(iKnn);
                            entry.lambda2 = lambda2_candi(iL2);
                            entry.lambda1 = 1;
                            entry.lambda3 = lambda3_candi(iL3);
                            entry.GroupLassoType = fs_solver_candi{iFs};
                            entry.maxiter = iter_candi(iIt);

                            slot = slot + 1;
                            paramCell{slot} = entry;
                        end
                    end
                end
            end
        end
    end
end
end
-------------------------------------------------------------------------------- /fs_unsup_traceratio_single_func.m:
--------------------------------------------------------------------------------
function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_traceratio_single_func(dataset, exp_settings, algo_settings)
% FS_UNSUP_TRACERATIO_SINGLE_FUNC  Run trace-ratio feature selection on one
% dataset, evaluate the selected subsets, and save the results.
%
% Inputs:
%   dataset       - dataset name; passed to extractXY and used as the prefix
%                   of the saved .mat file
%   exp_settings  - struct with fields
%                     FeaNumCandi : vector of feature-subset sizes to try
%                     nKmeans     : forwarded to evalUnSupFS as option nKm
%                     prefix_mdcs : (optional) output directory
%   algo_settings - accepted for interface parity with the other
%                   *_single_func drivers; not used here
%
% Outputs:
%   FeaNumCandi - copy of exp_settings.FeaNumCandi
%   res_gs      - first output of grid_search_fs, extended with the fields
%                 feaset (= FeaNumCandi), time (selection seconds) and
%                 time2 (evaluation seconds)
%   res_aio     - cell array, one row per parameter set and one column per
%                 subset size, holding the evalUnSupFS results
%   res_gs_ps   - second output of grid_search_fs
%
% Side effect: writes <dataset>_best_result_TraceRatio.mat under prefix_mdcs.

%======================setup===========================
FeaNumCandi = exp_settings.FeaNumCandi;
nKmeans = exp_settings.nKmeans;
prefix_mdcs = [];
if isfield(exp_settings, 'prefix_mdcs')
    prefix_mdcs = exp_settings.prefix_mdcs;
end
%======================================================

disp(['dataset:',dataset]);
[X, Y] = extractXY(dataset);

%================setup======================
knnCandi = 5;
n1 = length(knnCandi);
nP = n1;
paramCell = cell(nP, 1);
idx = 0;
for i1 = 1:n1
    param = [];
    param.k = knnCandi(i1);
    idx = idx + 1;
    paramCell{idx} = param;
end
%===========================================

t_start = clock;
disp('trace ratio...');
feaSubsets = cell(length(paramCell), length(FeaNumCandi));
for i1 = 1:length(paramCell)
    % The message used to say "UDFS", a copy-paste leftover from the UDFS driver.
    fprintf('TraceRatio parameter search %d out of %d...\n', i1, length(paramCell));
    param = paramCell{i1};
    [Sb, Sw] = compute_SbSw_unsup(X, param.k);
    parfor i2 = 1:length(FeaNumCandi)
        feaSubsets{i1, i2} = fs_unsup_traceratio(Sb, Sw, FeaNumCandi(i2));
    end
end
t_end = clock;
t1 = etime(t_end,t_start);
disp(['exe time: ',num2str(t1)]);

t_start = clock;
disp('evaluation ...');
% One row per parameter set, as in the other drivers. The previous code read
% feaSubsets{i1} by linear index, which only addresses the right cell while
% there is exactly one parameter set. With the single hard-coded knn value
% above, the result is still 1-by-length(FeaNumCandi).
res_aio = cell(length(paramCell), length(FeaNumCandi));
for i2 = 1:length(FeaNumCandi)
    m = FeaNumCandi(i2);
    parfor i1 = 1:length(paramCell)
        idx = feaSubsets{i1, i2};
        res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans));
    end
end
[res_gs, res_gs_ps] = grid_search_fs(res_aio);
res_gs.feaset = FeaNumCandi;
t_end = clock;
t2 = etime(t_end,t_start);
disp(['exe time: ',num2str(t2)]);
res_gs.time = t1;
res_gs.time2 = t2;

save(fullfile(prefix_mdcs, [dataset, '_best_result_TraceRatio.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps');
end
-------------------------------------------------------------------------------- /fs_unsup_spfs_lar.m: -------------------------------------------------------------------------------- 1 | function [ fList, W ] = fs_unsup_spfs_lar( X, Y, numF ) 2 | % function [ fList W ] = spfs_lar( X, K, numF ) 3 | % X - the data, each row is an instance 4 | % Y - the response of nY column 5 | % numF - the number of features we want to selected 6 | 7 | [nD, nF] = size(X); 8 | nY = size(Y,2); 9 | 10 | W = zeros(nF, nY); 11 | fList = zeros(numF, 1); 12 | k = 1; R = Y; 13 | 14 | % find the most correlated one 15 | bestCor = -1; bestNor = 0; 16 | for i = 1:nF 17 | curF = X(:,i); 18 | curNorm = norm(curF'*R,2); 19 | if curNorm > bestNor 20 | bestCor = i; bestNor = curNorm; 21 | end 22 | end 23 | fList(k) = bestCor; XA = X(:, bestCor); 24 | 25 | while k < numF && k < nF && k < nD 26 | k = k + 1; 27 | % fprintf('%i,',k); 28 | 29 | % obtain the proceed direction 30 | GA = XA\R; 31 | 32 | % compute how far can we go for every f 33 | a = X(:,fList(1))'*R; 34 | b = X(:,fList(1))'*XA*GA; 35 | bestCor = -1; bestNor = inf; 36 | for i = 1:nF 37 | if sum(fList==i) > 0 38 | continue; 39 | end 40 | c = X(:,i)'*R; 41 | d = X(:,i)'*XA*GA; 42 | p1=b*b'-d*d'; p2 = a*b'-c*d'; p3 = a*a'-c*c'; 43 | s1 = (p2+abs(sqrt(p2^2-p1*p3)))/p1; 44 | s2 = (p2-abs(sqrt(p2^2-p1*p3)))/p1; 45 | if (s1<=0 || s1>1) 46 | s1 = 100; 47 | end 48 | if (s2<=0 || s2>1) 49 | s2 = 100; 50 | end 51 | if s1==100 && s2==100 52 | continue; 53 | else 54 | s = min(s1,s2); 55 | end 56 | if s < bestNor 57 | bestNor = s; 58 | bestCor = i; 59 | end 60 | end 61 | if bestCor == -1; 62 | return 63 | else 64 | fList(k) = bestCor; 65 | XA = X(:, fList(1:k)); 66 | W(fList(1:k-1),:) = W(fList(1:k-1),:) + bestNor*GA; 67 | R = Y - X*W; 68 | % fprintf(' R: 
%f, W: %f, l: %f\n',norm(R), norm(W), bestNor); 69 | end 70 | end 71 | 72 | GA = pinv(full(XA'*XA))*XA'*R; 73 | W(fList(1:k),:) = W(fList(1:k),:) + GA; 74 | R = Y - X*W; 75 | % fprintf(' R: %f, W: %f\n',norm(R), norm(W)); -------------------------------------------------------------------------------- /fs_unsup_llcfs_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_llcfs_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using all features 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %================================ 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | knnCandi = 5; 20 | graphTypeCandi = [2]; 21 | betaCandidates = 10.^[-5:5]; 22 | paramCell = fs_unsup_llcfs_build_param(nClass, knnCandi, betaCandidates, graphTypeCandi ); 23 | %=============================================== 24 | 25 | disp('LLCFS ...'); 26 | t_start = clock; 27 | feaSubsets = cell(length(paramCell), 1); 28 | parfor i1 = 1:length(paramCell) 29 | fprintf(['LLCFS parameter search %d out of %d...\n'], i1, length(paramCell)); 30 | param = paramCell{i1}; 31 | [~, tao] = fs_unsup_llcfs(X,param); 32 | [~, idx] = sort(tao, 'descend'); 33 | feaSubsets{i1,1} = idx; 34 | end 35 | t_end = clock; 36 | t1 = etime(t_end,t_start); 37 | disp(['exe time: ',num2str(t1)]); 38 | 39 | t_start = clock; 40 | disp('evaluation ...'); 41 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 42 | for i2 = 1:length(FeaNumCandi) 43 | m = FeaNumCandi(i2); 44 | parfor i1 = 1:length(paramCell) 45 | 
fprintf('LLCFS parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 46 | idx = feaSubsets{i1,1}; 47 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans)); 48 | end 49 | end 50 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 51 | res_gs.feaset = FeaNumCandi; 52 | t_end = clock; 53 | t2 = etime(t_end,t_start); 54 | disp(['exe time: ',num2str(t2)]); 55 | res_gs.time = t1; 56 | res_gs.time2 = t2; 57 | 58 | save(fullfile(prefix_mdcs, [dataset, '_best_result_LLCFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 59 | end -------------------------------------------------------------------------------- /fs_unsup_ndfs_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_ndfs_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using NDFS 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | knnCandi = 5; 20 | weightCandi = {'HeatKernel'}; 21 | alphaCandi = 10.^[-5:5]; 22 | betaCandi = 10.^[-5:5]; 23 | s1 = optSigma(X); 24 | weight_param_Candi = {2.^[0] .* s1.^2}; 25 | paramCell = fs_unsup_ndfs_build_param(knnCandi, weightCandi, weight_param_Candi, alphaCandi, betaCandi); 26 | %=============================================== 27 | 28 | disp('NDFS ...'); 29 | t_start = clock; 30 | feaSubsets = cell(length(paramCell), 1); 31 | for i1 = 1:length(paramCell) 32 | fprintf(['NDFS parameter search %d out of 
%d...\n'], i1, length(paramCell)); 33 | param = paramCell{i1}; 34 | idx = fs_unsup_ndfs(X, nClass, param); 35 | feaSubsets{i1,1} = idx; 36 | end 37 | t_end = clock; 38 | t1 = etime(t_end,t_start); 39 | disp(['exe time: ',num2str(t1)]); 40 | 41 | disp('evaluation....'); 42 | t_start = clock; 43 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 44 | for i2 = 1:length(FeaNumCandi) 45 | for i1 = 1:length(paramCell) 46 | fprintf('NDFS parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 47 | idx = feaSubsets{i1,1}; 48 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 49 | end 50 | end 51 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 52 | res_gs.feaset = FeaNumCandi; 53 | t_end = clock; 54 | t2 = etime(t_end,t_start); 55 | disp(['exe time: ',num2str(t2)]); 56 | res_gs.time = t1; 57 | res_gs.time2 = t2; 58 | 59 | save(fullfile(prefix_mdcs, [dataset, '_best_result_NDFS.mat']) ,'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 60 | end -------------------------------------------------------------------------------- /fs_unsup_udfs_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_udfs_single_func(dataset, exp_settings, algo_settings) 2 | % run UDFS feature selection algorithm 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp, nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %======================setup=========================== 19 | gammaCandi = 10.^(-5:5); 20 | lamdaCandi = 10.^(-5:5); 21 | knnCandi = 5; 22 
| paramCell = fs_unsup_udfs_build_param(knnCandi, gammaCandi, lamdaCandi); 23 | %====================================================== 24 | 25 | t_start = clock; 26 | disp('UDFS ...'); 27 | feaSubsets = cell(length(paramCell), 1); 28 | parfor i1 = 1:length(paramCell) 29 | fprintf('UDFS parameter search %d out of %d...\n', i1, length(paramCell)); 30 | param = paramCell{i1}; 31 | L = LocalDisAna(X', param); 32 | A = X'*L*X; 33 | W = fs_unsup_udfs(A, nClass, param.gamma); 34 | [~, idx] = sort(sum(W.*W,2),'descend'); 35 | feaSubsets{i1,1} = idx; 36 | end 37 | t_end = clock; 38 | t1 = etime(t_end,t_start); 39 | disp(['exe time: ',num2str(t1)]); 40 | 41 | t_start = clock; 42 | disp('evaluation ...'); 43 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 44 | for i2 = 1:length(FeaNumCandi) 45 | m = FeaNumCandi(i2); 46 | parfor i1 = 1:length(paramCell) 47 | fprintf('UDFS parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 48 | idx = feaSubsets{i1,1}; 49 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans)); 50 | end 51 | end 52 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 53 | res_gs.feaset = FeaNumCandi; 54 | t_end = clock; 55 | t2 = etime(t_end,t_start); 56 | disp(['exe time: ',num2str(t2)]); 57 | res_gs.time = t1; 58 | res_gs.time2 = t2; 59 | 60 | save(fullfile(prefix_mdcs, [dataset, '_best_result_UDFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 61 | end -------------------------------------------------------------------------------- /fs_unsup_lapscore_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_lapscore_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using LapScore 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = 
[]; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %================setup====================== 19 | knnCandi = 5; 20 | weightCandi = {'HeatKernel'};%{'Binary','HeatKernel'}; 21 | s1 = optSigma(X); 22 | weight_param_Candi = {2.^[-3:3] .* s1.^2};% {[], 2.^[-3:3] .* s1.^2}; 23 | paramCell = fs_unsup_lapscore_build_param(knnCandi, weightCandi, weight_param_Candi); 24 | %=========================================== 25 | 26 | 27 | disp('LapScore ...'); 28 | t_start = clock; 29 | feaSubsets = cell(length(paramCell), 1); 30 | parfor i1 = 1:length(paramCell) 31 | fprintf(['LapScore parameter search %d out of %d...\n'], i1, length(paramCell)); 32 | param = paramCell{i1}; 33 | W = constructW(X, param); 34 | LS = fs_unsup_lapscore(X, W); 35 | [~, idx] = sort(-LS); 36 | feaSubsets{i1,1} = idx; 37 | end 38 | t_end = clock; 39 | t1 = etime(t_end,t_start); 40 | disp(['exe time: ',num2str(t1)]); 41 | 42 | disp('evaluation....'); 43 | t_start = clock; 44 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 45 | for i2 = 1:length(FeaNumCandi) 46 | parfor i1 = 1:length(paramCell) 47 | fprintf('LapScore parameter evaluation %d outof %d ... 
%d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 48 | idx = feaSubsets{i1,1}; 49 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 50 | end 51 | end 52 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 53 | res_gs.feaset = FeaNumCandi; 54 | t_end = clock; 55 | t2 = etime(t_end,t_start); 56 | disp(['exe time: ',num2str(t2)]); 57 | res_gs.time = t1; 58 | res_gs.time2 = t2; 59 | 60 | save(fullfile(prefix_mdcs, [dataset, '_best_result_LapScore.mat']), 'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 61 | end -------------------------------------------------------------------------------- /fs_unsup_spec_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_spec_single_func(dataset, exp_settings, algo_settings) 2 | %feature selection by SPEC 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | styleCandi = [1]; 20 | expLamCandi = [0.25, 1, 4]; 21 | funcCandi = [1, 2, 3]; 22 | s1 = optSigma(X); 23 | kernelParamCell = buildParamKernel({'Gaussian'}, {sqrt(2.^[-4:2]) * s1}, {''}); 24 | paramCell = fs_unsup_spec_build_param(kernelParamCell, styleCandi, expLamCandi, funcCandi); 25 | %=============================================== 26 | 27 | disp('SPEC ...'); 28 | t_start = clock; 29 | feaSubsets = cell(length(paramCell), 1); 30 | parfor i1 = 1:length(paramCell) 31 | fprintf(['SPEC parameter search %d out of %d...\n'], i1, length(paramCell)); 32 | K = 
constructKernel(X, X, paramCell{i1}.kernelOption); 33 | wFeat = fs_unsup_spec( K, X, LabelFormat(Y), paramCell{i1} ); 34 | [~, idx] = sort(wFeat,'descend'); 35 | feaSubsets{i1,1} = idx; 36 | end 37 | t_end = clock; 38 | t1 = etime(t_end,t_start); 39 | disp(['exe time: ',num2str(t1)]); 40 | 41 | t_start = clock; 42 | disp('evaluation ...'); 43 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 44 | for i2 = 1:length(FeaNumCandi) 45 | m = FeaNumCandi(i2); 46 | parfor i1 = 1:length(paramCell) 47 | fprintf('SPEC parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 48 | idx = feaSubsets{i1,1}; 49 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans)); 50 | end 51 | end 52 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 53 | res_gs.feaset = FeaNumCandi; 54 | t_end = clock; 55 | t2 = etime(t_end,t_start); 56 | disp(['exe time: ',num2str(t2)]); 57 | res_gs.time = t1; 58 | res_gs.time2 = t2; 59 | 60 | save(fullfile(prefix_mdcs, [dataset, '_best_result_SPEC.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 61 | end -------------------------------------------------------------------------------- /fs_unsup_fsasl_11_11_1_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_fsasl_11_11_1_single_func(dataset, exp_settings, algo_settings) 2 | %feature selection by FSASL 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | alphaCandi = 
10.^[-5:5]; 20 | betaCandi = 10.^[-5:5]; 21 | gammaCandi = [0.01]; 22 | maxIter = 50; 23 | nnCandi = 5; 24 | paramCell = fs_unsup_fsasl_build_param({'SLEP'}, {gammaCandi}, nnCandi, ... 25 | alphaCandi, betaCandi, {'LS21'}, maxIter); 26 | %=============================================== 27 | 28 | disp('FSSL ...'); 29 | t_start = clock; 30 | feaSubsets = cell(length(paramCell), 1); 31 | parfor i1 = 1:length(paramCell) 32 | fprintf(['FSSL parameter search %d out of %d...\n'], i1, length(paramCell)); 33 | W = FSASL(X', nClass, paramCell{i1}); 34 | [~, idx] = sort(sum(W.^2,2),'descend'); 35 | % save([dataset,'\','feaIdx_param_', num2str(i1), '.mat'],'idx'); 36 | feaSubsets{i1,1} = idx; 37 | end 38 | t_end = clock; 39 | t1 = etime(t_end,t_start); 40 | disp(['exe time: ',num2str(t1)]); 41 | 42 | t_start = clock; 43 | disp('evaluation ...'); 44 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 45 | for i2 = 1:length(FeaNumCandi) 46 | parfor i1 = 1:length(paramCell) 47 | fprintf('FSASL parameter evaluation %d outof %d ... 
%d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 48 | idx = feaSubsets{i1,1}; 49 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 50 | end 51 | end 52 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 53 | res_gs.feaset = FeaNumCandi; 54 | t_end = clock; 55 | t2 = etime(t_end,t_start); 56 | disp(['exe time: ',num2str(t2)]); 57 | res_gs.time = t1; 58 | res_gs.time2 = t2; 59 | 60 | 61 | save(fullfile(prefix_mdcs, [dataset, '_best_result_FSSL_11_11_1.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps', 'paramCell', 'feaSubsets'); 62 | end -------------------------------------------------------------------------------- /fs_unsup_jelsr_liang_lle_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_liang_lle_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using JELSR_liang 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | r1Candi = 10.^[-5:5]; 20 | r2Candi = 10.^[-5:5]; 21 | knnCandi = 5; 22 | weightCandi = {'lle'}; 23 | s1 = optSigma(X); 24 | weight_param_Candi = {s1}; 25 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi); 26 | %=============================================== 27 | disp('JELSR ...'); 28 | t_start = clock; 29 | feaSubsets = cell(length(paramCell), 1); 30 | parfor i1 = 1:length(paramCell) 31 | fprintf('JELSR_liang parameter search %d out 
of %d...\n', i1, length(paramCell)); 32 | param = paramCell{i1}; 33 | param.nClusters = nClass; 34 | [model_jelsr] = fs_unsup_jelsr_liang(X', param); 35 | [~,idx] = sort(model_jelsr.z, 'descend'); 36 | feaSubsets{i1,1} = idx; 37 | end 38 | t_end = clock; 39 | t1 = etime(t_end,t_start); 40 | disp(['exe time: ',num2str(t1)]); 41 | 42 | t_start = clock; 43 | disp('evaluation....'); 44 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 45 | for i2 = 1:length(FeaNumCandi) 46 | parfor i1 = 1:length(paramCell) 47 | fprintf('JELSR parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 48 | idx = feaSubsets{i1,1}; 49 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 50 | end 51 | end 52 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 53 | res_gs.feaset = FeaNumCandi; 54 | t_end = clock; 55 | t2 = etime(t_end,t_start); 56 | disp(['exe time: ',num2str(t2)]); 57 | res_gs.time = t1; 58 | res_gs.time2 = t2; 59 | 60 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_liang_lle.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); % fullfile avoids a leading filesep (root-directory path) when prefix_mdcs is empty 61 | end -------------------------------------------------------------------------------- /fs_unsup_jelsr_liang_lpp_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_liang_lpp_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using JELSR_liang 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | 
%===================setup======================= 19 | r1Candi = 10.^[-5:5]; 20 | r2Candi = 10.^[-5:5]; 21 | knnCandi = 5; 22 | weightCandi = {'lpp'}; 23 | s1 = optSigma(X); 24 | weight_param_Candi = {s1}; 25 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi); 26 | %=============================================== 27 | disp('JELSR ...'); 28 | t_start = clock; 29 | feaSubsets = cell(length(paramCell), 1); 30 | parfor i1 = 1:length(paramCell) 31 | fprintf('JELSR_liang parameter search %d out of %d...\n', i1, length(paramCell)); 32 | param = paramCell{i1}; 33 | param.nClusters = nClass; 34 | [model_jelsr] = fs_unsup_jelsr_liang(X', param); 35 | [~,idx] = sort(model_jelsr.z, 'descend'); 36 | feaSubsets{i1,1} = idx; 37 | end 38 | t_end = clock; 39 | t1 = etime(t_end,t_start); 40 | disp(['exe time: ',num2str(t1)]); 41 | 42 | t_start = clock; 43 | disp('evaluation....'); 44 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 45 | for i2 = 1:length(FeaNumCandi) 46 | parfor i1 = 1:length(paramCell) 47 | fprintf('JELSR parameter evaluation %d outof %d ... 
%d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 48 | idx = feaSubsets{i1,1}; 49 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 50 | end 51 | end 52 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 53 | res_gs.feaset = FeaNumCandi; 54 | t_end = clock; 55 | t2 = etime(t_end,t_start); 56 | disp(['exe time: ',num2str(t2)]); 57 | res_gs.time = t1; 58 | res_gs.time2 = t2; 59 | 60 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_liang_lpp.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); % fullfile avoids a leading filesep (root-directory path) when prefix_mdcs is empty 61 | end -------------------------------------------------------------------------------- /fs_unsup_fsasl_11_11_5_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_fsasl_11_11_5_single_func(dataset, exp_settings, algo_settings) 2 | %feature selection by FSASL 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | alphaCandi = 10.^[-5:5]; 20 | betaCandi = 10.^[-5:5]; 21 | gammaCandi = [0.001, 0.005, 0.01, 0.05, 0.1]; 22 | maxIter = 20; 23 | nnCandi = 5; 24 | paramCell = fs_unsup_fsasl_build_param({'SLEP'}, {gammaCandi}, nnCandi, ... 
25 | alphaCandi, betaCandi, {'LS21'}, maxIter); 26 | %=============================================== 27 | 28 | disp('FSSL ...'); 29 | t_start = clock; 30 | feaSubsets = cell(length(paramCell), 1); 31 | parfor i1 = 1:length(paramCell) 32 | fprintf(['FSSL parameter search %d out of %d...\n'], i1, length(paramCell)); 33 | W = FSASL(X', nClass, paramCell{i1}); 34 | [~, idx] = sort(sum(W.^2,2),'descend'); 35 | % save([dataset,'\','feaIdx_param_', num2str(i1), '.mat'],'idx'); 36 | feaSubsets{i1,1} = idx; 37 | end 38 | t_end = clock; 39 | t1 = etime(t_end,t_start); 40 | disp(['exe time: ',num2str(t1)]); 41 | 42 | t_start = clock; 43 | disp('evaluation ...'); 44 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 45 | for i2 = 1:length(FeaNumCandi) 46 | parfor i1 = 1:length(paramCell) 47 | fprintf('FSASL parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 48 | idx = feaSubsets{i1,1}; 49 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 50 | end 51 | end 52 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 53 | res_gs.feaset = FeaNumCandi; 54 | t_end = clock; 55 | t2 = etime(t_end,t_start); 56 | disp(['exe time: ',num2str(t2)]); 57 | res_gs.time = t1; 58 | res_gs.time2 = t2; 59 | 60 | 61 | save(fullfile(prefix_mdcs, [dataset, '_best_result_FSSL_11_11_5.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps', 'paramCell', 'feaSubsets'); 62 | end -------------------------------------------------------------------------------- /fs_unsup_jelsr_lle_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_lle_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using JELSR 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if 
isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %================================ 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | r1Candi = 10.^[-5:5]; 20 | r2Candi = 10.^[-5:5]; 21 | knnCandi = 5; 22 | weightCandi = {'lle'}; 23 | s1 = optSigma(X); 24 | weight_param_Candi = {s1}; 25 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi); 26 | %=============================================== 27 | 28 | t_start = clock; 29 | disp('JELSR ...'); 30 | feaSubsets = cell(length(paramCell), 1); 31 | parfor i1 = 1:length(paramCell) 32 | fprintf('JELSR parameter search %d out of %d...\n', i1, length(paramCell)); 33 | param = paramCell{i1}; 34 | [~, W] = computeLocalStructure(X, param.weightMode, param.k, param.t); 35 | W_compute = fs_unsup_jelsr(X, W, nClass, param.alpha, param.beta); 36 | [~,idx] = sort(sum(W_compute.^2,2), 'descend'); 37 | feaSubsets{i1,1} = idx; 38 | end 39 | t_end = clock; 40 | t1 = etime(t_end,t_start); 41 | disp(['exe time: ',num2str(t1)]); 42 | 43 | t_start = clock; 44 | disp('evaluation....'); 45 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 46 | for i2 = 1:length(FeaNumCandi) 47 | parfor i1 = 1:length(paramCell) 48 | fprintf('JELSR parameter evaluation %d outof %d ... 
%d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 49 | idx = feaSubsets{i1,1}; 50 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 51 | end 52 | end 53 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 54 | res_gs.feaset = FeaNumCandi; 55 | t_end = clock; 56 | t2 = etime(t_end,t_start); 57 | disp(['exe time: ',num2str(t2)]); 58 | res_gs.time = t1; 59 | res_gs.time2 = t2; 60 | 61 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_lle.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 62 | end -------------------------------------------------------------------------------- /fs_unsup_jelsr_lpp_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_jelsr_lpp_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using JELSR 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %================================ 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | r1Candi = 10.^[-5:5]; 20 | r2Candi = 10.^[-5:5]; 21 | knnCandi = 5; 22 | weightCandi = {'lpp'}; 23 | s1 = optSigma(X); 24 | weight_param_Candi = {s1}; 25 | paramCell = fs_unsup_jelsr_build_param(knnCandi, weightCandi, weight_param_Candi, r1Candi, r2Candi); 26 | %=============================================== 27 | 28 | t_start = clock; 29 | disp('JELSR ...'); 30 | feaSubsets = cell(length(paramCell), 1); 31 | parfor i1 = 1:length(paramCell) 32 | fprintf('JELSR parameter search %d out of %d...\n', i1, length(paramCell)); 33 | param = paramCell{i1}; 34 | [~, W] 
= computeLocalStructure(X, param.weightMode, param.k, param.t); 35 | W_compute = fs_unsup_jelsr(X, W, nClass, param.alpha, param.beta); 36 | [~,idx] = sort(sum(W_compute.^2,2), 'descend'); 37 | feaSubsets{i1,1} = idx; 38 | end 39 | t_end = clock; 40 | t1 = etime(t_end,t_start); 41 | disp(['exe time: ',num2str(t1)]); 42 | 43 | t_start = clock; 44 | disp('evaluation....'); 45 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 46 | for i2 = 1:length(FeaNumCandi) 47 | parfor i1 = 1:length(paramCell) 48 | fprintf('JELSR parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 49 | idx = feaSubsets{i1,1}; 50 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 51 | end 52 | end 53 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 54 | res_gs.feaset = FeaNumCandi; 55 | t_end = clock; 56 | t2 = etime(t_end,t_start); 57 | disp(['exe time: ',num2str(t2)]); 58 | res_gs.time = t1; 59 | res_gs.time2 = t2; 60 | 61 | save(fullfile(prefix_mdcs, [dataset, '_best_result_JELSR_lpp.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 62 | end -------------------------------------------------------------------------------- /fs_unsup_spfs_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_spfs_single_func(dataset, exp_settings, algo_settings) 2 | %feature selection by SPFS 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %================setup====================== 19 | s1 = 
optSigma(X); 20 | tCandi = 2.^[-3:3] * s1.^2; 21 | spfs_typeCandi = {'SFS', 'NES'}; 22 | nP = length(tCandi) * length(spfs_typeCandi); 23 | paramCell = cell(nP, 1) ; 24 | idx = 0; 25 | for i1 = 1:length(tCandi) 26 | for i2 = 1:length(spfs_typeCandi) 27 | param = []; 28 | param.t = tCandi(i1); 29 | param.spfs_type = spfs_typeCandi{i2}; 30 | idx = idx + 1; 31 | paramCell{idx} = param; 32 | end 33 | end 34 | %=========================================== 35 | 36 | disp('get SPFS...'); 37 | t_start = clock; 38 | Dist = EuDist2(X, X, 0); 39 | 40 | feaSubsets = cell(length(paramCell), 1); 41 | for i1 = 1:length(paramCell) 42 | param = paramCell{i1}; 43 | param.nClass = nClass; 44 | K = exp( - Dist / param.t); 45 | index = fs_unsup_spfs(X, K, [], max(FeaNumCandi), param); 46 | feaSubsets{i1,1} = index; 47 | end 48 | t_end = clock; 49 | t1 = etime(t_end,t_start); 50 | disp(['exe time: ',num2str(t1)]); 51 | 52 | t_start = clock; 53 | disp('evaluation ...'); 54 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 55 | for i2 = 1:length(FeaNumCandi) 56 | for i1 = 1:length(paramCell) 57 | fprintf('SPFS parameter evaluation %d outof %d ... 
%d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 58 | idx = feaSubsets{i1,1}; 59 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 60 | end 61 | end 62 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 63 | res_gs.feaset = FeaNumCandi; 64 | t_end = clock; 65 | t2 = etime(t_end,t_start); 66 | disp(['exe time: ',num2str(t2)]); 67 | res_gs.time = t1; 68 | res_gs.time2 = t2; 69 | 70 | save(fullfile(prefix_mdcs, [dataset, '_best_result_SPFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 71 | end -------------------------------------------------------------------------------- /fs_unsup_fsasl_11_5_5_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_fsasl_11_5_5_single_func(dataset, exp_settings, algo_settings) 2 | %feature selection by FSASL 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | alphaCandi = 10.^[-5:5]; 20 | betaCandi = [0.1:0.2:0.9]; 21 | gammaCandi = [0.001, 0.005, 0.01, 0.05, 0.1]; 22 | maxIter = 50; 23 | nnCandi = 5; 24 | paramCell = fs_unsup_fsasl_build_param({'SLEP'}, {gammaCandi}, nnCandi, ... 
25 | alphaCandi, betaCandi, {'LS21'}, maxIter); 26 | %=============================================== 27 | 28 | disp('FSSL ...'); 29 | t_start = clock; 30 | feaSubsets = cell(length(paramCell), 1); 31 | parfor i1 = 1:length(paramCell) 32 | fprintf(['FSSL parameter search %d out of %d...\n'], i1, length(paramCell)); 33 | param = paramCell{i1}; 34 | param.lambda1 = 1 - param.lambda2; 35 | W = FSASL(X', nClass, param); 36 | [~, idx] = sort(sum(W.^2,2),'descend'); 37 | % save([dataset,'\','feaIdx_param_', num2str(i1), '.mat'],'idx'); 38 | feaSubsets{i1,1} = idx; 39 | end 40 | t_end = clock; 41 | t1 = etime(t_end,t_start); 42 | disp(['exe time: ',num2str(t1)]); 43 | 44 | t_start = clock; 45 | disp('evaluation ...'); 46 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 47 | for i2 = 1:length(FeaNumCandi) 48 | parfor i1 = 1:length(paramCell) 49 | fprintf('FSASL parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 50 | idx = feaSubsets{i1,1}; 51 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 52 | end 53 | end 54 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 55 | res_gs.feaset = FeaNumCandi; 56 | t_end = clock; 57 | t2 = etime(t_end,t_start); 58 | disp(['exe time: ',num2str(t2)]); 59 | res_gs.time = t1; 60 | res_gs.time2 = t2; 61 | 62 | 63 | save(fullfile(prefix_mdcs, [dataset, '_best_result_FSSL_11_5_5.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps', 'paramCell', 'feaSubsets'); 64 | end -------------------------------------------------------------------------------- /fs_unsup_ndfs.m: -------------------------------------------------------------------------------- 1 | function idx = fs_unsup_ndfs(X, nClass, param) 2 | 3 | [L, F_init] = NDFS_init(X, nClass, param); 4 | 5 | W_init = ones(size(X,2),nClass); %W: the feature selection matrix 6 | warnState = warning('off', 'all'); % silence warnings during the iterations, remembering the caller's warning state 7 | [F,W,obj]=NDFS_iter(X', L, F_init, W_init, param.maxiter, param.alpha, param.beta, param.gamma); 8 | warning(warnState); % restore the saved state; a bare 'warning on' would also re-enable warnings the caller had turned off
9 | [~, idx] = sort(sum(W.*W,2),'descend'); 10 | end 11 | 12 | function [L, F_init] = NDFS_init(X, nClass, param) 13 | %construct the affinity matrix 14 | S = constructW(X, struct('k', param.k, 'WeightMode', param.weightMode, 't', param.t)); 15 | diag_ele_arr = sum(S); 16 | diag_ele_arr_t = diag_ele_arr.^(-1/2); 17 | L = eye(size(X,1)) - diag(diag_ele_arr_t)* S *diag(diag_ele_arr_t); 18 | L = (L + L')/2; 19 | [eigvec, eigval] = eig(L); 20 | [~, t1] = sort(diag(eigval), 'ascend'); 21 | eigvec = eigvec(:, t1(1:nClass)); 22 | eigvec = bsxfun(@rdivide, eigvec, sqrt(sum(eigvec.^2,2) + eps)); 23 | 24 | %init F and W 25 | rand('twister',5489); %#ok 26 | label = litekmeans(eigvec,nClass,'Replicates',10); % significantly! 27 | F_init = rand(size(X,1),nClass); 28 | for i = 1:size(X,1) 29 | F_init(i,label(i)) = 1; 30 | end 31 | F_init = F_init + 0.2; 32 | end 33 | 34 | function [F,W,obj]=NDFS_iter(X,L,F,W,maxIter,alpha,beta,gamma) 35 | % X: Rows of vectors of data points 36 | % L: The laplacian matrix. 
37 | % F: the cluster result 38 | % W: the feature selection matrix 39 | 40 | if nargin == 0 41 | return; 42 | end 43 | 44 | [nFeat,nSamp] = size(X); 45 | 46 | if size(L,1) ~= nSamp 47 | error('L is error'); 48 | end 49 | XX=X*X'; 50 | 51 | Wi = sqrt(sum(W.*W,2)+eps); 52 | d = 0.5./Wi; 53 | D = diag(d); 54 | 55 | % G=inv(XX+beta*D); 56 | % W=G*X*F; 57 | % Wi = sqrt(sum(W.*W,2)+eps); 58 | % d = 0.5./Wi; 59 | % D = diag(d); 60 | % clear Wi 61 | % M=L+alpha*(eye(nSamp)-X'*G*X); 62 | % clear G 63 | % M=(M+M')/2; 64 | % F = F.*(gamma*F + eps)./(M*F + gamma*F*F'*F + eps); 65 | % F = F*diag(sqrt(1./(diag(F'*F)+eps))); 66 | 67 | iter=1; 68 | while iter<=maxIter %|| (iter>2&& obj(end-1)-obj(end)>10^(-3)*obj(end)) 69 | G=inv(XX+beta*D); 70 | W=G*X*F; 71 | Wi = sqrt(sum(W.*W,2)+eps); 72 | d = 0.5./Wi; 73 | D = diag(d); 74 | clear Wi 75 | M=L+alpha*(eye(nSamp)-X'*G*X); 76 | clear G 77 | M=(M+M')/2; 78 | 79 | F = F.*(gamma*F + eps)./(M*F + gamma*F*F'*F + eps); 80 | F = F*diag(sqrt(1./(diag(F'*F)+eps))); 81 | clear Wnew 82 | 83 | obj(iter)=trace(F'*M*F)+gamma/4*norm(F'*F-eye(size(F,2)),'fro')^2; 84 | iter=iter+1; 85 | 86 | end 87 | end -------------------------------------------------------------------------------- /fs_unsup_rufs_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_rufs_single_func(dataset, exp_settings, algo_settings) 2 | %feature selection by RUFS 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 
19 | knnCandi = 5; 20 | rLamdaCandi = [0.1]; 21 | nuCandi = 10.^[-5:5]; 22 | alphaCandi = 10.^[-5:5]; 23 | betaCandi = 10.^[-5:5]; 24 | llkrrParamCell = buildParam_LLKRR(knnCandi, rLamdaCandi); 25 | paramCell = fs_unsup_rufs_build_param(llkrrParamCell, alphaCandi, betaCandi, nuCandi); 26 | %=============================================== 27 | 28 | t_start = clock; 29 | disp('RUFS ...'); 30 | feaSubsets = cell(length(paramCell), 1); 31 | rand('twister',5489); %#ok 32 | label = litekmeans(X,nClass,'Replicates',10); 33 | G0 = zeros(size(X,1),nClass); 34 | for i = 1:size(X,1) 35 | G0(i,label(i)) = 1; 36 | end 37 | 38 | %feature selection by RUFS 39 | parfor i1 = 1:length(paramCell) 40 | fprintf('RUFS parameter search %d out of %d...\n', i1, length(paramCell)); 41 | param = paramCell{i1}; 42 | L_init = localLearnMx_KRR(X, param.llkrrParam); 43 | W = fs_unsup_rufs(X,L_init,G0, param); 44 | [~, idx] = sort(sum(W.^2,2), 'descend'); 45 | feaSubsets{i1,1} = idx; 46 | end 47 | t_end = clock; 48 | t1 = etime(t_end,t_start); 49 | disp(['exe time: ',num2str(t1)]); 50 | 51 | t_start = clock; 52 | disp('evaluation ...'); 53 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 54 | for i2 = 1:length(FeaNumCandi) 55 | m = FeaNumCandi(i2); 56 | parfor i1 = 1:length(paramCell) 57 | fprintf('RUFS parameter evaluation %d outof %d ... 
%d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 58 | idx = feaSubsets{i1,1}; 59 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:m), struct('nKm', nKmeans)); 60 | end 61 | end 62 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 63 | res_gs.feaset = FeaNumCandi; 64 | t_end = clock; 65 | t2 = etime(t_end,t_start); 66 | disp(['exe time: ',num2str(t2)]); 67 | res_gs.time = t1; 68 | res_gs.time2 = t2; 69 | 70 | save(fullfile(prefix_mdcs, [dataset, '_best_result_RUFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 71 | end -------------------------------------------------------------------------------- /L2_distance.m: -------------------------------------------------------------------------------- 1 | function d = L2_distance(a, b) 2 | % L2_DISTANCE - computes Euclidean distance matrix 3 | % 4 | % E = L2_distance(A,B) 5 | % 6 | % A - (DxM) matrix 7 | % B - (DxN) matrix 8 | % 9 | % Returns: 10 | % E - (MxN) Euclidean distances between vectors in A and B 11 | % 12 | % 13 | % Description : 14 | % This fully vectorized (VERY FAST!) m-file computes the 15 | % Euclidean distance between two vectors by: 16 | % 17 | % ||A-B|| = sqrt ( ||A||^2 + ||B||^2 - 2*A.B ) 18 | % 19 | % Example : 20 | % A = rand(400,100); B = rand(400,200); 21 | % d = L2_distance(A,B); 22 | 23 | % Author : Roland Bunschoten 24 | % University of Amsterdam 25 | % Intelligent Autonomous Systems (IAS) group 26 | % Kruislaan 403 1098 SJ Amsterdam 27 | % tel.(+31)20-5257524 28 | % bunschot@wins.uva.nl 29 | % Last Rev : Wed Oct 20 08:58:08 MET DST 1999 30 | % Tested : PC Matlab v5.2 and Solaris Matlab v5.3 31 | 32 | % Copyright notice: You are free to modify, extend and distribute 33 | % this code granted that the author of the original code is 34 | % mentioned as the original author of the code. 35 | 36 | % Fixed by JBT (3/18/00) to work for 1-dimensional vectors 37 | % and to warn for imaginary numbers. 
Also ensures that 38 | % output is all real, and allows the option of forcing diagonals to 39 | % be zero. 40 | % 41 | % 42 | 43 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b. 44 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49 45 | % You are free to use, change, or redistribute this code in any way you 46 | % want for non-commercial purposes. However, it is appreciated if you 47 | % maintain the name of the original author. 48 | % 49 | % (C) Laurens van der Maaten, 2010 50 | % University California, San Diego / Delft University of Technology 51 | 52 | 53 | if nargin < 2 54 | error('Not enough input arguments'); 55 | end 56 | if size(a, 1) ~= size(b, 1) 57 | error('A and B should be of same dimensionality'); 58 | end 59 | if ~isreal(a) || ~isreal(b) 60 | warning('Computing distance table using imaginary inputs. Results may be off.'); 61 | end 62 | 63 | % Pad with a row of zeros if necessary, so that sum() below reduces over rows rather than along a single row vector 64 | if size(a, 1) == 1 65 | a = [a; zeros(1, size(a, 2))]; 66 | b = [b; zeros(1, size(b, 2))]; 67 | end 68 | 69 | % Compute distance table 70 | d = sqrt(bsxfun(@plus, sum(a .* a)', bsxfun(@minus, sum(b .* b), 2 * a' * b))); 71 | 72 | % Make sure result is real 73 | d = real(d); 74 | 75 | -------------------------------------------------------------------------------- /SimGraph_NearestNeighbors.m: -------------------------------------------------------------------------------- 1 | function W = SimGraph_NearestNeighbors(M, k, Type, sigma) 2 | % SIMGRAPH_NEARESTNEIGHBORS Returns kNN similarity graph 3 | % Returns adjacency matrix for a k-Nearest Neighbors 4 | % similarity graph 5 | % 6 | % 'M' - A d-by-n matrix containing n d-dimensional data points 7 | % 'k' - Number of neighbors 8 | % 'Type' - Type of kNN Graph 9 | % 1 - Normal 10 | % 2 - Mutual 11 | % 'sigma' - Parameter for Gaussian similarity function. Set 12 | % this to 0 for an unweighted graph. Default is 1. 
13 | % 14 | % Author: Ingo Buerk 15 | % Year : 2011/2012 16 | % Bachelor Thesis 17 | 18 | if nargin < 3 19 | ME = MException('InvalidCall:NotEnoughArguments', ... 20 | 'Function called with too few arguments'); 21 | throw(ME); 22 | end 23 | 24 | if ~any(Type == (1:2)) 25 | ME = MException('InvalidCall:UnknownType', ... 26 | 'Unknown similarity graph type'); 27 | throw(ME); 28 | end 29 | 30 | n = size(M, 2); 31 | 32 | % Preallocate memory 33 | indi = zeros(1, k * n); 34 | indj = zeros(1, k * n); 35 | inds = zeros(1, k * n); 36 | 37 | for ii = 1:n 38 | % Compute i-th column of distance matrix 39 | dist = distEuclidean(repmat(M(:, ii), 1, n), M); 40 | 41 | % Sort row by distance 42 | [s, O] = sort(dist, 'ascend'); 43 | 44 | % Save indices and value of the k 45 | indi(1, (ii-1)*k+1:ii*k) = ii; 46 | indj(1, (ii-1)*k+1:ii*k) = O(1:k); 47 | inds(1, (ii-1)*k+1:ii*k) = s(1:k); 48 | end 49 | 50 | % Create sparse matrix 51 | W = sparse(indi, indj, inds, n, n); 52 | 53 | clear indi indj inds dist s O; 54 | 55 | % Construct either normal or mutual graph 56 | if Type == 1 57 | % Normal 58 | W = max(W, W'); 59 | else 60 | % Mutual 61 | W = min(W, W'); 62 | end 63 | 64 | if nargin < 4 || isempty(sigma) 65 | sigma = 1; 66 | end 67 | 68 | % Unweighted graph 69 | if sigma == 0 70 | W = (W ~= 0); 71 | 72 | % Gaussian similarity function 73 | elseif isnumeric(sigma) 74 | W = spfun(@(W) (simGaussian(W, sigma)), W); 75 | 76 | else 77 | ME = MException('InvalidArgument:NotANumber', ... 78 | 'Parameter epsilon is not numeric'); 79 | throw(ME); 80 | end 81 | 82 | end 83 | 84 | function [ dist ] = distEuclidean( M, N ) 85 | %DISTEUCLIDEAN Calculates Euclidean distances 86 | % distEuclidean calculates the Euclidean distances between n 87 | % d-dimensional points, where M and N are d-by-n matrices, and 88 | % returns a 1-by-n vector dist containing those distances. 
89 | % 90 | % Author: Ingo Buerk 91 | % Year : 2011/2012 92 | % Bachelor Thesis 93 | 94 | dist = sqrt(sum((M - N) .^ 2, 1)); 95 | 96 | end -------------------------------------------------------------------------------- /fs_unsup_glspfs_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_glspfs_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using GLSPFS 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %===================setup======================= 19 | local_type_candi = {'LPP', 'LLE', 'LTSA'}; 20 | local_type_param_candi = {[], [], []}; 21 | knn_size_candi = 5; 22 | lambda1_candi = 10.^[-5:5]; 23 | lambda2_candi = 10.^[-5:5]; 24 | s1 = optSigma(X); 25 | global_kernel_cell_candi = buildParamKernel({'Gaussian'}, {sqrt(2.^[-1]) * s1}, {''}); 26 | local_type_param_candi{1} = [sqrt(2.^[-1]) * s1]; 27 | local_type_param_candi{3} = [nClass]; 28 | paramCell = fs_unsup_glspfs_build_param(local_type_candi, local_type_param_candi, knn_size_candi, ... 
29 | lambda1_candi, lambda2_candi, global_kernel_cell_candi); 30 | %=============================================== 31 | 32 | disp('GLSPFS ...'); 33 | t_start = clock; 34 | feaSubsets = cell(length(paramCell), 1); 35 | for i1 = 1:length(paramCell) 36 | fprintf('GLSPFS parameter search %d out of %d...\n', i1, length(paramCell)); 37 | param = paramCell{i1}; 38 | K = constructKernel(X, X, param.global_kernel_option); 39 | L = computeLocalStructure(X, param.local_type, param.local_k, param.local_lpp_sigma, param.local_ltsa_embedded_dim); 40 | feaSubsets{i1,1} = fs_unsup_glspfs(X, K, L, param.lambda1, param.lambda2, max(FeaNumCandi)); 41 | end 42 | t_end = clock; 43 | t1 = etime(t_end,t_start); 44 | disp(['exe time: ',num2str(t1)]); 45 | 46 | t_start = clock; 47 | disp('evaluation ...'); 48 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 49 | for i2 = 1:length(FeaNumCandi) 50 | for i1 = 1:length(paramCell) 51 | fprintf('GLSPFS parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 52 | idx = feaSubsets{i1,1}; 53 | res_aio{i1, i2} = evalUnSupFS(X, Y, idx(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 54 | end 55 | end 56 | [res_gs, res_gs_ps] = grid_search_fs(res_aio); 57 | res_gs.feaset = FeaNumCandi; 58 | t_end = clock; 59 | t2 = etime(t_end,t_start); 60 | disp(['exe time: ',num2str(t2)]); 61 | res_gs.time = t1; 62 | res_gs.time2 = t2; 63 | 64 | save(fullfile(prefix_mdcs, [dataset, '_best_result_GLSPFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 65 | end -------------------------------------------------------------------------------- /mdcs_check.m: --------------------------------------------------------------------------------
function [flag_writeable, flag_uploadable, prefix] = mdcs_check(username, password)
% MDCS_CHECK  Check ownership of the MDCS "work" and "filedependencies" directories.
%
%   [flag_writeable, flag_uploadable, prefix] = mdcs_check(username, password)
%
% Inputs:
%   username - account name. It is grepped for in the `ls -l ../` listing and
%              used to build /home/<username>. Please use your own username.
%   password - passed through, together with username, to email_notify.
%
% Outputs:
%   flag_writeable  - 1, or 0 when at least one check found no "work" entry
%                     in ../ whose listing line contains username.
%   flag_uploadable - 1, or 0 when at least one check found no
%                     "filedependencies" entry whose listing line contains
%                     username.
%   prefix          - pwd on Windows; on Unix /home/<username> when that
%                     directory exists, otherwise [].
%
% On Unix, whenever the "filedependencies" entry is not owned by username, the
% listed owner is e-mailed (at <owner>@ios.ac.cn) and asked to clean it up.
%
% NOTE(review): the shell commands inspect ../ relative to the directory this
% function runs in; they are simply repeated once per entry returned by
% get_mdcs_ip_dir. Confirm that this is the intended way of checking every
% worker.

flag_writeable = 1;
flag_uploadable = 1;
prefix = [];
if ispc
    prefix = pwd;
elseif isunix
    message = ['**************************************', ...
        'This is an automatically generated message. ',...
        'You received this email because you used the Matlab Distributed Computing Server (MDCS) ',...
        'in the Laboratory for Computer Science (LCS) recently, and some of your directories and files ',...
        'are not automatically deleted by the stupid Matlab Job Scheduler (mjs). As a result, ', ...
        'othes failed to submit jobs on these workers, please help to delete them manually! ', ...
        'Thansk for your cooporation! **************************************'];
    message3 = 'Thanks again! Liang Du, a heavier user, from DMGroup@LCS.';

    [mdcs_ips, mdcs_dirs] = get_mdcs_ip_dir(100);
    for i1 = 1:length(mdcs_ips)
        disp([' worker ip ', mdcs_ips{i1}]);
    end
    n_dir = zeros(length(mdcs_dirs), 1);   % matches for "work" per check
    n_dir2 = zeros(length(mdcs_dirs), 1);  % matches for "filedependencies" per check

    for i1 = 1:length(mdcs_dirs)
        worker_dir = mdcs_dirs{i1};
        disp([' worker pwd ', worker_dir]);
        unix('ls -l ../ |grep work');
        unix('ls -l ../ |grep filedependencies');
        [~, t1] = unix(['ls -l ../ |grep work | grep ', username, ' |wc -l']); % check the owner of work
        [~, t2] = unix(['ls -l ../ |grep filedependencies | grep ', username, ' |wc -l']); % check the owner of filedependencies
        n_dir(i1) = str2double(t1);
        n_dir2(i1) = str2double(t2);

        if n_dir2(i1) < 1
            % The original code concatenated the undefined variable "ip" and the
            % builtin "dir" here, so this branch always raised an error. Use the
            % worker that is currently being checked instead.
            if i1 <= length(mdcs_ips)
                worker_ip = mdcs_ips{i1};
            else
                worker_ip = 'unknown';
            end
            message2 = [' Please login to the ip = ', worker_ip, ...
                ' and manually delete the directory = ', worker_dir];

            [~, dir2_owner] = unix('ls -l ../ |grep filedependencies |awk -F '' '' ''{print $3}''');
            % The command output ends with a newline (and may list several
            % entries); keep only the first owner name so that the address
            % built below is well formed.
            dir2_owner = strtok(strtrim(dir2_owner));
            if isempty(dir2_owner)
                warning('Could not determine the owner of the filedependencies directory; no notification sent.');
            else
                email_notify(username, password, [dir2_owner, '@ios.ac.cn'], [message, message2, message3]);
            end
        end
    end

    % Flag a problem as soon as ANY check found no matching entry. Comparing
    % sum(n_dir) with length(n_dir) would miss a zero that is compensated by a
    % count of two or more elsewhere. ~(x >= 1) also flags unparsable (NaN) counts.
    if any(~(n_dir >= 1))
        warning('You are not the owner of some work directory ...');
        warning(' Write on this dir will failed ....');
        flag_writeable = 0;
    end

    if any(~(n_dir2 >= 1))
        warning('You are not the owner of some filedependencies directory ...');
        warning(' upload dependent file on this dir will failed ....');
        flag_uploadable = 0;
    end

    root_dir = ['/home/', username];
    if exist(root_dir, 'dir')
        prefix = root_dir;
    end

end
-------------------------------------------------------------------------------- /fs_unsup_mcfs_single_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi,res_gs,res_aio, res_gs_ps] = fs_unsup_mcfs_single_func(dataset, exp_settings, algo_settings) 2 | %Unsupervised feature selection using MCFS 3 | 4 | %======================setup=========================== 5 | FeaNumCandi = exp_settings.FeaNumCandi; 6 | nKmeans = exp_settings.nKmeans; 7 | prefix_mdcs = []; 8 | if isfield(exp_settings, 'prefix_mdcs') 9 | prefix_mdcs = exp_settings.prefix_mdcs; 10 | end 11 | %====================================================== 12 | 13 | disp(['dataset:',dataset]); 14 | [X, Y] = extractXY(dataset); 15 | [nSmp,nDim] = size(X); 16 | nClass = length(unique(Y)); 17 | 18 | %======================setup=========================== 19 | knnCandi = 5; 20 | weightCandi = {'Binary','HeatKernel'}; 21 | s1 = optSigma(X); 22 | weight_param_Candi = {[], 2.^[-3:3] .* s1.^2}; 23 | paramCell = fs_unsup_mcfs_build_param(knnCandi, weightCandi, weight_param_Candi); 24 | %====================================================== 25 | 26 | t_start = clock; 27 | feaSubsets = cell(length(paramCell), 1); 28 | valid_ids = zeros(length(paramCell), 1); 29 | parfor i1 = 1:length(paramCell) 30 | fprintf(['MCFS parameter search %d out of %d...\n'], i1, length(paramCell)); 31 | param = paramCell{i1}; 32 | W = constructW(X, param); 33 | options = []; 34 | options.nUseEigenfunction = nClass; 35 | options.W = W; 36 | % some may failed due to SR code 37 | try 38 | index = 
fs_unsup_mcfs(X,max(FeaNumCandi),options); 39 | feaSubsets{i1} = index{1}; 40 | catch 41 | valid_ids(i1) = 1; 42 | end 43 | end 44 | t_end = clock; 45 | t1 = etime(t_end,t_start); 46 | disp(['exe time: ',num2str(t1)]); 47 | 48 | t_start = clock; 49 | disp('evaluation ...'); 50 | valid_ids = find(valid_ids == 0); 51 | paramCell_old = paramCell; 52 | feaSubsets_old = feaSubsets; 53 | paramCell = cell(length(valid_ids), 1); 54 | feaSubsets = cell(length(valid_ids), 1); 55 | for i1=1:length(valid_ids) 56 | paramCell{i1} = paramCell_old{valid_ids(i1)}; 57 | feaSubsets{i1} = feaSubsets_old{valid_ids(i1)}; 58 | end 59 | res_aio = cell(length(paramCell), length(FeaNumCandi)); 60 | for i2 = 1:length(FeaNumCandi) 61 | parfor i1 = 1:length(paramCell) 62 | tmp = feaSubsets{i1, 1}; 63 | fprintf('MCFS parameter evaluation %d outof %d ... %d out of %d...\n', i2, length(FeaNumCandi), i1, length(paramCell)); 64 | res_aio{i1, i2} = evalUnSupFS(X, Y, tmp(1:FeaNumCandi(i2)), struct('nKm', nKmeans)); 65 | end 66 | end 67 | [res_gs,res_gs_ps] = grid_search_fs(res_aio); 68 | res_gs.feaset = FeaNumCandi; 69 | t_end = clock; 70 | t2 = etime(t_end,t_start); 71 | disp(['exe time: ',num2str(t2)]); 72 | res_gs.time = t1; 73 | res_gs.time2 = t2; 74 | 75 | save(fullfile(prefix_mdcs, [dataset, '_best_result_MCFS.mat']),'FeaNumCandi','res_gs','res_aio', 'res_gs_ps'); 76 | end -------------------------------------------------------------------------------- /initFactor.m: --------------------------------------------------------------------------------
function ratio=initFactor(x_norm, Ax , y, z, funName, rsL2, x_2norm)
%
%% function initFactor
%     compute an optimal constant factor for the initialization
%
%
% Input parameters:
% x_norm-      the norm of the starting point
% Ax-          A*x, with x being the initialization point
% y-           the response matrix
% z-           the regularization parameter or the ball
% funName-     the name of the function; one of 'LeastC', 'LeastR',
%              'glLeastR', 'mcLeastR', 'mtLeastR', 'nnLeastR', 'nnLeastC',
%              'mcLeastC'
% rsL2-        (optional, default 0) weight multiplying x_2norm in the
%              denominator; used by 'LeastC', 'LeastR', 'nnLeastR' and
%              'nnLeastC' only
% x_2norm-     (optional, default 0) quantity multiplied by rsL2 in the
%              denominator (presumably the squared 2-norm of x - confirm
%              against the callers)
%
% Output parameter:
% ratio-       the computed optimal initialization point is ratio*x
%
% An unknown funName raises an error.
%
%% Copyright (C) 2009-2010 Jun Liu, and Jieping Ye
%
% For any problem, please contact with Jun Liu via j.liu@asu.edu
%
% Last revised on August 2, 2009.

% The last two arguments only matter for some of the functions; let callers
% omit them.
if nargin < 6 || isempty(rsL2)
    rsL2 = 0;
end
if nargin < 7 || isempty(x_2norm)
    x_2norm = 0;
end

switch(funName)
    case 'LeastC'
        % unconstrained optimum, clipped to [-ratio_max, ratio_max]
        ratio_max     = z / x_norm;
        ratio_optimal = Ax'*y / (Ax'*Ax + rsL2 * x_2norm);

        if abs(ratio_optimal)<=ratio_max
            ratio   =  ratio_optimal;
        elseif ratio_optimal<0
            ratio   =  -ratio_max;
        else
            ratio   =  ratio_max;
        end

    case 'LeastR'
        ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax + rsL2 * x_2norm);

    case 'glLeastR'
        ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax);

    case 'mcLeastR'
        % matrix-valued Ax and y: use the Frobenius inner product and norm
        ratio=  (Ax(:)'*y(:) - z * x_norm) / norm(Ax,'fro')^2;

    case 'mtLeastR'
        ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax);

    case 'nnLeastR'
        % non-negative variant: never return a negative factor
        ratio=  (Ax'*y - z * x_norm) / (Ax'*Ax + rsL2 * x_2norm);
        ratio=max(0,ratio);

    case 'nnLeastC'
        % non-negative variant: optimum clipped to [0, ratio_max]
        ratio_max     = z / x_norm;
        ratio_optimal = Ax'*y / (Ax'*Ax + rsL2 * x_2norm);

        if ratio_optimal<0
            ratio=0;
        elseif ratio_optimal<=ratio_max
            ratio   =  ratio_optimal;
        else
            ratio   =  ratio_max;
        end

    case 'mcLeastC'
        ratio_max     = z / x_norm;
        % The minimiser of ||r*Ax - y||_F^2 over the scalar r is
        % <Ax, y> / ||Ax||_F^2, as already used by 'mcLeastR'. The previous
        % denominator norm(Ax'*Ax,'fro')^2 is a different quantity.
        ratio_optimal = Ax(:)'*y(:) / (norm(Ax,'fro')^2);

        if abs(ratio_optimal)<=ratio_max
            ratio   =  ratio_optimal;
        elseif ratio_optimal<0
            ratio   =  -ratio_max;
        else
            ratio   =  ratio_max;
        end

    otherwise
        % Previously this only printed a message and left "ratio" unassigned,
        % so the caller got an unrelated "output argument not assigned" error.
        error('initFactor:unknownFunName', ...
            'The specified funName (%s) is not supported', funName);
end
-------------------------------------------------------------------------------- /constructKernel.m: 
-------------------------------------------------------------------------------- 1 | function K = constructKernel(fea_a,fea_b,options) 2 | % function K = constructKernel(fea_a,fea_b,options) 3 | % Usage: 4 | % K = constructKernel(fea_a,[],options) 5 | % 6 | % K = constructKernel(fea_a,fea_b,options) 7 | % 8 | % fea_a, fea_b : Rows of vectors of data points. 9 | % 10 | % options : Struct value in Matlab. The fields in options that can 11 | % be set: 12 | % KernelType - Choices are: 13 | % 'Gaussian' - e^{-(|x-y|^2)/2t^2} 14 | % 'Polynomial' - (x'*y)^d 15 | % 'PolyPlus' - (x'*y+1)^d 16 | % 'Linear' - x'*y 17 | % 18 | % t - parameter for Gaussian 19 | % d - parameter for Poly 20 | % 21 | % version 1.0 --Sep/2006 22 | % 23 | % Written by Deng Cai (dengcai2 AT cs.uiuc.edu) 24 | % 25 | 26 | if (~exist('options','var')) 27 | options = []; 28 | else 29 | if ~isstruct(options) 30 | error('parameter error!'); 31 | end 32 | end 33 | 34 | 35 | 36 | %================================================= 37 | if ~isfield(options,'KernelType') 38 | options.KernelType = 'Gaussian'; 39 | end 40 | 41 | switch lower(options.KernelType) 42 | case {lower('Gaussian')} % e^{-(|x-y|^2)/2t^2} 43 | if ~isfield(options,'t') 44 | options.t = 1; 45 | end 46 | case {lower('Polynomial')} % (x'*y)^d 47 | if ~isfield(options,'d') 48 | options.d = 2; 49 | end 50 | case {lower('PolyPlus')} % (x'*y+1)^d 51 | if ~isfield(options,'d') 52 | options.d = 2; 53 | end 54 | case {lower('Linear')} % x'*y 55 | otherwise 56 | error('KernelType does not exist!'); 57 | end 58 | 59 | 60 | %================================================= 61 | 62 | switch lower(options.KernelType) 63 | case {lower('Gaussian')} 64 | if isempty(fea_b) 65 | D = EuDist2(fea_a,[],0); 66 | else 67 | D = EuDist2(fea_a,fea_b,0); 68 | end 69 | K = exp(-D/(2*options.t^2)); 70 | case {lower('Polynomial')} 71 | if isempty(fea_b) 72 | D = full(fea_a * fea_a'); 73 | else 74 | D = full(fea_a * fea_b'); 75 | end 76 | K = D.^options.d; 77 | case 
{lower('PolyPlus')} 78 | if isempty(fea_b) 79 | D = full(fea_a * fea_a'); 80 | else 81 | D = full(fea_a * fea_b'); 82 | end 83 | K = (D+1).^options.d; 84 | case {lower('Linear')} 85 | if isempty(fea_b) 86 | K = full(fea_a * fea_a'); 87 | else 88 | K = full(fea_a * fea_b'); 89 | end 90 | otherwise 91 | error('KernelType does not exist!'); 92 | end 93 | 94 | if isempty(fea_b) 95 | K = max(K,K'); 96 | end 97 | 98 | 99 | -------------------------------------------------------------------------------- /find_nn.m: --------------------------------------------------------------------------------
function [D, ni] = find_nn(X, k)
%FIND_NN Finds k nearest neigbors for all datapoints in the dataset
%
%	[D, ni] = find_nn(X, k)
%
% Finds the k nearest neighbors for all datapoints in the dataset X.
% In X, rows correspond to the observations and columns to the
% dimensions. The value of k is the number of neighbors that is
% stored (default = 12). Each point is its own nearest neighbor, so it is
% counted among the k. Passing a character value for k selects adaptive
% neighborhood selection through find_nn_adaptive.
%
% The function returns a sparse n-by-n distance matrix D, in which only
% the distances to the k nearest neighbors are stored. Distances that are
% exactly zero are stored as the tolerance value 1e-7 so that they remain
% explicit entries of the sparse matrix (for up to 2000 points only the
% diagonal is treated this way). The optional second output ni holds, per
% row, the indices of the selected neighbors.
%
% For more than 2000 points a loop over the points is used, which is
% relatively slow but has a memory requirement of O(nk).
%
%

% This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
% The toolbox can be obtained from http://homepage.tudelft.nl/19j49
% You are free to use, change, or redistribute this code in any way you
% want for non-commercial purposes. However, it is appreciated if you
% maintain the name of the original author.
%
% (C) Laurens van der Maaten, 2010
% University California, San Diego / Delft University of Technology


    if ~exist('k', 'var') || isempty(k)
        k = 12;
    end

    % Perform adaptive neighborhood selection if desired
    if ischar(k)
        [D, max_k] = find_nn_adaptive(X);
        ni = zeros(size(X, 1), max_k);
        for i=1:size(X, 1)
            tmp = find(D(i,:) ~= 0);
            tmp = sort(tmp, 'ascend');
            % NOTE(review): the first non-zero column is dropped and the row
            % is zero-padded to max_k entries - confirm against
            % find_nn_adaptive that the dropped entry is the intended one.
            tmp = [tmp(2:end) zeros(1, max_k - length(tmp) + 1)];
            ni(i,:) = tmp;
        end

    % Perform normal neighborhood selection
    else

        % Memory conservative implementation
        if size(X, 1) > 2000
            X = X';
            n = size(X, 2);
            D = zeros(n, k);
            XX = sum(X .^ 2, 1);

            % uint16 saturates at 65535, which silently corrupts the neighbor
            % indices of larger datasets; widen the index type when needed.
            if n > double(intmax('uint16'))
                idx_class = 'uint32';
            else
                idx_class = 'uint16';
            end
            % ni is always needed to assemble the sparse matrix below, so
            % preallocate it regardless of nargout.
            ni = zeros(n, k, idx_class);

            for i=1:n
                p = X(:,i);
                xx = sum(p .^ 2);
                xX = p' * X;
                d = bsxfun(@plus, XX - 2 * xX, xx);   % squared distances to point i
                [d, ind] = sort(d);
                d = d(1:k);
                % Round-off can leave a tiny negative squared distance (e.g.
                % for the point itself); clamp it so that sqrt stays real.
                d(d < 0) = 0;
                d = sqrt(d);
                ind = ind(1:k);
                d(d == 0) = 1e-7;
                D(i,:) = d;
                ni(i,:) = ind;
            end
            rows = repmat((1:n)', [1 k]);
            D = sparse(rows(:), double(ni(:)), double(D(:)), n, n);

        % Faster implementation
        else
            n = size(X, 1);
            D = L2_distance(X', X');
            [foo, ind] = sort(D, 2);
            % linear indices of everything beyond the k nearest per row
            flat = repmat((1:n)', 1, n - k) + n * ind(:,k+1:end) - n;
            D(flat(:)) = 0;
            D(1:n+1:end) = 1e-7;
            D = sparse(double(D));

            % n <= 2000 here, so uint16 cannot overflow
            if nargout > 1, ni = uint16(ind(:,1:k)); end
        end
    end
-------------------------------------------------------------------------------- /grid_search_fs.m: -------------------------------------------------------------------------------- 1 | function [res_gs2, res_gs_ps2] = grid_search_fs(res_aio) 2 | % input 3 | % nParam * nFea, cell 4 | % 5 | % for each feature subset, 6 | % for each evaluation measure, 7 | % choose the bset result 8 | % Liang Du (csliangdu@gmail.com) 9 | % 10 | 11 | [nParam, nSubsets] = size(res_aio); 12 | res_gs 
= cell(1, nSubsets); 13 | res_gs_ps = res_gs; 14 | fn1 = {'mean_acc', 'mean_nmi_sqrt', 'mean_nmi_max', 'mean_purity', 'mean_prec', 'mean_recall', 'mean_f1'}; 15 | fn2 = {'std_acc', 'std_nmi_sqrt', 'std_nmi_max', 'std_purity', 'std_prec', 'std_recall', 'std_f1'}; 16 | fn3 = {'best_obj_acc', 'best_obj_nmi_max', 'best_obj_nmi_sqrt', 'best_obj_purity',... 17 | 'best_obj_prec', 'best_obj_recall', 'best_obj_f1', ... 18 | 'jac', 'loocv'}; 19 | fn4 = {'red'}; 20 | for i1 = 1:nSubsets 21 | res_gs{1, i1} = res_aio{1,i1}; %place holder 22 | for i3 = 1:length(fn1) 23 | res_gs_ps{1, i1}.(fn1{i3}) = 1; 24 | end 25 | for i3 = 1:length(fn3) 26 | res_gs_ps{1, i1}.(fn3{i3}) = 1; 27 | end 28 | for i3 = 1:length(fn4) 29 | res_gs_ps{1, i1}.(fn4{i3}) = 1; 30 | end 31 | for i2 = 1:nParam 32 | for i3 = 1:length(fn1) 33 | if (isfield(res_aio{i2, i1}, fn1{i3}) && isfield(res_gs{1, i1},fn1{i3}) ) && (res_aio{i2, i1}.(fn1{i3}) > res_gs{1, i1}.(fn1{i3})) 34 | res_gs{1, i1}.(fn1{i3}) = res_aio{i2, i1}.(fn1{i3}); 35 | res_gs{1, i1}.(fn2{i3}) = res_aio{i2, i1}.(fn2{i3}); 36 | res_gs_ps{1, i1}.(fn1{i3}) = i2; 37 | end 38 | end 39 | for i3 = 1:length(fn3) 40 | if (isfield(res_aio{i2, i1}, fn3{i3}) && isfield(res_gs{1, i1}, fn3{i3}) ) && (res_aio{i2, i1}.(fn3{i3}) > res_gs{1, i1}.(fn3{i3})) 41 | res_gs{1, i1}.(fn3{i3}) = res_aio{i2, i1}.(fn3{i3}); 42 | res_gs_ps{1, i1}.(fn3{i3}) = i2; 43 | end 44 | end 45 | for i3 = 1:length(fn4) 46 | if (isfield(res_aio{i2, i1}, fn4{i3}) && isfield(res_gs{1, i1}, fn4{i3}) ) && (res_aio{i2, i1}.(fn4{i3}) > res_gs{1, i1}.(fn4{i3})) 47 | res_gs{1, i1}.(fn4{i3}) = res_aio{i2, i1}.(fn4{i3}); 48 | res_gs_ps{1, i1}.(fn4{i3}) = i2; 49 | end 50 | end 51 | end 52 | end 53 | 54 | res_gs2 = res_gs{1,1}; 55 | res_gs_ps2 = res_gs_ps{1,1}; 56 | for i1 = 2:nSubsets 57 | for i3 = 1:length(fn1) 58 | res_gs2.(fn1{i3}) = [res_gs2.(fn1{i3}), res_gs{1, i1}.(fn1{i3})]; 59 | res_gs2.(fn2{i3}) = [res_gs2.(fn2{i3}), res_gs{1, i1}.(fn2{i3})]; 60 | res_gs_ps2.(fn1{i3}) = 
[res_gs_ps2.(fn1{i3}), res_gs_ps{1, i1}.(fn1{i3})]; 61 | end 62 | for i3 = 1:length(fn3) 63 | res_gs2.(fn3{i3}) = [res_gs2.(fn3{i3}), res_gs{1, i1}.(fn3{i3})]; 64 | res_gs_ps2.(fn3{i3}) = [res_gs_ps2.(fn3{i3}), res_gs_ps{1, i1}.(fn3{i3})]; 65 | end 66 | for i3 = 1:length(fn4) 67 | res_gs2.(fn4{i3}) = [res_gs2.(fn4{i3}), res_gs{1, i1}.(fn4{i3})]; 68 | res_gs_ps2.(fn4{i3}) = [res_gs_ps2.(fn4{i3}), res_gs_ps{1, i1}.(fn4{i3})]; 69 | end 70 | end 71 | end -------------------------------------------------------------------------------- /fs_unsup_jelsr_liang.m: --------------------------------------------------------------------------------
function [model_jelsr] = fs_unsup_jelsr_liang(X, param)
% FS_UNSUP_JELSR_LIANG  Unsupervised feature scoring with JELSR.
%
%   model_jelsr = fs_unsup_jelsr_liang(X, param)
%
% Input
%   X:     nDim * nSmp data matrix (one sample per column)
%   param: struct with the fields
%       nClusters  - (required) dimension of the learned embedding
%       weightMode - (required) type of local structure, passed to
%                    computeLocalStructure
%       k          - neighborhood size passed to computeLocalStructure
%                    (default 5)
%       t          - extra parameter of the local structure, passed to
%                    computeLocalStructure (default [])
%       alpha      - regularization parameter (default 1)
%       beta       - regularization parameter (default 1)
%
% Output
%   model_jelsr: struct with the fields
%       z       - nDim * 1, Euclidean norm of every row of W (feature scores)
%       Y       - nSmp * nClusters embedding
%       runTime - CPU time spent, in seconds
%       obj     - objective history (currently always empty, see
%                 JELSR_AlterOptimizer)

if ~exist('param', 'var')
    param = [];
end

if ~isfield(param, 'nClusters')
    error('The number of Clusters should be specified');
end
nClusters = param.nClusters;

if ~isfield(param, 'weightMode')
    error('The type of local structure (param.weightMode) should be specified');
end

% optional fields and their defaults
k = 5;
if isfield(param, 'k')
    k = param.k;
end

t = [];
if isfield(param, 't')
    t = param.t;
end

beta = 1;
if isfield(param, 'beta')
    beta = param.beta;
end

alpha = 1;
if isfield(param, 'alpha')
    alpha = param.alpha;
end

t1 = cputime;
% Use the resolved k and t: reading param.k / param.t directly made the
% defaults above dead code and failed whenever a field was missing.
L = computeLocalStructure(X', param.weightMode, k, t);
[W, Y, obj] = JELSR_AlterOptimizer(X, L, nClusters, alpha, beta);

model_jelsr.z = sqrt(sum(W.^2,2));
model_jelsr.Y = Y;
model_jelsr.runTime = cputime - t1;
model_jelsr.obj = obj;
end

function [W, Y, obj] = JELSR_AlterOptimizer(X, L, nDimEmb, alpha, beta)
% Input
%   X: nDim * nSmp
%   L: nSmp * nSmp; Local reconstruction kernel
%   nDimEmb: low embedding dimension
%   alpha: regularization parameter
%   beta: regularization parameter
% Output
%   W: nDim * nDimEmb
%   Y: nSmp * nDimEmb (eigenvectors are stored as columns)
%   obj: obj history; always [] because the objective evaluation below is
%        commented out
% Optimization objective
%   min{W, U, Y} = tr(Y L Y') + beta*||W' X - Y ||^2 + beta*alpha* tr(W' U W)
%
% Runs a fixed number of 20 alternating iterations.
%
% [1]. Feature Selection via Joint Embedding Learning and Sparse Regression.
%       Chenping Hou, etc. IJCAI, 2011.
%

[nDim, nSmp] = size(X);

if nDim < nSmp
    A = X*X';      % only needed by the direct solve below
end
U = ones(nDim, 1); % diagonal of the re-weighting matrix, stored as a vector

nIter = 20;
obj = [];

for iter = 1:nIter
    % Step1: Fix U, update Y by solving the problem in Eq. (16);

    if nDim < nSmp
        % AiX = inv(A + alpha*U)*X, solved directly (nDim * nDim system)
        AiX = (A + alpha*diag(U))\X;
    else
        % Same quantity through the Woodbury identity (nSmp * nSmp system):
        % AiX = (1/alpha) * U^-1 X [ I - (alpha I + X' U^-1 X)^-1 X' U^-1 X]
        UX = bsxfun(@times, 1./U, X);
        KiU = X' * UX;
        AiX = UX * (eye(nSmp) - (alpha * eye(nSmp) + KiU) \ KiU);
        AiX = AiX/alpha;
    end

    K = L + beta*eye(nSmp) - beta*X'*AiX;
    K = (K + K') / 2;                       % remove round-off asymmetry
    [eigvec, eigval] = eig(K);
    [eigval, idx] = sort(diag(eigval));     % ascending
    Y = eigvec(:, idx(1:nDimEmb));          % eigenvectors of the smallest eigenvalues

    % Step2: Fix U, update W by using Eq. (13);
    W = AiX*Y;

    % Step3: Fix W, update U by Eq. (9); eps guards against all-zero rows
    U = full(0.5./(sqrt(sum(W.^2,2)) + eps));

    % Objective tracking / early stopping (disabled):
    %     epsilon = 1e-2;
    %     obj(end+1) = trace(Y'*L*Y) + beta*sum(sum( (X'*W - Y).^2)) + beta*alpha*sum(sqrt(sum(W.^2,2)));
    %
    %     if iter > 1 && abs(obj(end) - obj(end-1))/abs(obj(end)) < epsilon;
    %         break;
    %     end
end
end
-------------------------------------------------------------------------------- /fs_unsup_lapscore.m: -------------------------------------------------------------------------------- 1 | function [Y] = LaplacianScore(X, W) 2 | % Usage: 3 | % [Y] = LaplacianScore(X, W) 4 | % 5 | % X: Rows of vectors of data points 6 | % W: The affinity matrix. 7 | % Y: Vector of (1-LaplacianScore) for each feature. 8 | % The features with larger y are more important. 
9 | % 10 | % Examples: 11 | % 12 | % fea = rand(50,70); 13 | % options = []; 14 | % options.Metric = 'Cosine'; 15 | % options.NeighborMode = 'KNN'; 16 | % options.k = 5; 17 | % options.WeightMode = 'Cosine'; 18 | % W = constructW(fea,options); 19 | % 20 | % LaplacianScore = LaplacianScore(fea,W); 21 | % [junk, index] = sort(-LaplacianScore); 22 | % 23 | % newfea = fea(:,index); 24 | % %the features in newfea will be sorted based on their importance. 25 | % 26 | % Type "LaplacianScore" for a self-demo. 27 | % 28 | % See also constructW 29 | % 30 | %Reference: 31 | % 32 | % Xiaofei He, Deng Cai and Partha Niyogi, "Laplacian Score for Feature Selection". 33 | % Advances in Neural Information Processing Systems 18 (NIPS 2005), 34 | % Vancouver, Canada, 2005. 35 | % 36 | % Deng Cai, 2004/08 37 | 38 | 39 | if nargin == 0, selfdemo; return; end 40 | 41 | [nSmp,nFea] = size(X); 42 | 43 | if size(W,1) ~= nSmp 44 | error('W is error'); 45 | end 46 | 47 | D = full(sum(W,2)); 48 | L = W; 49 | 50 | allone = ones(nSmp,1); 51 | 52 | 53 | tmp1 = D'*X; 54 | 55 | D = sparse(1:nSmp,1:nSmp,D,nSmp,nSmp); 56 | 57 | DPrime = sum((X'*D)'.*X)-tmp1.*tmp1/sum(diag(D)); 58 | LPrime = sum((X'*L)'.*X)-tmp1.*tmp1/sum(diag(D)); 59 | 60 | DPrime(find(DPrime < 1e-12)) = 10000; 61 | 62 | Y = LPrime./DPrime; 63 | Y = Y'; 64 | Y = full(Y); 65 | 66 | 67 | 68 | 69 | %--------------------------------------------------- 70 | function selfdemo 71 | % ====== Self demo using IRIS dataset 72 | % ====== 1. 
Plot IRIS data after LDA for dimension reduction to 2D 73 | load iris.dat 74 | 75 | feaNorm = mynorm(iris(:,1:4),2); 76 | fea = iris(:,1:4) ./ repmat(max(1e-10,feaNorm),1,4); 77 | 78 | options = []; 79 | options.Metric = 'Cosine'; 80 | options.NeighborMode = 'KNN'; 81 | options.WeightMode = 'Cosine'; 82 | options.k = 3; 83 | 84 | W = constructW(fea,options); 85 | 86 | [LaplacianScore] = feval(mfilename,iris(:,1:4),W); 87 | [junk, index] = sort(-LaplacianScore); 88 | 89 | index1 = find(iris(:,5)==1); 90 | index2 = find(iris(:,5)==2); 91 | index3 = find(iris(:,5)==3); 92 | figure; 93 | plot(iris(index1, index(1)), iris(index1, index(2)), '*', ... 94 | iris(index2, index(1)), iris(index2, index(2)), 'o', ... 95 | iris(index3, index(1)), iris(index3, index(2)), 'x'); 96 | legend('Class 1', 'Class 2', 'Class 3'); 97 | title('IRIS data onto the first and second feature (Laplacian Score)'); 98 | axis equal; axis tight; 99 | 100 | figure; 101 | plot(iris(index1, index(3)), iris(index1, index(4)), '*', ... 102 | iris(index2, index(3)), iris(index2, index(4)), 'o', ... 103 | iris(index3, index(3)), iris(index3, index(4)), 'x'); 104 | legend('Class 1', 'Class 2', 'Class 3'); 105 | title('IRIS data onto the third and fourth feature (Laplacian Score)'); 106 | axis equal; axis tight; 107 | 108 | disp('Laplacian Score:'); 109 | for i = 1:length(LaplacianScore) 110 | disp(num2str(LaplacianScore(i))); 111 | end 112 | 113 | 114 | -------------------------------------------------------------------------------- /Eigenmap.m: -------------------------------------------------------------------------------- 1 | function [Y, eigvalue] = Eigenmap(W, ReducedDim, bEigs) 2 | %function [Y, eigvalue] = Eigenmap(W, ReducedDim, bEigs) 3 | % 4 | % W - the affinity matrix. 5 | % ReducedDim - the dimensionality of the reduced subspace. 6 | % bEigs - whether to use eigs to speed up. If not 7 | % specified, this function will automatically 8 | % decide based on the size of W. 
9 | % 10 | % version 2.1 --November/2011 11 | % version 2.0 --May/2009 12 | % version 1.5 --Dec./2005 13 | % version 1.0 --Aug./2003 14 | % 15 | % Written by Deng Cai (dengcai AT gmail.com) 16 | 17 | MAX_MATRIX_SIZE = 1600; % You can change this number according your machine computational power 18 | EIGVECTOR_RATIO = 0.1; % You can change this number according your machine computational power 19 | 20 | 21 | [row,col] = size(W); 22 | if row ~= col 23 | error('W must square matrix!!'); 24 | end 25 | 26 | nSmp = row; 27 | 28 | if ~exist('ReducedDim','var') 29 | ReducedDim = 10; 30 | end 31 | ReducedDim = min(ReducedDim+1,row); 32 | 33 | D_mhalf = full(sum(W,2).^-.5); 34 | D_mhalfMatrix = spdiags(D_mhalf,0,nSmp,nSmp); 35 | W = D_mhalfMatrix*W*D_mhalfMatrix; 36 | 37 | W = max(W,W'); 38 | 39 | 40 | dimMatrix = size(W,2); 41 | if ~exist('bEigs','var') 42 | if (dimMatrix > MAX_MATRIX_SIZE && ReducedDim < dimMatrix*EIGVECTOR_RATIO) 43 | bEigs = 1; 44 | else 45 | bEigs = 0; 46 | end 47 | end 48 | 49 | if bEigs 50 | option = struct('disp',0); 51 | [Y, eigvalue] = eigs(W,ReducedDim,'la',option); 52 | eigvalue = diag(eigvalue); 53 | else 54 | [Y, eigvalue] = eig(full(W)); 55 | eigvalue = diag(eigvalue); 56 | 57 | [junk, index] = sort(-eigvalue); 58 | eigvalue = eigvalue(index); 59 | Y = Y(:,index); 60 | if ReducedDim < length(eigvalue) 61 | Y = Y(:, 1:ReducedDim); 62 | eigvalue = eigvalue(1:ReducedDim); 63 | end 64 | end 65 | 66 | Y_old = Y; 67 | eigvalue_old = eigvalue; 68 | 69 | eigIdx = find(abs(eigvalue) < 1e-6); 70 | eigvalue (eigIdx) = []; 71 | Y (:,eigIdx) = []; 72 | 73 | nGotDim = length(eigvalue); 74 | if ~isempty(eigvalue) 75 | idx = 1; 76 | while(abs(eigvalue(idx)-1) < 1e-12) 77 | idx = idx + 1; 78 | if idx > nGotDim 79 | break; 80 | end 81 | end 82 | idx = idx - 1; 83 | 84 | if(idx > 1) % more than one eigenvector of 1 eigenvalue 85 | u = zeros(size(Y,1),idx); 86 | 87 | d_m = 1./D_mhalf; 88 | cc = 1/norm(d_m); 89 | u(:,1) = cc./D_mhalf; 90 | 91 | bDone = 0; 92 | for 
i = 1:idx 93 | if abs(Y(:,i)' * u(:,1) - 1) < 1e-14 94 | Y(:,i) = Y(:,1); 95 | Y(:,1) = u(:,1); 96 | bDone = 1; 97 | end 98 | end 99 | 100 | if ~bDone 101 | for i = 2:idx 102 | u(:,i) = Y(:,i); 103 | for j= 1:i-1 104 | u(:,i) = u(:,i) - (u(:,j)' * Y(:,i))*u(:,j); 105 | end 106 | u(:,i) = u(:,i)/norm(u(:,i)); 107 | end 108 | Y(:,1:idx) = u; 109 | end 110 | end 111 | 112 | Y = D_mhalfMatrix*Y; 113 | 114 | Y(:,1) = []; 115 | eigvalue(1) = []; 116 | else 117 | Y = Y_old; 118 | eigvalue = eigvalue_old; 119 | end -------------------------------------------------------------------------------- /fs_unsup_spec.m: --------------------------------------------------------------------------------
function [ wFeat, SF ] = fs_unsup_spec( W, X, Y, Pram )
%function [ wFeat, SF ] = fs_unsup_spec( W, X, Y, Pram )
%   Select feature using the spectrum information of the graph laplacian
%   W - the similarity matrix or a kernel matrix
%   X - the input data, each row is an instance
%   Y - the labels of the data; only used to obtain the number of classes
%       (number of columns when Y has several columns, number of distinct
%       values otherwise)
%   Pram - the prameter of the algorithm
%   Pram.style - 1: unsupervised feature selection 2: supervised feature
%   selection
%   Pram.expLam - the exp order for the eigenvalue (used when
%   Pram.function is 3 and Pram.style is 1)
%   Pram.function - 1:f'Lf; 2:using all eigenvalue except the first one; 3:
%   using the first k eigenvalues. (In this case the weight is the bigger
%   the better.)
%
%   wFeat - 1 x nFeatures row vector with the score of every feature.
%           Features whose degree-weighted norm is (numerically) zero get
%           the sentinel 1000 for functions 1 and 2 and -1 for function 3.
%   SF    - 1:nFeatures (the features are not reordered here)

[numInst,dimDat] = size(X);
if size(Y,2) > 1
    numC = size(Y,2);
else
    numC = length(unique(Y));
end

% node degrees, degree matrix and laplacian matrix
deg = sum(W,2);
D = diag(deg);
L = D - W;

% D1 = D^(-0.5); isolated nodes (zero degree) get weight 1 instead of Inf
d1 = deg.^(-0.5);
d1(isinf(d1)) = 1;

% D2 = D^(0.5)
d2 = deg.^0.5;
% D2 * ones(numInst,1) is just the vector d2; no need to build diag(d2)
v = full(d2);
v = v/norm(v);

% build the normalized laplacian matrix hatL = diag(d1)*L*diag(d1)
hatL = repmat(d1,1,numInst).*L.*repmat(d1',numInst,1);
if Pram.style ~=2
    hatL = (hatL'+hatL)/2;
end

% Calculate and construct spectral information. Only scoring function 3
% reads V and EVA, so the eigen-decomposition is skipped for the others.
if isequal(Pram.function, 3)
    switch Pram.style
        case 1
            [ V, EVA ] = funG( hatL, Pram.expLam );
        case 2
            [ V, EVA ] = funG( hatL, 1 );
    end
end

% begin to select features
wFeat = zeros(1, dimDat);

switch Pram.function
    case 1 % using f'Lf formulation
        for i = 1:dimDat
            % degree-weighted feature; d2.*f equals diag(d2)*f without the
            % numInst x numInst temporary
            hatF = d2 .* X(:,i);
            l = norm(hatF);

            if l < 100*eps
                wFeat(i) = 1000;
            else
                if Pram.style ~=2
                    hatF = hatF/l;
                end
                wFeat(i) = hatF'*hatL*hatF;
            end
        end
    case 2 % using all eigenvalues except the first one
        for i = 1:dimDat
            hatF = d2 .* X(:,i);
            l = norm(hatF);

            if l < 100*eps
                wFeat(i) = 1000;
            else
                hatF = hatF/l;
                % remove the component along the trivial eigenvector v
                wFeat(i) = hatF'*hatL*hatF/(1-(hatF'*v)^2);
            end
        end
    case 3 % use the first k eigenvalues and the weight is the bigger the better.
        eva = diag(EVA);
        % calculate the eigenvalues
        switch Pram.style
            case 1
                eva = eva.^(1/Pram.expLam);
                eva = 2 - eva;
                eva = eva.^(Pram.expLam);
            case 2
                eva = max(eva) - eva;
        end

        for i = 1:dimDat
            % normalize the feature
            hatF = d2 .* X(:,i);
            l = norm(hatF);

            % calculate the weight
            if l < 100*eps
                wFeat(i) = -1;
            else
                hatF = hatF/l;
                alphas = hatF'*V(:,2:numC);
                wFeat(i) = (alphas.^2)*eva(2:numC);
            end
        end
    otherwise
        % unknown scoring function: keep returning an empty score vector,
        % but say so instead of failing silently
        warning('fs_unsup_spec:unknownFunction', ...
            'Pram.function should be 1, 2 or 3; no feature score was computed.');
        wFeat = [];
end

SF = 1:dimDat;
-------------------------------------------------------------------------------- /lpp.m: -------------------------------------------------------------------------------- 1 | function [mappedX, mapping] = lpp(X, no_dims, k, sigma, eig_impl) 2 | %LPP Perform linearity preserving projection 3 | % 4 | % [mappedX, mapping] = lpp(X, no_dims, k, sigma, eig_impl) 5 | % 6 | % Perform the Linearity Preserving Projection on dataset X to reduce it to 7 | % dimensionality no_dims. The number of neighbors that is used by LPP is 8 | % specified by k (default = 12). The variable sigma determines the 9 | % bandwidth of the Gaussian kernel (default = 1). 10 | % 11 | % 12 | 13 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b. 14 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49 15 | % You are free to use, change, or redistribute this code in any way you 16 | % want for non-commercial purposes. However, it is appreciated if you 17 | % maintain the name of the original author. 
18 | % 19 | % (C) Laurens van der Maaten, 2010 20 | % University California, San Diego / Delft University of Technology 21 | 22 | 23 | if size(X, 2) > size(X, 1) 24 | error('Number of samples should be higher than number of dimensions.'); 25 | end 26 | if ~exist('no_dims', 'var') 27 | no_dims = 2; 28 | end 29 | if ~exist('k', 'var') 30 | k = 12; 31 | end 32 | if ~exist('sigma', 'var') 33 | sigma = 1; 34 | end 35 | if ~exist('eig_impl', 'var') 36 | eig_impl = 'Matlab'; 37 | end 38 | 39 | % Construct neighborhood graph 40 | disp('Constructing neighborhood graph...'); 41 | if size(X, 1) < 4000 42 | G = L2_distance(X', X'); 43 | % Compute neighbourhood graph 44 | [tmp, ind] = sort(G); 45 | for i=1:size(G, 1) 46 | G(i, ind((2 + k):end, i)) = 0; 47 | end 48 | G = sparse(double(G)); 49 | G = max(G, G'); % Make sure distance matrix is symmetric 50 | else 51 | G = find_nn(X, k); 52 | end 53 | G = G .^ 2; 54 | G = G ./ max(max(G)); 55 | 56 | % Compute weights (W = G) 57 | disp('Computing weight matrices...'); 58 | 59 | % Compute Gaussian kernel (heat kernel-based weights) 60 | G(G ~= 0) = exp(-G(G ~= 0) / (2 * sigma ^ 2)); 61 | 62 | % Construct diagonal weight matrix 63 | D = diag(sum(G, 2)); 64 | 65 | % Compute Laplacian 66 | L = D - G; 67 | L(isnan(L)) = 0; D(isnan(D)) = 0; 68 | L(isinf(L)) = 0; D(isinf(D)) = 0; 69 | 70 | % Compute XDX and XLX and make sure these are symmetric 71 | disp('Computing low-dimensional embedding...'); 72 | DP = X' * D * X; 73 | LP = X' * L * X; 74 | DP = (DP + DP') / 2; 75 | LP = (LP + LP') / 2; 76 | 77 | % Perform eigenanalysis of generalized eigenproblem (as in LEM) 78 | if size(X, 1) > 200 && no_dims < (size(X, 1) / 2) 79 | if strcmp(eig_impl, 'JDQR') 80 | options.Disp = 0; 81 | options.LSolver = 'bicgstab'; 82 | [eigvector, eigvalue] = jdqz(LP, DP, no_dims, 'SA', options); 83 | else 84 | options.disp = 0; 85 | options.issym = 1; 86 | options.isreal = 1; 87 | [eigvector, eigvalue] = eigs(LP, DP, no_dims, 'SA', options); 88 | end 89 | else 90 
function mappedX = ltsa(X, no_dims, k, eig_impl)
%LTSA Runs the local tangent space alignment algorithm
%
%   mappedX = ltsa(X, no_dims, k, eig_impl)
%
% Embeds the rows of X into no_dims dimensions (default = 2). Neighborhoods
% are obtained from find_nn(X, k) (default k = 12). When eig_impl is
% 'JDQR' the eigenproblem is solved with jdqr, otherwise (default
% 'Matlab') with eigs.
%
%

% This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
% The toolbox can be obtained from http://homepage.tudelft.nl/19j49
% You are free to use, change, or redistribute this code in any way you
% want for non-commercial purposes. However, it is appreciated if you
% maintain the name of the original author.
%
% (C) Laurens van der Maaten, 2010
% University California, San Diego / Delft University of Technology

    % Defaults for omitted trailing arguments
    if nargin < 2
        no_dims = 2;
    end
    if nargin < 3
        k = 12;
    end
    if nargin < 4
        eig_impl = 'Matlab';
    end

    % Neighbor index table: row p lists the neighbors of point p, with
    % zeros marking unused slots (they are filtered out below)
    disp('Find nearest neighbors...');
    nPts = size(X, 1);
    [~, nbrIdx] = find_nn(X, k);

    % Per-point local information matrix
    disp('Compute local information matrices for all datapoints...');
    localB = cell(1, nPts);
    for p = 1:nPts
        % Centered neighborhood and its (symmetrized) Gram matrix
        members = nbrIdx(p, nbrIdx(p,:) ~= 0);
        m = numel(members);
        Xloc = X(members, :);
        Xloc = bsxfun(@minus, Xloc, mean(Xloc, 1));
        gram = Xloc * Xloc';
        gram = (gram + gram') / 2;

        % Leading no_dims Schur vectors of the Gram matrix
        [vecs, T] = schur(gram);
        [~, order] = sort(-diag(T));
        if length(order) < no_dims
            % neighborhood too small: shrink the target dimension (this
            % also affects all later points and the final embedding)
            no_dims = length(order);
            warning(['Target dimensionality reduced to ' num2str(no_dims) '...']);
        end
        vecs = vecs(:, order(1:no_dims));

        % Prepend the constant column 1/sqrt(m) to the local basis
        Gp = double([(1 / sqrt(m)) * ones(m, 1), vecs]);

        % Local term I - Gp * Gp'
        localB{p} = eye(m) - Gp * Gp';
    end

    % Accumulate the local terms into the sparse alignment matrix
    disp('Construct alignment matrix...');
    align = speye(nPts);
    for p = 1:nPts
        members = nbrIdx(p, nbrIdx(p,:) ~= 0);
        align(members, members) = align(members, members) + localB{p};
        align(p, p) = align(p, p) - 1;
    end
    align = (align + align') / 2;     % enforce symmetry

    % Replace any NaN / Inf entries by zero
    align(isnan(align)) = 0;
    align(isinf(align)) = 0;

    % Eigenanalysis: request (no_dims + 1) eigenpairs around shift 0
    disp('Perform eigenanalysis...');
    shift = 0;
    if strcmp(eig_impl, 'JDQR')
        options.Disp = 0;
        options.LSolver = 'bicgstab';
        [mappedX, eigVals] = jdqr(align, no_dims + 1, shift, options);
    else
        options.disp = 0;
        options.isreal = 1;
        options.issym = 1;
        [mappedX, eigVals] = eigs(align, no_dims + 1, shift, options);
    end

    % Order eigenvectors by ascending eigenvalue
    [~, order] = sort(diag(eigVals), 'ascend');
    mappedX = mappedX(:, order);

    % Drop the first eigenvector; keep the next no_dims (fewer if the
    % solver returned fewer columns)
    if size(mappedX, 2) < no_dims + 1
        no_dims = size(mappedX, 2) - 1;
    end
    mappedX = mappedX(:, 2:no_dims + 1);
< 0 41 | continue; 42 | end 43 | bb = sqrt(bb); 44 | s1 = (p2+abs(bb))/p1; 45 | s2 = (p2-abs(bb))/p1; 46 | if (s1<=0 || s1>1) 47 | s1 = 100; 48 | end 49 | if (s2<=0 || s2>1) 50 | s2 = 100; 51 | end 52 | if s1==100 && s2==100 53 | continue; 54 | else 55 | s = min(s1,s2); 56 | end 57 | if s < bestNor 58 | bestNor = s; 59 | bestCor = i; 60 | end 61 | end 62 | if bestCor == -1; 63 | return 64 | else 65 | % reduce the size of labmda and update W with nes-L2,1 66 | W(fList,:) = W(fList,:) + bestNor*GA; 67 | R = Y - X*W; 68 | lam = norm(X(:,fList(1))'*R,2); 69 | 70 | % find the nes-L2,1 solution 71 | [ fList, WA ] = nes(X, Y, W, [fList bestCor], lam*0.995); 72 | W(fList,:) = WA; 73 | R = Y - X*W; k = length(fList); 74 | disp(' '); 75 | % fprintf('step: %5i, feature: %5i, Lambda:%f\n',cnt+1, k, lam); 76 | % fprintf('----------------------------------\n'); 77 | end 78 | end 79 | 80 | % R = Y - X*W; 81 | % lam = norm(X(:,fList(1))'*R,2); 82 | % opts.q=2; 83 | % opts.tol=1e-6; 84 | % opts.maxIter = 10000; 85 | % opts.x0=W; 86 | % W = mcLeastR(X, Y, lam, opts); 87 | % fList = find(sum(abs(W),2)); 88 | 89 | function [newfList WAA] = nes(X, Y, W, fList, lam) 90 | trd = 10e-5; 91 | 92 | WAA = W(fList,:); XAA = X(:,fList); newfList = fList; 93 | 94 | opts.q=2; 95 | opts.tol=1e-7; 96 | opts.maxIter = 10000; 97 | 98 | stop = 0; 99 | maxC = 1000; counterr = 1; 100 | 101 | % obtain a solution on XAA 102 | while stop == 0 && counterr <= maxC 103 | LC = setdiff(1:nF,newfList); 104 | 105 | opts.x0=WAA; 106 | WAA = mcLeastR(XAA, Y, lam, opts); 107 | 108 | keepIDX = find(sum(abs(WAA),2)); 109 | 110 | newfList = newfList(keepIDX); 111 | WAA = WAA(keepIDX,:); 112 | XAA = XAA(:,keepIDX); 113 | 114 | RR = Y - XAA*WAA; 115 | pp = X(:,LC)'*RR; pp = sqrt(sum(pp.*pp,2)); [maxr sel] = max(pp); 116 | if maxr - lam >= trd 117 | % fprintf('find %i voilations\n', length(find( (pp-lam) > trd ))); 118 | end 119 | 120 | if maxr - lam < trd 121 | if length(keepIDX) < length(fList) 122 | lam = lam*0.995; 
123 | else 124 | stop = 1; 125 | end 126 | else 127 | aaa = [newfList,LC((pp-lam) > trd )]; newfList = aaa; 128 | aaa = [WAA;zeros(length(find( (pp-lam) > trd )),size(WAA,2))]; WAA = aaa; 129 | XAA = X(:,newfList); 130 | counterr = counterr + 1; 131 | end 132 | end 133 | end % end function nes 134 | end -------------------------------------------------------------------------------- /fs_unsup_llcfs.m: -------------------------------------------------------------------------------- 1 | function [Y, tao, objHistory] = fs_unsup_llcfs(X,param) 2 | % 3 | % Input 4 | % X: nSmp * nDim 5 | % param, a struct of parameters 6 | % nClusters, the number of clusters 7 | % k, the size of knn 8 | % beta, the regularization parameter 9 | % Output 10 | % Y: nSmp * nClusters 11 | % tao: nDim * 1 12 | % 13 | % 14 | % 15 | % [1] Feature Selection and Kernel Learning for Local Learning-Based Clustering, PAMI-2011 16 | % 17 | % Liang Du (csliangdu@gmail.com) 18 | 19 | if isfield(param, 'nClusters') 20 | c = param.nClusters; 21 | end 22 | 23 | k = 30; 24 | if isfield(param, 'k') 25 | k = param.k; 26 | end 27 | 28 | beta = 1; 29 | if isfield(param, 'beta') 30 | beta = param.beta; 31 | end 32 | 33 | kType = 1; 34 | if isfield(param, 'kType') 35 | kType = param.kType; 36 | end 37 | 38 | maxiter = 50; 39 | if isfield(param, 'maxiter') 40 | maxiter = param.maxiter; 41 | end 42 | 43 | epsilon = 1e-5; 44 | if isfield(param, 'epsilon') 45 | epsilon = param.epsilon; 46 | end 47 | 48 | isTao = 0; 49 | epsilon_tao = 1e-5; 50 | [n, d] = size(X); 51 | 52 | 53 | % convergence by maxiter 54 | isMaxiter = 1; 55 | if maxiter > 0 56 | isMaxiter = 1; 57 | end 58 | 59 | % convergence by epsilon 60 | isEpsilon = 0; 61 | if isEpsilon > 0 62 | isEpsilon = 1; 63 | end 64 | 65 | tao = ones(d,1) / d; 66 | 67 | objHistory = []; 68 | iter = 0; 69 | while true 70 | 71 | wX = bsxfun(@times, X, sqrt(max(tao, eps))' ); 72 | wX2 = bsxfun(@times, X, max(tao, eps)' ); 73 | wK = wX * wX'; 74 | % k-mutual neighbors 
re-computation using weighted features 75 | switch kType 76 | case 1 77 | W = SimGraph_NearestNeighbors(wX', k, 2, 0); 78 | [idx, jdx, ~] = find(W); 79 | kIdx = cell(n, 1); 80 | nz = length(idx); 81 | for ii = 1:nz 82 | kIdx{jdx(ii)} = [kIdx{jdx(ii)}, idx(ii)]; 83 | end 84 | case 2 85 | if isempty(which('knnsearch')) 86 | disp('The funcion knnsearch in stat toolbox is not found'); 87 | else 88 | [kIdx, ~] = knnsearch(wX, wX, 'k', min(n, k + 1) ); 89 | kIdx = kIdx(:, 2:end); 90 | kIdx = mat2cell(kIdx, ones(n, 1), size(kIdx, 2)); 91 | end 92 | otherwise 93 | disp(''); 94 | end 95 | 96 | % construct A for laplacian 97 | A = zeros(n); 98 | wA = cell(n,1);% pre storage for w computation 99 | for i = 1:n 100 | lidx = kIdx{i}; 101 | ni = length(lidx); 102 | if ni > 1 103 | Ki = wK(lidx, lidx); 104 | ki = wK(i, lidx); 105 | Hi = eye(ni) - ones(ni, ni) / ni; 106 | Ii = eye(ni); 107 | Iib = Ii / beta; 108 | Ai = Hi * Ki * Hi; 109 | Ai = (Ai + Iib) \ Ai; 110 | Ai = Hi - Hi * Ai; 111 | Ai = Ai * beta; 112 | wA{i} = wX2(lidx, :)' * Ai; % EQ 15 113 | Ai = (ki - sum(Ki) / ni) * Ai; 114 | Ai = Ai + ones(1, ni) / ni; 115 | A(i, lidx) = Ai; 116 | end 117 | end 118 | 119 | % construct laplacian for local learning 120 | M = eye(n) - A; 121 | M = M' * M; 122 | M(isnan(M)) = 0; 123 | M(isinf(M)) = 0; 124 | 125 | % first c eigenvectors corresponding to the first c smallest eigenvalues 126 | M = (M + M') / 2; 127 | [Y, eigval] = eig(M); 128 | eigval = diag(eigval); 129 | [eigval, eigidx] = sort(eigval, 'ascend'); 130 | eigval = eigval(eigidx(1:c)); 131 | Y = Y(:, eigidx(1:c)); 132 | 133 | objHistory = [objHistory; sum(eigval)];%#ok 134 | 135 | 136 | % compute wc to compute tao 137 | tao_old = tao; 138 | 139 | tao = zeros(d, 1); 140 | for i = 1:n 141 | lidx = kIdx{i}; 142 | ni = length(lidx); 143 | if ni > 1 144 | wi = wA{i} * Y(lidx,:); 145 | tao = sum(wi.^2, 2) + tao; 146 | end 147 | end 148 | tao = sqrt(tao); 149 | tao = tao / sum(tao); 150 | 151 | % check the convergence 152 | iter = 
function fs_res = evalUnSupFS(X, Y, feaIdx, options)
% Evaluate the selected features
%
% Input
%   X       - data matrix, nSmp * nDim
%   Y       - ground-truth labels, one per sample
%   feaIdx  - indices of the selected features (columns of X)
%   options - optional struct:
%               jac_k  neighborhood size for the Jaccard score (default 5)
%               nKm    number of k-means runs                  (default 10)
%               knn_k  k of the leave-one-out kNN classifier   (default 1)
%
% Output (fields of fs_res)
% [1] redundancy (red)
% [2] jac, k=5 by default
% [3] acc, mean and std, best_obj
% [4] nmi(max_version), mean, std, best_obj
% [5] purity, mean, std, best_obj
% [6] precision, mean, std, best_obj
% [7] recall, mean, std, best_obj
% [8] f1, mean, std, best_obj
% [9] loocv, knn, k=1 by default
%
%
% Reference
% [1] On Similarity Preserving Feature Selection, TKDE, 2011
%
% Liang Du (csliangdu@gmail.com)
%

if ~exist('options', 'var') || isempty(options)
    options = struct();
end

if ~isfield(options, 'jac_k')
    options.jac_k = 5;
end

if ~isfield(options, 'nKm')
    options.nKm = 10;
end

if ~isfield(options, 'knn_k')
    options.knn_k = 1;
end

Xsub = X(:, feaIdx);

fs_red = compute_RED(Xsub);
fs_jac = compute_JAC(X, Xsub, options.jac_k);
fs_loocv = compute_loocv(Xsub, Y, options.knn_k);

fs_cluster = compute_Clustering(Xsub, Y, options.nKm);

% merge the scalar scores and the clustering scores into one flat struct
fs_res = struct('red', fs_red, 'jac', fs_jac, 'loocv', fs_loocv);
fs_res = cell2struct([struct2cell(fs_res); struct2cell(fs_cluster)], ...
    [fieldnames(fs_res); fieldnames(fs_cluster)], 1);
end

function fs_red = compute_RED(Xsub)
% Redundancy: sum of pairwise feature correlations over the strict lower
% triangle, divided by nDim * (nDim - 1).
[nSmp, nDim] = size(Xsub);

% Use corr() only when it is actually available on the path
% (the original test ~isempty('corr') was always true).
if ~isempty(which('corr')) && nDim < 2000
    C1 = corr(Xsub);
    sum_corr = sum(sum(tril(C1, -1)));
else
    % Fallback without corr(): accumulate column by column so the full
    % nDim x nDim correlation matrix is never materialized.
    Xc = bsxfun(@minus, Xsub, mean(Xsub, 1));
    stdX = std(Xsub, 0, 1);
    dof = max(nSmp - 1, 1);   % std(.,0,.) normalizes by nSmp-1, so must the covariance
    sum_corr = 0;
    for i1 = 2:nDim
        covs = (Xc(:, 1:i1-1)' * Xc(:, i1)) / dof;
        sum_corr = sum_corr + sum(covs ./ (stdX(1:i1-1)' * stdX(i1) + eps));
    end
end
fs_red = sum_corr / (nDim * (nDim - 1) + eps);
end

function fs_jac = compute_JAC(X, Xsub, k)
% Mean Jaccard overlap between each sample's k nearest neighbors in the
% full feature space (X) and in the selected feature space (Xsub).
nSmp = size(X, 1);

D1 = EuDist2(X, X, 0);
[~, Idx1] = sort(D1, 2, 'ascend');
Idx1 = Idx1(:, 2:k+1);                 % column 1 is skipped (presumably the sample itself)
Idx1 = mat2cell(Idx1, ones(nSmp, 1), k);

D2 = EuDist2(Xsub, Xsub, 0);
[~, Idx2] = sort(D2, 2, 'ascend');
Idx2 = Idx2(:, 2:k+1);
Idx2 = mat2cell(Idx2, ones(nSmp, 1), k);

nUnion = cellfun(@(a, b) length(union(a, b)), Idx1, Idx2);
nInter = cellfun(@(a, b) length(intersect(a, b)), Idx1, Idx2);
fs_jac = mean(nInter ./ nUnion);
end

function fs_cluster = compute_Clustering(Xsub, Y, nKm)
% Run litekmeans nKm times on the selected features and summarize the
% clustering quality measures returned by evalClustering.
if ~exist('nKm', 'var')
    nKm = 20;
end
nClass = length(unique(Y));
acc_list = zeros(nKm, 1);
nmi_max_list = zeros(nKm, 1);
nmi_sqrt_list = zeros(nKm, 1);
purity_list = zeros(nKm, 1);
obj_list = zeros(nKm, 1);
prec_list = zeros(nKm, 1);
recall_list = zeros(nKm, 1);
f1_list = zeros(nKm, 1);
rand('twister',5489); %#ok fixed seed so repeated evaluations are comparable
for iKm = 1:nKm
    [label, ~, ~, sumD] = litekmeans(Xsub, nClass,'Replicates',1);
    tmp_res = evalClustering(Y, label);
    acc_list(iKm) = tmp_res.acc;
    nmi_max_list(iKm) = tmp_res.nmi_max;
    nmi_sqrt_list(iKm) = tmp_res.nmi_sqrt;
    purity_list(iKm) = tmp_res.purity;
    obj_list(iKm) = sum(sumD);
    prec_list(iKm) = mean(tmp_res.precision);
    recall_list(iKm) = mean(tmp_res.recall);
    f1_list(iKm) = mean(tmp_res.f1);
end
% "best_obj_*" reports the run with the smallest k-means objective
[~, idx] = min(obj_list);
best = idx(1);
fs_cluster = struct('mean_acc', mean(acc_list), 'std_acc', std(acc_list), ...
    'mean_nmi_max', mean(nmi_max_list), 'std_nmi_max', std(nmi_max_list), ...
    'mean_nmi_sqrt', mean(nmi_sqrt_list), 'std_nmi_sqrt', std(nmi_sqrt_list), ...
    'mean_purity', mean(purity_list), 'std_purity', std(purity_list), ...
    'mean_prec', mean(prec_list), 'std_prec', std(prec_list), ...
    'mean_recall', mean(recall_list), 'std_recall', std(recall_list), ...
    'mean_f1', mean(f1_list), 'std_f1', std(f1_list), ... % was std(prec_list)
    'best_obj_acc', acc_list(best), 'best_obj_nmi_max', nmi_max_list(best),...
    'best_obj_nmi_sqrt', nmi_sqrt_list(best), 'best_obj_purity', purity_list(best), ...
    'best_obj_prec', prec_list(best), 'best_obj_recall', recall_list(best),...
    'best_obj_f1', f1_list(best), ...
    'aio_acc', acc_list, 'aio_nmi_max', nmi_max_list, 'aio_nmi_sqrt', nmi_sqrt_list, 'aio_purity', purity_list,...
    'aio_prec', prec_list, 'aio_recall', recall_list, 'aio_f1', f1_list);
end

function fs_loocv = compute_loocv(Xsub, Y, k)
% Leave-one-out accuracy of a k-nearest-neighbor classifier (majority
% vote; ties resolved by mode(), i.e. the smallest label wins).
% With k = 1 this is the plain 1-NN rule used previously.
if ~exist('k', 'var') || isempty(k)
    k = 1;
end
Y = Y(:);
nSmp = numel(Y);
k = max(1, min(k, nSmp - 1));          % cannot use more neighbors than other samples

Dist = EuDist2(Xsub, Xsub, 0);
[~, Idx] = sort(Dist, 2, 'ascend');
% column 1 is skipped: it is assumed to be the sample itself (distance 0)
nbrLabels = Y(Idx(:, 2:k+1));          % nSmp x k
pred = mode(nbrLabels, 2);
fs_loocv = mean(pred == Y);
end
16 | % The returned affinity matrix A is a sparse matrix. 17 | % 18 | % A = scale_dist3_knn(D, nn, knn, useSparse) returns a sparse 19 | % matrix if useSparse is true, and returns a dense matrix otherwise. 20 | % 21 | % Assumptions on the distance matrix D: 22 | % When D is a dense matrix: 23 | % D_ij is the squared Euclidean distance between the 24 | % i-th and j-th observations. 25 | % When D is a sparse matrix: 26 | % (e.g. constructed for image segmentation) 27 | % If D_ij is nonzero, it is the squared Euclidean 28 | % distance between the i-th and j-th observations. 29 | % If D_ij (i~=j) is zero, it means the distance 30 | % between the i-th and j-th observations is infinity 31 | % (i.e. the corresponding affinity value is 0). 32 | % Finally, D_ii=0 (for all i) by definition. 33 | % 34 | % This method was proposed in the following paper: 35 | % L. Zelnik-Manor, P. Perona, 36 | % Self-tuning spectral clustering. 37 | % Advances in Neural Information Processing Systems 17 (NIPS '04), pp. 1601--1608. 38 | % The authors also posted their Matlab code at: 39 | % http://webee.technion.ac.il/~lihi/Demos/SelfTuningClustering.html 40 | % However, their implementation is different from the 41 | % definition of A in the paper. In particular, 42 | % A_ij = exp(-D_ij / max((sigma_i*sigma_j), 0.004)) 43 | % in their 'scale_dist' function. 44 | % 45 | % Our 'scale_dist3' function here implements the 46 | % original definition of A as stated in the beginning 47 | % of this help document. 48 | % 49 | % This function is used for experiments in the following paper: 50 | % Da Kuang, Chris Ding, Haesun Park, 51 | % Symmetric Nonnegative Matrix Factorization for Graph Clustering, 52 | % The 12th SIAM International Conference on Data Mining (SDM '12), pp. 106--117. 53 | % Please cite this paper if you find this code useful. 
54 | % 55 | 56 | if (nargin < 4) 57 | useSparse = true; 58 | end 59 | 60 | distSparse = issparse(D); 61 | n = size(D, 1); 62 | 63 | if (distSparse) 64 | max_rows = full(max(sum(D~=0))); 65 | if (knn > max_rows) 66 | knn = max_rows; 67 | end 68 | max_nonzeros = nnz(D); 69 | i = zeros(max_nonzeros, 1); 70 | j = zeros(max_nonzeros, 1); 71 | sorted_s = zeros(max_nonzeros, 1); 72 | idx_s = zeros(max_nonzeros, 1); 73 | current_pos = 0; 74 | for col_num = 1 : n 75 | col_nz = D(:, col_num); 76 | idx_temp = find(col_nz ~= 0); 77 | col_nz = full(col_nz(col_nz ~= 0)); 78 | col_nnz = length(col_nz); 79 | i(current_pos+1 : current_pos+col_nnz) = 1 : col_nnz; 80 | j(current_pos+1 : current_pos+col_nnz) = col_num; 81 | [sorted, idx_relative] = sort(col_nz); 82 | sorted_s(current_pos+1 : current_pos+col_nnz) = sorted; 83 | idx_s(current_pos+1 : current_pos+col_nnz) = idx_temp(idx_relative); 84 | if (nn > col_nnz) 85 | ls(col_num) = sorted(end); 86 | else 87 | ls(col_num) = sorted(nn); 88 | end 89 | current_pos = current_pos + col_nnz; 90 | end 91 | ls = sqrt(ls)'; 92 | sorted = sparse(i, j, sorted_s, max_rows, n); 93 | idx = sparse(i, j, idx_s, max_rows, n); 94 | j = meshgrid(1:n, 1:knn); 95 | j = j(:); 96 | i = full(idx(1:knn, :)); 97 | i = i(:); 98 | s = full(sorted(1:knn, :)); 99 | s = s(:); 100 | temp = find(i ~= 0); 101 | i = i(temp); 102 | j = j(temp); 103 | index = [i, j; j, i]; 104 | s = s(temp); 105 | s = [s; s]; 106 | else 107 | if (nn > n-1) 108 | nn = n-1; 109 | end 110 | if (knn > n-1) 111 | knn = n-1; 112 | end 113 | [sorted, idx] = sort(D); 114 | ls = sorted(nn+1, :); 115 | ls = sqrt(ls)'; 116 | j = meshgrid(1:n, 1:knn+1); 117 | j = j(:); 118 | i = idx(1:knn+1, :); 119 | i = i(:); 120 | I = find(i ~= j); 121 | i = i(I); 122 | j = j(I); 123 | index = [i, j; j, i]; 124 | s = sorted(1:knn+1, :); 125 | s = s(:); 126 | s = s(I); 127 | s = [s; s]; 128 | end 129 | 130 | A_s = exp( -s ./ (ls(index(:,1)).*ls(index(:,2))) ); 131 | 132 | if (useSparse) 133 | [index, i, j] = 
unique(index, 'rows'); 134 | A_s = A_s(i); 135 | A = sparse(index(:,1), index(:,2), A_s, n, n); 136 | else 137 | index = (index(:,2) - 1) * n + index(:, 1); 138 | A = zeros(n); 139 | A(index) = A_s; 140 | end 141 | -------------------------------------------------------------------------------- /lle.m: -------------------------------------------------------------------------------- 1 | function [mappedX, mapping] = lle(X, no_dims, k, eig_impl) 2 | %LLE Runs the locally linear embedding algorithm 3 | % 4 | % mappedX = lle(X, no_dims, k, eig_impl) 5 | % 6 | % Runs the local linear embedding algorithm on dataset X to reduces its 7 | % dimensionality to no_dims. In the LLE algorithm, the number of neighbors 8 | % can be specified by k. 9 | % The function returns the embedded coordinates in mappedX. 10 | % 11 | % 12 | 13 | % This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b. 14 | % The toolbox can be obtained from http://homepage.tudelft.nl/19j49 15 | % You are free to use, change, or redistribute this code in any way you 16 | % want for non-commercial purposes. However, it is appreciated if you 17 | % maintain the name of the original author. 
18 | % 19 | % (C) Laurens van der Maaten, 2010 20 | % University California, San Diego / Delft University of Technology 21 | 22 | if ~exist('no_dims', 'var') 23 | no_dims = 2; 24 | end 25 | if ~exist('k', 'var') 26 | k = 12; 27 | end 28 | if ~exist('eig_impl', 'var') 29 | eig_impl = 'Matlab'; 30 | end 31 | 32 | % Get dimensionality and number of dimensions 33 | [n, d] = size(X); 34 | 35 | % Compute pairwise distances and find nearest neighbors (vectorized implementation) 36 | disp('Finding nearest neighbors...'); 37 | if ~ischar(k) 38 | [distance, neighborhood] = find_nn(X, k + 1); 39 | else 40 | [distance, neighborhood] = find_nn(X, k); 41 | end 42 | 43 | % Identify largest connected component of the neighborhood graph 44 | blocks = components(distance)'; 45 | count = zeros(1, max(blocks)); 46 | for i=1:max(blocks) 47 | count(i) = length(find(blocks == i)); 48 | end 49 | [count, block_no] = max(count); 50 | conn_comp = find(blocks == block_no); 51 | 52 | % Update the neighborhood relations 53 | tmp = 1:n; 54 | tmp = tmp(conn_comp); 55 | new_ind = zeros(n, 1); 56 | for i=1:n 57 | ii = find(tmp == i); 58 | if ~isempty(ii), new_ind(i) = ii; end 59 | end 60 | neighborhood = neighborhood(conn_comp, 2:k+1)'; 61 | for i=1:n 62 | neighborhood(neighborhood == i) = new_ind(i); 63 | end 64 | n = numel(conn_comp); 65 | X = X(conn_comp,:)'; 66 | max_k = size(neighborhood, 1); 67 | 68 | % Find reconstruction weights for all points by solving the MSE problem 69 | % of reconstructing a point from each neighbours. A used constraint is 70 | % that the sum of the reconstruction weights for a point should be 1. 
71 | disp('Compute reconstruction weights...'); 72 | if k > d 73 | tol = 1e-5; 74 | else 75 | tol = 0; 76 | end 77 | 78 | % Construct reconstruction weight matrix 79 | W = zeros(max_k, n); 80 | for i=1:n 81 | nbhd = neighborhood(:,i); 82 | nbhd = nbhd(nbhd ~= 0); 83 | kt = numel(nbhd); 84 | z = bsxfun(@minus, X(:,nbhd), X(:,i)); % Shift point to origin 85 | C = z' * z; % Compute local covariance 86 | C = C + eye(kt, kt) * tol * trace(C); % Regularization of covariance (if K > D) 87 | wi = C \ ones(kt, 1); % Solve linear system 88 | wi = wi / sum(wi); % Make sure that sum is 1 89 | W(:,i) = [wi; nan(max_k - kt, 1)]; 90 | end 91 | 92 | % Now that we have the reconstruction weights matrix, we define the 93 | % sparse cost matrix M = (I-W)'*(I-W). 94 | M = sparse(1:n, 1:n, ones(1, n), n, n, 4 * max_k * n); 95 | for i=1:n 96 | w = W(:,i); 97 | j = neighborhood(:,i); 98 | indices = find(j ~= 0 & ~isnan(w)); 99 | j = j(indices); 100 | w = w(indices); 101 | M(i, j) = M(i, j) - w'; 102 | M(j, i) = M(j, i) - w; 103 | M(j, j) = M(j, j) + w * w'; 104 | end 105 | 106 | % For sparse datasets, we might end up with NaNs or Infs in M. We just set them to zero for now... 
function [Lap, S] = computeLocalStructure(X, type, k, sigma, emb_dim)
%COMPUTELOCALSTRUCTURE Build a local-structure (Laplacian-like) matrix.
%
% Input
%   X, n * nDim
%   type, 'LPP', 'LLE', 'LTSA' (case-insensitive), 'LPP' by default
%   k, neighborhood size, needed by all the three types, 5 by default;
%   sigma, gaussian kernel bandwidth, optSigma(X), by default, only used by LPP
%   emb_dim, embedding dimension, 2 by default, only used by LTSA
%
% Output
%   Lap, n * n matrix: D - G for LPP, (I-W)'(I-W) for LLE, the alignment
%        matrix for LTSA; [] for an unsupported type
%   S,   n * n matrix: the heat-kernel weights G for LPP, the sparse
%        reconstruction weights for LLE (S(i,j) is the weight of neighbor
%        j in the reconstruction of point i); [] otherwise
%

if ~exist('k', 'var') || isempty(k)
    k = 5;
end

if ~exist('type', 'var') || isempty(type)
    type = 'LPP';
end

% The dispatch below is case-insensitive, so the defaults must be too;
% otherwise e.g. type = 'lpp' would reach the LPP branch without sigma.
if strcmpi(type, 'LPP') && (~exist('sigma', 'var') || isempty(sigma))
    sigma = optSigma(X);
end

if strcmpi(type, 'LTSA') && (~exist('emb_dim', 'var') || isempty(emb_dim))
    emb_dim = 2;
end

[n, d] = size(X);
switch lower(type)
    case 'lpp'
        % Construct neighborhood graph
        if size(X, 1) < 4000
            G = L2_distance(X', X');
            % Keep, for every point, only the entries of its (k + 1)
            % smallest distances (the point itself plus k neighbors)
            [tmp, ind] = sort(G); %#ok<ASGLU>
            for i = 1:size(G, 1)
                G(i, ind((2 + k):end, i)) = 0;
            end
            G = sparse(double(G));
            G = max(G, G');    % Make sure distance matrix is symmetric
        else
            G = find_nn(X, k);
        end
        G = G .^ 2;
        G = G ./ max(max(G));

        % Compute Gaussian kernel (heat kernel-based weights)
        G(G ~= 0) = exp(-G(G ~= 0) / (sigma ^ 2));

        % Construct diagonal weight matrix
        D = diag(sum(G, 2));

        % Compute Laplacian
        L = D - G;
        L(isnan(L)) = 0;
        L(isinf(L)) = 0;
        Lap = L;
        S = G;

    case 'lle'
        Dist = EuDist2(X);

        % k nearest neighbors of every point; column 1 of the sorted index
        % is skipped (presumably the point itself at distance 0)
        [~, neighborhood] = sort(Dist, 2, 'ascend');
        neighborhood = neighborhood(:, 2:k+1);      % n x k
        if (k > d)
            tol = 1e-3; % regularizer in case constrained fits are ill conditioned
        else
            tol = 1e-12;
        end

        % W(:, ii) holds the reconstruction weights of point ii, in the
        % order of neighborhood(ii, :)
        W = zeros(k, n);
        for ii = 1:n
            z = X(neighborhood(ii,:),:) - repmat(X(ii,:), k, 1); % shift ith pt to origin
            C = z * z';                                          % local covariance
            C = C + eye(size(C)) * tol * trace(C);               % regularization
            W(:,ii) = C \ ones(k, 1);                            % solve Cw=1
            W(:,ii) = W(:,ii) / sum(W(:,ii));                    % enforce sum(w)=1
        end

        % Cost matrix M = (I-W)'*(I-W), accumulated point by point
        M = sparse(1:n, 1:n, ones(1,n), n, n, 4*k*n);
        for ii = 1:n
            w = W(:,ii);
            jj = neighborhood(ii,:)';
            M(ii,jj) = M(ii,jj) - w'; %#ok
            M(jj,ii) = M(jj,ii) - w;  %#ok
            M(jj,jj) = M(jj,jj) + w*w'; %#ok
        end
        M = max(M, M');
        M = sparse(M);
        % For sparse datasets, we might end up with NaNs or Infs in M. We just set them to zero for now...
        M(isnan(M)) = 0;
        M(isinf(M)) = 0;
        Lap = M;

        % Sparse weight matrix. W is k x n (one column per point), so the
        % neighbor indices must be laid out k x n as well; using
        % neighborhood(:) directly (n x k, column-major) would pair each
        % weight with the wrong neighbor.
        rowIdx = repmat(1:n, k, 1);
        colIdx = neighborhood';
        S = sparse(rowIdx(:), colIdx(:), W(:), n, n, n*k);

    case 'ltsa'
        % Compute neighborhood indices
        [~, ni] = find_nn(X, k);

        % Compute local information matrix for all datapoints
        Bi = cell(1, n);
        for i = 1:n
            % Centered neighborhood and its symmetrized Gram matrix
            Ii = ni(i,:);
            Ii = Ii(Ii ~= 0);
            kt = numel(Ii);
            Xi = X(Ii,:) - repmat(mean(X(Ii,:), 1), [kt 1]);
            W = Xi * Xi';
            W = (W + W') / 2;

            % Local information: the emb_dim leading Schur vectors of W
            [Vi, Si] = schur(full(W));
            [s, Ji] = sort(-diag(Si)); %#ok<ASGLU>
            if length(Ji) < emb_dim
                % neighborhood too small: shrink the embedding dimension
                emb_dim = length(Ji);
            end
            Vi = Vi(:, Ji(1:emb_dim));

            % Store eigenvectors in G (Vi is the space with the maximum variance, i.e. a good approximation of the tangent space at point Xi)
            % The constant 1/sqrt(kt) serves as a centering matrix
            Gi = double([repmat(1 / sqrt(kt), [kt 1]) Vi]);

            % Compute Bi = I - Gi * Gi'
            Bi{i} = eye(kt) - Gi * Gi';
        end

        % Construct sparse matrix B (= alignment matrix)
        B = speye(n);
        for i = 1:n
            Ii = ni(i,:);
            Ii = Ii(Ii ~= 0);
            B(Ii, Ii) = B(Ii, Ii) + Bi{i}; % sum Bi over all points
            B(i, i) = B(i, i) - 1;
        end
        B = (B + B') / 2; % make sure B is symmetric

        % For sparse datasets, we might end up with NaNs in B. We just set them to zero for now...
        B(isnan(B)) = 0;
        B(isinf(B)) = 0;
        Lap = B;
        S = [];

    otherwise
        Lap = [];
        S = [];
        disp('not supported yet!');
end
55 | % (.mu is used when .nFlag=1 or 2) 56 | % If .mu is not specified, then 57 | % * .mu=mean(A,1) * 58 | % 59 | % .nu Weight (column) vector for normalization 60 | % (.mu is used when .nFlag=1 or 2) 61 | % If .nu is not specified, then 62 | % * .nFlag=1 => .nu=(sum(A.^2, 1)'/m.^{0.5} * 63 | % * .nFlag=2 => .nu=(sum(A.^2, 2)/n.^{0.5} * 64 | % 65 | %% Regularization 66 | % 67 | % .rFlag Flag for regularization 68 | % (.rFlag is used for the functions with "R") 69 | % * 0 => lambda is the regularization parameter * 70 | % 1 => lambda = lambda * lambda_{max} 71 | % where lambda_{max} is the maximum lambda 72 | % that yields the zero solution 73 | % .rsL2 Regularization parameter value of the squared L2 norm 74 | % (.rsL2 is used only for l1 regularization) 75 | % *.rsL2=0* 76 | % If .rFlag=0, .rsL2 is used without scaling 77 | % .rFlag=1, .rsL2=.rsL2 * lambda_{max} 78 | % 79 | %% Method & Line Search 80 | % .lFlag 81 | % 82 | %% Grooup & Others 83 | % 84 | % .ind Indices for k groups (a k+1 row vector) 85 | % For group lasso only 86 | % Indices for the i-th group are (ind(i)+1):ind(i+1) 87 | % 88 | % .q Value of q in L1/Lq regularization 89 | % *.q=2* 90 | % 91 | % .sWeight The sample (positive and negative) weight 92 | % For the Logistic Loss only 93 | % Positive sample: .sWeight(1) 94 | % Negative sample: sWeight(2) 95 | % *1/m for both positive and negative samples* 96 | % 97 | % .gWeight The weight for different groups 98 | % *.gWeight=1* 99 | % 100 | % .fName The name of the function 101 | % 102 | %% Copyright (C) 2009-2010 Jun Liu, and Jieping Ye 103 | % 104 | % You are suggested to first read the Manual. 105 | % 106 | % For any problem, please contact with Jun Liu via j.liu@asu.edu 107 | % 108 | % Last modified 7 August 2009. 
109 | 110 | %% Starting point 111 | 112 | if isfield(opts,'init') 113 | if (opts.init~=0) && (opts.init~=1) && (opts.init~=2) 114 | opts.init=0; % if .init is not 0, 1, or 2, then use the default 0 115 | end 116 | 117 | if ~isfield(opts,'x0') && (opts.init==1) 118 | opts.init=0; % if .x0 is not defined and .init=1, set .init=0 119 | end 120 | else 121 | opts.init = 0; 122 | % if .init is not specified, use "0" 123 | end 124 | 125 | %% Termination 126 | 127 | if isfield(opts,'maxIter') 128 | if (opts.maxIter<1) 129 | opts.maxIter=10000; 130 | end 131 | else 132 | opts.maxIter=10000; 133 | end 134 | 135 | if ~isfield(opts,'tol') 136 | opts.tol=1e-3; 137 | end 138 | 139 | if isfield(opts,'tFlag') 140 | if opts.tFlag<0 141 | opts.tFlag=0; 142 | elseif opts.tFlag>5 143 | opts.tFlag=5; 144 | else 145 | opts.tFlag=floor(opts.tFlag); 146 | end 147 | else 148 | opts.tFlag=0; 149 | end 150 | 151 | %% Normalization 152 | 153 | if isfield(opts,'nFlag') 154 | if (opts.nFlag~=1) && (opts.nFlag~=2) 155 | opts.nFlag=0; 156 | end 157 | else 158 | opts.nFlag=0; 159 | end 160 | 161 | %% Regularization 162 | 163 | if isfield(opts,'rFlag') 164 | if (opts.rFlag~=1) 165 | opts.rFlag=0; 166 | end 167 | else 168 | opts.rFlag=0; 169 | end 170 | %% Method (Line Search) 171 | 172 | if isfield(opts,'lFlag') 173 | if (opts.lFlag~=1) 174 | opts.lFlag=0; 175 | end 176 | else 177 | opts.lFlag=0; 178 | end 179 | 180 | if isfield(opts,'mFlag') 181 | if (opts.mFlag~=1) 182 | opts.mFlag=0; 183 | end 184 | else 185 | opts.mFlag=0; 186 | end 187 | 188 | -------------------------------------------------------------------------------- /run_exp1_func.m: -------------------------------------------------------------------------------- 1 | function [FeaNumCandi_aio, res_gs_aio, res_aio_aio, res_gs_ps_aio] = run_exp1_func(datasets, candiAlgs, username, password) 2 | 3 | [flag_writeable, flag_uploadable, prefix] = mdcs_check(username, password); 4 | 5 | if ~exist('datasets', 'var') || isempty(datasets) 6 | % 
datasets = {'test'}; 7 | datasets = {'medical_706n_1449d_17c', 'PIE_Pose27_1428n_1024d_68c', 'USPS49_1673n_256d_2c', 'mfeat_pix_2000n_240d_10c'}; 8 | end 9 | if ischar(datasets); datasets = {datasets}; end 10 | 11 | if ~exist('candiAlgs', 'var') || isempty(candiAlgs) 12 | candiAlgs = {'AllFea', 'MaxVar', 'LapScore', 'TraceRatio', 'SPEC', 'LLCFS', 'SPFS', 'MCFS', 'UDFS', 'NDFS', 'RUFS', 'JELSR', 'GLSPFS', 'FSSL'}; 13 | end 14 | if ischar(candiAlgs); candiAlgs = {candiAlgs}; end 15 | 16 | if ~exist('exp_settings', 'var'); exp_settings = []; end 17 | if ~isfield(exp_settings, 'FeaNumCandi') 18 | exp_settings.FeaNumCandi = [[5:5:50],[10:10:150],[50:50:300]]; 19 | end 20 | if ~isfield(exp_settings, 'nKmeans') 21 | exp_settings.nKmeans = 20; 22 | end 23 | if ~isfield(exp_settings, 'prefix_mdcs') 24 | exp_settings.prefix_mdcs = prefix; 25 | end 26 | 27 | FeaNumCandi = exp_settings.FeaNumCandi; 28 | FeaNumCandi_aio = cell(length(datasets), length(candiAlgs)); 29 | res_gs_aio = cell(length(datasets), length(candiAlgs)); 30 | res_aio_aio = cell(length(datasets), length(candiAlgs)); 31 | res_gs_ps_aio = cell(length(datasets), length(candiAlgs)); 32 | 33 | root_dir = pwd; 34 | addpath(root_dir); 35 | for id = 1:length(datasets) 36 | dataset = datasets{id}; 37 | X = extractXY(dataset); 38 | exp_settings.FeaNumCandi = FeaNumCandi(FeaNumCandi < size(X, 2)); 39 | clear X; 40 | 41 | disp(['data = ', dataset, ' ...']); 42 | try 43 | if ~exist([prefix, filesep, dataset], 'dir') 44 | mkdir([prefix, filesep, dataset]); 45 | end 46 | exp_settings.prefix_mdcs = [prefix, filesep, dataset]; 47 | catch 48 | disp(['create dir: ', [prefix, filesep, dataset], 'failed, check the authorization']); 49 | end 50 | 51 | for iAlg = 1:length(candiAlgs) 52 | algo = candiAlgs{iAlg}; 53 | disp(['algo = ', algo, ' ...']); 54 | switch lower(algo) 55 | case lower('AllFea') 56 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = 
fs_unsup_allfea_single_func(dataset, exp_settings); 57 | case lower('MaxVar') 58 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_maxvar_single_func(dataset, exp_settings); 59 | case lower('LapScore') 60 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_lapscore_single_func(dataset, exp_settings); 61 | case lower('SPEC') 62 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_spec_single_func(dataset, exp_settings); 63 | case lower('TraceRatio') 64 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_traceratio_single_func(dataset, exp_settings); 65 | case lower('LLCFS') 66 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_llcfs_single_func(dataset, exp_settings); 67 | case lower('UDFS') 68 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_udfs_single_func(dataset, exp_settings); 69 | case lower('SPFS') 70 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_spfs_single_func(dataset, exp_settings); 71 | case lower('MCFS') 72 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_mcfs_single_func(dataset, exp_settings); 73 | case lower('NDFS') 74 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_ndfs_single_func(dataset, exp_settings); 75 | case lower('RUFS') 76 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_rufs_single_func(dataset, exp_settings); 77 | case lower('JELSR_lpp') 78 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] 
= fs_unsup_jelsr_lpp_single_func(dataset, exp_settings); 79 | case lower('JELSR_lle') 80 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_jelsr_lle_single_func(dataset, exp_settings); 81 | case lower('JELSR_liang_lpp') 82 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_jelsr_liang_lpp_single_func(dataset, exp_settings); 83 | case lower('JELSR_liang_lle') 84 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_jelsr_liang_lle_single_func(dataset, exp_settings); 85 | case lower('CGSSL') 86 | disp('not supported yet'); 87 | case lower('GLSPFS') 88 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_glspfs_single_func(dataset, exp_settings); 89 | case lower('FSSL_11_11_1') 90 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_fsasl_11_11_1_single_func(dataset, exp_settings); 91 | case lower('FSSL_11_11_5') 92 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_fsasl_11_11_5_single_func(dataset, exp_settings); 93 | case lower('FSSL_11_5_5') 94 | [FeaNumCandi_aio{id, iAlg}, res_gs_aio{id, iAlg}, res_aio_aio{id, iAlg}, res_gs_ps_aio{id, iAlg}] = fs_unsup_fsasl_11_5_5_single_func(dataset, exp_settings); 95 | otherwise 96 | disp('not supported yet'); 97 | end 98 | disp(['algo = ', algo, ' done']); 99 | email_notify(username, password, [username, '@ios.ac.cn'], [algo, ' on ', dataset, ' done']); 100 | end 101 | cd (exp_settings.prefix_mdcs); 102 | plot_result(dataset, candiAlgs, 0); 103 | email_notify(username, password, [username, '@ios.ac.cn'], ['all algo on ', dataset, ' done'], [dataset, '.tex']); 104 | cd(root_dir); 105 | disp(['data = ', dataset, ' done']); 106 | end 107 | rmpath(root_dir); 
% --------------------------------------------------------------------------------
% /fs_unsup_mcfs.m:
% --------------------------------------------------------------------------------
function [FeaIndex,FeaNumCandi] = fs_unsup_mcfs(fea,FeaNumCandi,options)
% MCFS: Feature Selection for Multi Class/Cluster data
%
%     FeaIndex = fs_unsup_mcfs(data,FeaNumCandi,options)
%
%     Input:
%
%            fea    - data matrix. Each row vector of data is a
%                     sample vector.
%     FeaNumCandi   - The number of features to be selected
%
%     options       - Struct value in Matlab (may be omitted or empty).
%                     The fields in options that can be set:
%
%           gnd         -  The label of the data. You can provide
%                          gnd if it is a supervised feature
%                          selection problem.
%           W           -  Affinity matrix. You can either call
%                          "constructW" to construct the W, or
%                          construct it by yourself.
%                          If W is not provided, fs_unsup_mcfs will
%                          build a k-NN graph with Heat kernel
%                          weight, where k is a parameter. (If gnd is
%                          provided, this parameter will be ignored)
%           k           -  The parameter for k-NN graph (Default is 5)
%                          If gnd or W is provided, this parameter will be
%                          ignored.
%       nUseEigenfunction  -  Indicate how many eigen functions will be
%                          used. If gnd is provided, this parameter
%                          will be ignored. (Default is 5)
%
%           Method      -  Method used to select features. Choices
%                          are:
%                          {'LASSO_LARs'}  - (the default)
%                          'LASSO_SLEP'
%                          'GROUPLASSO_SLEP'
%
%           Other fields are:
%             * ratio: [default 1] when trying to select M features,
%                  keep ratio*M non-zero entries in each eigenvector
%                  (dimension).
%             * NotEnoughNonZero: strategy when non-zero entries are
%                  not enough to select the required number of
%                  features. This parameter is only used when `ratio'
%                  is less than 1. It can be the following values:
%                  * 0: fire an error and exit
%                  * 1: ignore
%                  * 2: try to find more non-zero entries, fire error
%                       when fail
%                  * 3: [default] try to find more non-zero entries,
%                       ignore when fail
%
%
%     Output:
%           FeaIndex  - cell variable. Each element in FeaIndex is the
%                       index of the selected features (the number of
%                       feature is specified in FeaNumCandi).
%                       length(FeaIndex) == length(FeaNumCandi)
%
%
%
%===================================================================
%     Examples:
%
%-------------------------------------------------------------------
%     (Supervised feature selection)
%
%       fea = rand(50,70);
%       gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4];
%
%       options = [];
%       options.gnd = gnd;
%       FeaNumCandi = [10:5:60];
%
%       [FeaIndex,FeaNumCandi] = fs_unsup_mcfs(fea, FeaNumCandi,options);
%
%       for i = 1:length(FeaNumCandi)
%           SelectFeaIdx = FeaIndex{i};
%           feaNew = fea(:,SelectFeaIdx);
%       end
%
%-------------------------------------------------------------------
%     (Unsupervised feature selection)
%
%       fea = rand(50,70);
%
%       options = [];
%       options.k = 5; %For unsupervised feature selection, you should tune
%                      %this parameter k, the default k is 5.
%       options.nUseEigenfunction = 4;  %You should tune this parameter.
%
%       FeaNumCandi = [10:5:60];
%
%       [FeaIndex,FeaNumCandi] = fs_unsup_mcfs(fea,FeaNumCandi,options);
%
%       for i = 1:length(FeaNumCandi)
%           SelectFeaIdx = FeaIndex{i};
%           feaNew = fea(:,SelectFeaIdx);
%       end
%
%===================================================================
%
%Reference:
%
%   Deng Cai, Chiyuan Zhang, Xiaofei He, "Unsupervised Feature Selection
%   for Multi-cluster Data",16th ACM SIGKDD Conference on Knowledge
%   Discovery and Data Mining (KDD'10), July 2010.
%
%   version 1.1 --Dec/2011
%   version 1.0 --Dec/2009
%
%   Written by Deng Cai (dengcai AT gmail.com)
%              Chiyuan Zhang (pluskid AT gmail.com)
%

if nargin < 3
    options = [];
end

[nSmp,mFea] = size(fea);
% Candidates are processed in ascending order, duplicates and counts above
% the number of available features are dropped
FeaNumCandi = unique(FeaNumCandi);
FeaNumCandi(FeaNumCandi > mFea) = [];

nUseEigenfunction = 5;
if isfield(options,'nUseEigenfunction')
    nUseEigenfunction = options.nUseEigenfunction;
end

k = 5;
if isfield(options,'k')
    k = options.k;
end

if isfield(options,'ratio')
    ratio = options.ratio;
else
    ratio = 1;
end

if isfield(options, 'NotEnoughNonZero')
    NotEnoughNonZero = options.NotEnoughNonZero;
else
    NotEnoughNonZero = 3;
end

if isfield(options,'gnd')
    % Supervised case: build the responses from the class labels
    if length(options.gnd) ~= nSmp
        error('gnd does not match!');
    else
        gnd = options.gnd;
    end
    ClassLabel = unique(gnd);
    nClass = length(ClassLabel);

    rand('state',0);
    Y = rand(nClass,nClass);
    Z = zeros(nSmp,nClass);
    for i=1:nClass
        idx = find(gnd==ClassLabel(i));
        Z(idx,:) = repmat(Y(i,:),length(idx),1);
    end
    Z(:,1) = ones(nSmp,1);
    [Y,~] = qr(Z,0);
    Y(:,1) = [];
else
    % Unsupervised case: responses are the spectral embedding of the graph
    if isfield(options,'W')
        W = options.W;
    else
        Woptions.k = k;
        if nSmp > 3000
            tmpD = EuDist2(fea(randsample(nSmp,3000),:));
        else
            tmpD = EuDist2(fea);
        end
        Woptions.t = mean(mean(tmpD));
        W = constructW(fea,Woptions);
    end

    Y = Eigenmap(W,nUseEigenfunction);
end

options.ReguType = 'RidgeLasso';
if ~isfield(options,'Method')
    options.Method = 'LASSO_LARs';
end

switch lower(options.Method)
    case {lower('LASSO_LARs')}
        options.LASSOway = 'LARs';
        options.LassoCardi = ceil(FeaNumCandi*ratio);
        eigvectorAll = SR(options, Y, fea);

        FeaIndex = cell(1,length(FeaNumCandi));
        for i = 1:length(FeaNumCandi)
            eigvector = eigvectorAll{i};
            eigvector = max(abs(eigvector),[],2);

            [dump,idx] = sort(eigvector,'descend');
            if dump(FeaNumCandi(i)) == 0
                if NotEnoughNonZero == 0        % fire error
                    error('Not enough fea!');
                elseif NotEnoughNonZero == 1    % ignore
                    warning('Not enough fea!');
                else
                    % Look at the solutions computed for the larger
                    % candidates until one has enough non-zero entries.
                    % fallbackIdx starts as the current ordering so that the
                    % last candidate (no larger one exists) is handled too.
                    found = false;
                    fallbackIdx = idx;
                    for j = i+1:length(FeaNumCandi)
                        eigvec = eigvectorAll{j};
                        eigvec = max(abs(eigvec),[],2);
                        [dump2,fallbackIdx] = sort(eigvec,'descend');
                        if (dump2(FeaNumCandi(i)) > 0)
                            found = true;
                            break;
                        end
                    end
                    if ~found
                        if (NotEnoughNonZero == 2)
                            error('Not enough fea, tried to find more but failed!');
                        else
                            warning('Not enough fea, tried to find more but failed!');
                        end
                    end
                    idx = fallbackIdx;
                end
            end
            FeaIndex{i} = idx(1:FeaNumCandi(i));
        end
    case {lower('LASSO_SLEP')}
        error('Comming soon!');
    case {lower('GROUPLASSO_SLEP')}
        error('Comming soon!');
    otherwise
        error('method does not exist!');
end

% --------------------------------------------------------------------------------
% /plot_result.m:
% --------------------------------------------------------------------------------
function plot_result(dataset, candiAlgs, plot_flag)
% PLOT_RESULT  Collect the per-algorithm best results of a data set and report them.
%
% Input
%      dataset,    data set name; result files are looked up in the current
%                  directory as <dataset>_best_result_<algorithm>.mat
%      candiAlgs,  cell array of algorithm names, a built-in list by default
%      plot_flag,  non-zero (default 1) to draw one figure per measure
%
% Side effects: writes res_algs_<dataset>.mat and, when the feature-count
% list has more than 10 entries, <dataset>.tex (through compute_message).
%==========================setup=======================================
% dataset = 'jaffe_213n_676d_10c';
if ~exist('plot_flag', 'var')
    plot_flag = 1;
end

if ~exist('candiAlgs', 'var') || isempty(candiAlgs)
    candiAlgs = { 'LapScore', 'MCFS', 'LLCFS', 'UDFS', 'NDFS', 'SPFS', 'RUFS', 'JELSR_lpp', 'GLSPFS', 'FSSL_11_11_5'};
end
% candiAlgs = {'AllFea', 'LapScore'};
candiLineStyles = [repmat({'-', '-.'}, 1, 4), repmat({'-', '--'}, 1, 7)];
candiMarkers = repmat({'o', '+', 's', 'd'}, 1, 4);
candiColors = repmat([0 0 0; 1 0 1; 0 1 1; 1 0 0; 0 1 0; 0 0 1], 3, 1);
candiMarkerSpacing = repmat([5, 5], 18, 1);
% The style tables have different lengths; indices wrap around so that a long
% algorithm list cannot run past the shortest table
wrap = @(pos, len) mod(pos - 1, len) + 1;

%=====================================================================
res_algs = [];

algs = {};
lineStyles = {};
markers = {};
colors = [];
markerSpacing = [];
FeaNumCandi = [];

ii = 0;
for idx = 1:length(candiAlgs )
    res_file = [dataset, '_best_result_', candiAlgs{idx}, '.mat'];

    if exist(res_file, 'file')
        ii = ii + 1;
        % Load into a struct instead of injecting variables into the workspace
        loaded = load(res_file);
        if isfield(loaded, 'FeaNumCandi')
            FeaNumCandi = loaded.FeaNumCandi;
        end
        if isfield(loaded, 'res_gs')
            res_gs = loaded.res_gs;
            if ii == 1
                res_algs = res_gs;
            else
                % One row per algorithm in every field
                fn = fieldnames(res_gs);
                for i2 = 1:length(fn)
                    if ~isfield(res_algs, (fn{i2}))
                        res_algs.(fn{i2}) = []; %place holder, should be removed, some algos did not record time2
                    end
                    res_algs.(fn{i2}) = [res_algs.(fn{i2}); res_gs.(fn{i2})];
                end
            end

            algs{end+1} = candiAlgs{idx}; %#ok
            lineStyles{end+1} = candiLineStyles{wrap(ii, numel(candiLineStyles))}; %#ok
            markers{end+1} = candiMarkers{wrap(ii, numel(candiMarkers))}; %#ok
            colors = [colors; candiColors(wrap(ii, size(candiColors, 1)), :)]; %#ok
            markerSpacing = [markerSpacing; candiMarkerSpacing(wrap(ii, size(candiMarkerSpacing, 1)), :)]; %#ok
        end
    end
end

if ~isempty(res_algs)
    res_gs_tt = [];

    % res_gs_tt = compute_ttest(dataset, algs, length(FeaNumCandi));
    if isvector(FeaNumCandi ) && length(FeaNumCandi) > 10
        % Split the candidate list into its increasing runs: a run ends
        % where the next value is smaller, and the following run starts one
        % position later
        dropIdx = find(FeaNumCandi(1:end-1) - FeaNumCandi(2:end) > 0);
        dropIdx = dropIdx(:);
        runStart = [1; dropIdx + 1];
        runStop = [dropIdx; length(FeaNumCandi)];
        ids = cell(length(runStart), 1);
        for i1 = 1:length(ids)
            ids{i1} = runStart(i1):runStop(i1);
        end
        message = compute_message(algs, res_algs, dataset, ids);
    else
        message = [];
    end
    save(['res_algs_', dataset, '.mat'], 'algs', 'res_algs', 'res_gs_tt', 'message');
    fns = {'mean_acc', 'mean_nmi_sqrt', 'mean_nmi_max', 'mean_purity', 'mean_prec', 'mean_recall', 'mean_f1', ...
        'best_obj_acc', 'best_obj_nmi_sqrt', 'best_obj_nmi_max', 'best_obj_purity', 'best_obj_prec', 'best_obj_recall', 'best_obj_f1',...
        'jac', 'red','loocv'};
    if plot_flag
        xData = (1:length(FeaNumCandi));
        % figure;
        for i1 = 1:length(fns)
            figure;
            my_prettyPlot(xData, res_algs.(fns{i1}), colors, lineStyles, markers, markerSpacing, dataset, '# of features', fns{i1}, algs, 'SouthWest');
            if strcmp(fns{i1},'loocv')
                my_prettyPlot(xData, 1-res_algs.(fns{i1}), colors, lineStyles, markers, markerSpacing, dataset, '# of features', fns{i1}, algs, 'SouthWest');
            end
        end
    end
    %     my_prettyPlot(xData, res_algs.mean_nmi_max, colors, lineStyles, markers, markerSpacing, dataset, '# of features', 'Normalized Mutual Information', algs, 'SouthWest');
    %     figure;
    %     my_prettyPlot(xData, res_algs.red, colors, lineStyles, markers, markerSpacing, dataset, '# of features', 'Redundancy', algs, 'SouthWest');
    %     figure;
    %     my_prettyPlot(xData, res_algs.f1, colors, lineStyles, markers, markerSpacing, dataset, '# of features', 'JAC', algs, 'SouthWest');
end
end

function my_prettyPlot(xData, yData, colors, lineStyles, markers, markerSpacing, title, xlabel, ylabel, legends, legendLoc)
% Pack the styling arguments into an options struct and call prettyPlot.
% markerSpacing is accepted but currently not forwarded.

options.colors = colors;
options.lineStyles = lineStyles;
options.markers = markers;
% options.markerSpacing = markerSpacing;
options.title = title;
options.xlabel = xlabel;
options.ylabel = ylabel;
options.legendStr = legends;
options.legend = legends;
options.legendLoc = legendLoc;
options.xlimits = [1, length(xData)];
% options.ylimits = [min(yData(:)), max(yData(:)) ];
prettyPlot(xData,yData,options);
hold off;
end


function res_gs_tt = compute_ttest(dataset, candiAlgs, nFeaNumCandi)
% Pairwise t-tests between algorithms for every feature count.
% Returns a 1 * nFeaNumCandi cell; each entry holds, per measure, a matrix of
% test decisions (<measure>_tt) and p-values (<measure>_tt_p), -1 where no
% comparison was made.
% NOTE(review): the file name pattern used here differs from the one in
% plot_result ('<dataset>_best_result_<algo>.mat') - confirm which files hold
% res_aio / res_gs_ps before enabling the (currently commented-out) call.
res_gs_tt = cell(1, nFeaNumCandi);
fns = {'aio_acc', 'aio_nmi_max', 'aio_nmi_sqrt', 'aio_purity', 'aio_prec', 'aio_recall', 'aio_f1'};
fns2 = {'mean_acc', 'mean_nmi_max', 'mean_nmi_sqrt', 'mean_purity', 'mean_prec', 'mean_recall', 'mean_f1'};
for i1 = 1:nFeaNumCandi
    for i2 = 1:length(fns)
        res_gs_tt{1, i1}.([fns{i2}, '_tt']) = ones(length(candiAlgs)) * -1;
        res_gs_tt{1, i1}.([fns{i2}, '_tt_p']) = ones(length(candiAlgs)) * -1;
    end
end

for i1 = 1:length(candiAlgs)
    res_file = [dataset, 'best_result_', dataset, '_', candiAlgs{i1}, '.mat'];
    if exist(res_file, 'file')
        if exist(res_file, 'file'); load(res_file); end
        if exist('res_aio', 'var')
            res1 = res_aio;
            res1_ps = res_gs_ps;
            clear res_aio res_gs_ps;

            for i2 = i1+1:length(candiAlgs)
                % the second algorithm of the pair is i2 (not i1 again)
                res_file = [dataset, 'best_result_', dataset, '_', candiAlgs{i2}, '.mat'];
                if exist(res_file, 'file')
                    if exist(res_file, 'file'); load(res_file); end

                    if exist('res_aio', 'var')
                        res2 = res_aio;
                        res2_ps = res_gs_ps;
                        clear res_aio res_gs_ps;

                        for i3 = 1:nFeaNumCandi
                            for i4 = 1:length(fns)
                                tmp1 = res1_ps.(fns2{i4});
                                b1_idx = tmp1(i3);
                                tmp2 = res2_ps.(fns2{i4});
                                b2_idx = tmp2(i3);
                                r1 = res1{b1_idx, i3}.(fns{i4});
                                r2 = res2{b2_idx, i3}.(fns{i4});
                                [t1, t2] = ttest(r1, r2);
                                tmp1 = res_gs_tt{1, i3}.([fns{i4},'_tt']);
                                tmp1(i1, i2) = t1;
                                res_gs_tt{1, i3}.([fns{i4},'_tt']) = tmp1;
                                tmp1 = res_gs_tt{1, i3}.([fns{i4},'_tt_p']);
                                tmp1(i1, i2) = t2;
                                res_gs_tt{1, i3}.([fns{i4},'_tt_p']) = tmp1;
                            end
                        end
                    end
                end
            end
        end
    end
end
end

function message = compute_message(algs, res_algs, dataset, ids)
% Build a LaTeX table (one row per measure and index range in ids, one
% column per algorithm), write it to <dataset>.tex and return its text.
% Measures flagged 0 in ismax are reported as 1 - value and the smallest
% mean is treated as best.
message = [];
fns = {'best_obj_acc', 'best_obj_nmi_sqrt', 'mean_acc', 'mean_nmi_sqrt', 'loocv', 'jac', 'red'};
ismax = [1, 1, 1, 1, 0, 1, 0];
% Short prefix of the data set name used in the row labels; names shorter
% than five characters are used as they are
dataTag = dataset(1:min(5, length(dataset)));

tex_header = '\begin{table*}';
tex_header = [tex_header, char(13),'\caption{', dataset, 'all results}'];
tex_header = [tex_header, char(13),'\tiny \centering \label{table:res_aio}'];

tex_align = '| c ';

tex_title = 'Data Sets';
for i1 = 1:length(algs)
    tex_title = [tex_title, ' & ', algs{i1}]; %#ok
    tex_align = [tex_align, ' | ', 'c']; %#ok
end
tex_title = [tex_title '\\ \hline'];
tex_header = [tex_header, char(13),'\begin{tabular}{', tex_align, '| }'];
tex_header = [tex_header, char(13),'\toprule'];

for i1 = 1:length(ids)
    tmp = ids{i1};
    for i2 = 1:length(fns)
        vals = res_algs.(fns{i2})(:, tmp);
        sigs = zeros(size(res_algs.(fns{i2}), 1), 1);
        sigs2 = sigs;
        if ismax(i2)
            [~, best_id] = max(mean(vals, 2));
        else
            [~, best_id] = min( mean(1 - vals, 2));
        end
        % Test every algorithm against the best one on this range
        for i3 = 1:length(sigs)
            [sigs(i3), sigs2(i3)] = ttest(vals(i3, :), vals(best_id, :));
        end
        rowTag = [dataTag, '_', fns{i2}, '_', num2str(tmp(1)), '_', num2str(tmp(end)) ];
        if ismax(i2)
            message = [message, char(13), ms2tex(mean(vals, 2), std(vals, 0, 2), ismax(i2), sigs, sigs2, rowTag)]; %#ok
        else
            message = [message, char(13), ms2tex(mean(1 - vals, 2), std(1 - vals, 0, 2), ismax(i2), sigs, sigs2, rowTag)]; %#ok
        end
    end
end
message = [tex_title, char(13), message ];


tex_end = [];
tex_end = [tex_end, char(13), '\bottomrule' ];
tex_end = [tex_end, char(13), '\end{tabular}' ];
tex_end = [tex_end, char(13), '\end{table*}' ];

message = [tex_header, char(13), message, char(13), tex_end];
message = strrep(message, '_', '-');
fid = fopen([dataset, '.tex'], 'w+');
if fid == -1
    % keep the table available to the caller even if it cannot be written
    warning('plot_result:texNotWritten', 'Could not open %s for writing.', [dataset, '.tex']);
    return;
end
fprintf(fid, '%s', message);
fclose(fid);
end

% --------------------------------------------------------------------------------
% /FSASL.m:
% --------------------------------------------------------------------------------
function [W, S, A, objHistory] = FSASL(X, nClass, options)
% FSASL  Alternate between structure learning and sparse subspace learning.
%
% Input
%      X,        nDim * nSmp data matrix (each column is a sample)
%      nClass,   number of embedding dimensions passed to the subspace solvers
%      options,  optional struct; every missing field gets the default set below
%
% Output
%      W,          projection returned by the selected GroupLassoType solver
%      S,          nSmp * nSmp global (sparse reconstruction) structure
%      A,          nSmp * nSmp local (neighborhood) structure
%      objHistory, objective value after every outer iteration
if ~exist('options', 'var')
    options = [];
end

optionDefaults = { ...
    'lambda1',        1; ...      % global structure learning  [need to search]
    'LassoType',      'SLEP'; ...
    'SLEPrFlag',      1; ...      % the input parameter 'ReguAlpha' is a ratio in (0, 1)
    'SLEPreg',        0.01; ...   % [need to search, and fix it]
    'LARSk',          5; ...      % [need to search, and fix it]
    'LARSratio',      2; ...
    'lambda2',        1; ...      % local structure learning  [need to search] aim to show local structure is helpful
    'Localk',         5; ...      % [need to search, and fix it]
    'GroupLassoType', 'LS21'; ... % subspace learning
    'lambda3',        1; ...      % [need to search
    'maxiter',        1};         % [need to search
for iOpt = 1:size(optionDefaults, 1)
    if ~isfield(options, optionDefaults{iOpt, 1})
        options.(optionDefaults{iOpt, 1}) = optionDefaults{iOpt, 2};
    end
end

% Needs options.Localk, so it is resolved after the table above
if ~isfield(options, 'LocalReg')
    options.LocalReg = estimateReg(X, options.Localk); % aim to avoid search
end

% options.lambda1 = 1 - options.lambda2;
[~, nSmp] = size(X);
X2 = X;
objHistory = [];
for iter = 1:options.maxiter

    S = zeros(nSmp);
    if options.lambda1 > 0 && ( options.maxiter < 5 || iter > 1)
        % update global structure LG: every sample is sparsely reconstructed
        % from all the other samples
        for iSmp = 1:nSmp
            candIdx = true(nSmp, 1);
            candIdx(iSmp) = false;
            switch lower(options.LassoType)
                case lower('SLEP')
                    S(candIdx, iSmp) = LeastR(X2(:, candIdx), X2(:, iSmp), options.SLEPreg, struct('rFlag', options.SLEPrFlag, 'rsL2', 0));
                case lower('LARS')
                    S(candIdx, iSmp) = LassoLARS(X2(:, candIdx), X2(:, iSmp), options.LARSk * options.LARSratio, 'verbose', 0);
                case lower('lars2')
                    Gram = X2(:, candIdx) * X2(:, candIdx)';
                    Gram = max(Gram,Gram');
                    S(candIdx, iSmp) = lars(X2(:, candIdx), X2(:, iSmp),'lasso', -(max(options.LARSk)+5),1,Gram,options.LARSk);
                case lower('lars3')
                    S(candIdx, iSmp) = lars(X2(:, candIdx), X2(:, iSmp),'lasso', -(max(options.LARSk)+5),0,[],options.LARSk);
                otherwise
                    error('method does not exist!');
            end
        end
        LG = (eye(nSmp) - S);
        LG = LG * LG';
        LG = (LG + LG') / 2;
    else
        LG = 0;
    end

    A = zeros(nSmp);
    if options.lambda2 > 0
        if iter > 1
            % update local structure LL from distances in the current subspace
            distx = L2_distance_1(X2, X2);
            [~, idx] = sort(distx,2);

            for iSmp = 1 : nSmp
                if options.Localk < nSmp
                    idxa0 = idx(iSmp, 2: options.Localk + 1);
                else
                    idxa0 = 1 : nSmp;
                end
                dxi = distx(iSmp, idxa0);
                ad = - (dxi) / (2 * options.LocalReg);
                A(iSmp, idxa0) = EProjSimplex_new(ad);
            end
        else
            % first iteration: k-NN graph on the original data
            A = constructW(X2', struct('k', options.Localk));
        end
        A = (A+A')/2;
        LL = diag(sum(A)) - A;
        LL = (LL + LL') / 2;
    else
        LL = 0;
    end

    L = options.lambda1 * LG + options.lambda2 * LL;

    % update embedding

    switch lower(options.GroupLassoType)
        case lower('JFSSL')
            Y = eig1(L, nClass, 0);
            W = FSSL_subspace(X, Y, options.lambda3);
        case lower('LS21')
            Y = eig1(L, nClass, 0);
            W = LS21(X', Y, options.lambda3);
        case lower('NDFS') % d^3
            tmp = X * L * X';
            tmp = (tmp + tmp') / 2;
            if exist('W', 'var')
                % warm start from the previous iteration
                W = LquadR21_reg(tmp, nClass, options.lambda3, W);
            else
                W = LquadR21_reg(tmp, nClass, options.lambda3);
            end
        case lower('UDFS')
            tmp = X * L * X';
            tmp = (tmp + tmp') / 2;
            W = LquadR21_reg(tmp, nClass, options.lambda3);
        case lower('MCLEASTR')
            Y = eig1(L, nClass, 0);
            W = mcLeastR(X', Y, options.lambda3, struct('rFlag', 1, 'rsL2', 0));
        otherwise
            error('method does not exist!');
    end
    X2 = W' * X;
    obj = trace(X2 * L * X2');
    if options.lambda1 > 0 && strcmpi(options.LassoType, 'SLEP')
        obj = obj + options.lambda1 * options.SLEPreg * sum(sum(abs(S)));
    end

    if options.lambda2 > 0
        obj = obj + options.lambda2 * options.LocalReg * sum(sum(A.^2));
    end

    obj = obj + options.lambda3 * sum(sqrt(sum(W.^2, 2)));
    objHistory = [objHistory; obj]; %#ok
    %
end
end

function A = FSSL_subspace(X, Y, regu)
% Iteratively reweighted solver for a row-sparse A with X' * A ~ Y.
%   X     d * n data matrix
%   Y     n * nClass targets
%   regu  regularization weight, only used when the linear system
%         X' * A = Y does not have infinitely many solutions
[d, ~] = size(X);
n = size(Y, 1);

% Check the solutions
r1 = rank(X');
r2 = rank([X', Y]);
if r1 == r2 && r1 < d
    % Situation 1, infinitely many solutions:
    % X'*A = Y has many solution == rank(X') == rank([X', Y]) < d
    ridge = 0;
else
    % Situation 2, single or no solution: regularized fit
    ridge = 0.5 / regu;
end

% Step 2: Find A satisfies the linear system
% Gi is the inverse of the reweighting matrix G = diag(1 ./ rowNorm). It is
% kept directly as diag(rowNorm), which stays well defined when a row of A
% becomes exactly zero.
nIter = 20;
Gi = eye(d);
for iter = 1:nIter
    A = Gi * X * ((X' * Gi * X + ridge * eye(n)) \ Y);
    rowNorm = sqrt(sum(A.^2, 2));
    Gi = diag(rowNorm);
end
end

function [x, ft] = EProjSimplex_new(v, k)
%
% Problem
%
%  min  1/2 || x - v||^2
%  s.t. x>=0, 1'x=k          (k = 1 by default)
%
% ft is 1 plus the number of Newton steps taken (the search stops after
% 100 steps at the latest).

if nargin < 2
    k = 1;
end

ft=1;
n = length(v);

v0 = v-mean(v) + k/n;
%vmax = max(v0);
vmin = min(v0);
if vmin < 0
    % Newton iteration on the shift lambda_m
    f = 1;
    lambda_m = 0;
    while abs(f) > 10^-10
        v1 = v0 - lambda_m;
        posidx = v1>0;
        npos = sum(posidx);
        g = -npos;
        f = sum(v1(posidx)) - k;
        lambda_m = lambda_m - f/g;
        ft=ft+1;
        if ft > 100
            x = max(v1,0); %#ok
            break;
        end
    end
    x = max(v1,0);

else
    x = v0;
end
end

% NOTE(review): the definition below continues past the end of this excerpt;
% the visible part is carried over unchanged.
% compute squared Euclidean distance
% ||A-B||^2 = ||A||^2 + ||B||^2 - 2*A'*B
function d = L2_distance_1(a,b)
% a,b: two matrices. each column is a data
% d:   distance matrix of a and b



if (size(a,1) == 1)
  a = [a; zeros(1,size(a,2))];
  b = [b; zeros(1,size(b,2))];
end

aa=sum(a.*a); bb=sum(b.*b); ab=a'*b;
d = repmat(aa',[1 size(bb,2)]) + repmat(bb,[size(aa,2) 1]) - 2*ab;

d = real(d);
d = max(d,0);

% % force 0 on the diagonal?
% if (df==1)
%   d = d.*(1-eye(size(d)));
% end

end

function r = estimateReg(X, k)
% ESTIMATEREG  Average per-sample regularisation value from k-NN distances.
%   X : d x nSmp data matrix (one sample per column)
%   k : neighbourhood size; needs k + 2 <= nSmp
%   r : mean over samples of 0.5 * (k * d_(k+1) - sum_{j=1..k} d_j), where
%       d_1 <= d_2 <= ... are the sorted squared distances of a sample to
%       the others (the first sorted entry, normally the zero distance to
%       itself, is skipped)
nSmp = size(X, 2);
if k + 2 > nSmp
    error('estimateReg:tooFewSamples', ...
        'estimateReg needs at least k+2 = %d samples, got %d.', k + 2, nSmp);
end
distX = L2_distance_1(X, X);
%distX = sqrt(distX);
sortedD = sort(distX, 2);
rr = 0.5 * (k * sortedD(:, k+2) - sum(sortedD(:, 2:k+1), 2));
r = mean(rr);
end


function [X, obj] = LquadR21_reg(A, k, r, X0)
% quadratic loss with 21-norm regularization
%   min_{X'*X=I}  Tr(X'*A*X) + r * ||X||_21
%
%   A   : square matrix of the quadratic term
%   k   : number of columns of X
%   r   : weight of the 21-norm term
%   X0  : optional starting point, used only to initialise the row weights
%   X   : eigenvectors of the k smallest eigenvalues of the final
%         reweighted matrix A + r*D
%   obj : 1 x NIter objective value after every iteration

NIter = 36;
n = size(A, 2);
if nargin < 4
    d = ones(n,1);
else
    d = 0.5 ./ sqrt(sum(X0.*X0,2)+eps);
end

obj = zeros(1, NIter);
for iter = 1:NIter
    M = A + r*diag(d);
    M = max(M,M');
    [evec, evals] = eig(M);
    [~, idx] = sort(diag(evals));
    X = evec(:,idx(1:k));

    % reweighting: D(i,i) = 1 / (2*||X(i,:)||), eps keeps zero rows finite
    Xi = sqrt(sum(X.*X,2)+eps);
    d = 0.5./(Xi);

    obj(iter) = trace(X'*A*X) + r*sum(Xi);
end
end

function [eigvec, eigval, eigval_full] = eig1(A, c, isMax, isSym)
% EIG1  The c extreme eigenpairs of A, sorted.
%   c     : number of eigenpairs to return (default, and upper bound:
%           size(A,1))
%   isMax : 1 (default) -> largest eigenvalues first, 0 -> smallest first
%   isSym : 1 (default) -> replace A by max(A, A') before decomposing
%   eigvec, eigval : the selected eigenvectors (columns) and eigenvalues
%   eigval_full    : every computed eigenvalue, in the same order
%
% A dense eig() is tried first; if it throws, eigs() is used to compute
% just the c requested eigenpairs.

n = size(A, 1);
if nargin < 2 || c > n
    c = n;
end
if nargin < 3
    isMax = 1;
end
if nargin < 4
    isSym = 1;
end

if isSym == 1
    A = max(A,A');
end
try
    [v, d] = eig(A);
catch
    eigsOpts = struct('tol', 1e-5);
    if isMax == 0
        [v, d] = eigs(sparse(A), c, 'sa', eigsOpts);
    else
        [v, d] = eigs(sparse(A), c, 'la', eigsOpts);
    end
end
% eig and eigs both hand back a diagonal matrix; the eigs path used to skip
% this conversion, which broke the sort/indexing below.
d = diag(d);

if isMax == 0
    [~, idx] = sort(d);
else
    [~, idx] = sort(d,'descend');
end
idx1 = idx(1:min(c, numel(idx)));
eigval = d(idx1);
eigvec = v(:,idx1);

eigval_full = d(idx);
end


function W = LS21(X, Y, r, W0)
% LS21  Least squares with 21-norm regularisation,
%           min_W ||X*W - Y||_F^2 + r * ||W||_{2,1},
%       solved by iteratively reweighted least squares:
%           W = (X'*X + r*D) \ (X'*Y),   D(i,i) = 1 / (2*||W(i,:)||)
%   X  : n x m data (one sample per row)
%   Y  : n x c targets
%   r  : regularisation weight
%   W0 : optional m x c starting point, used only to initialise D
%   W  : m x c solution after 10 reweighting passes
[n, m] = size(X);
if nargin < 4
    d = ones(m,1);
else
    d = 0.5 ./ sqrt(sum(W0.^2,2)+eps);
end

maxiter = 10;
XY = X' * Y;
if n < m % n^3
    % Fewer samples than features: use the Woodbury identity
    %   inv(X'X + rD) = inv(rD) - inv(rD) X' inv(I + X inv(rD) X') X inv(rD)
    % so only an n x n system is solved and no m x m matrix is formed.
    % (The test used to read "n < d", comparing against the weight vector,
    % so this branch was never selected.)
    for iter = 1:maxiter
        rd = 1 ./ (r * d);
        Xrd = bsxfun(@times, X, rd');       % X * inv(rD)
        rdXY = bsxfun(@times, rd, XY);      % inv(rD) * X'Y
        W = rdXY - Xrd' * ((eye(n) + Xrd * X') \ (X * rdXY));
        d = 0.5 ./ sqrt(sum(W.^2,2)+eps);
    end
else % d^3
    XX = X' * X;
    for iter = 1:maxiter
        W = (XX + r * diag(d)) \ XY;
        d = 0.5 ./ sqrt(sum(W.^2,2)+eps);
    end
end
end
--------------------------------------------------------------------------------
/lars.m:
--------------------------------------------------------------------------------
function beta = lars(X, y, method, stop, useGram, Gram, Cardi, bSparse, trace)
% This function is provided at
% http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3897
% I have made some small modifications -- Deng Cai, Feb/2008

% LARS  The LARS algorithm for performing LAR or LASSO.
%    BETA = LARS(X, Y) regresses the response Y on the variables in X
%    using the default METHOD, 'lasso'. Variables X are assumed to be
%    normalized (zero mean, unit length), the response Y is assumed to be
%    centered.
%    BETA = LARS(X, Y, METHOD), where METHOD is either 'LARS' or 'LASSO'
%    determines whether least angle regression or lasso regression should
%    be performed.
%    BETA = LARS(X, Y, METHOD, STOP) with nonzero STOP will perform least
%    angle or lasso regression with early stopping. If STOP is negative,
%    STOP is an integer that determines the desired number of variables.
%    If STOP is positive, it corresponds to an upper bound on the L1-norm
%    of the BETA coefficients.
%    BETA = LARS(X, Y, METHOD, STOP, USEGRAM) selects between the
%    Gram-matrix code path (USEGRAM = 1) and the incrementally updated
%    Cholesky factor of the active columns (USEGRAM = 0, the default).
%    BETA = LARS(X, Y, METHOD, STOP, USEGRAM, GRAM) supplies the
%    pre-computed Gram matrix X'*X (one row/column per column of X). It is
%    required when USEGRAM = 1; this version does not compute it itself
%    and raises an error if it is missing.
%    BETA = LARS(..., GRAM, CARDI) with a non-empty vector CARDI of
%    active-set sizes overrides STOP with -max(CARDI) and returns one
%    column per requested size (sorted, duplicates and sizes above the
%    number of variables removed): the coefficients of the most recent
%    step at which the active set had that size.
%    BETA = LARS(..., CARDI, BSPARSE) returns BETA as a sparse matrix when
%    BSPARSE is nonzero (default) and as a full matrix otherwise.
%    BETA = LARS(..., CARDI, BSPARSE, TRACE) with nonzero TRACE will print
%    the adding and subtracting of variables as the solutions are found.
%    Returns BETA with one row per variable. When CARDI is empty, column
%    k+1 holds the coefficients after iteration k (column 1 is all zeros).
%    A suitable column is chosen using e.g. cross-validation, possibly
%    including interpolation to achieve sub-iteration accuracy.
%
% Author: Karl Skoglund, IMM, DTU, kas@imm.dtu.dk
% Reference: 'Least Angle Regression' by Bradley Efron et al, 2003.

%% Input checking
% Set default values.
if nargin < 9
    trace = 0;
end
if nargin < 8
    bSparse = 1;
end
if nargin < 7
    Cardi = [];
end
if nargin < 6
    Gram = [];
end
if nargin < 5
    useGram = 0;
end
if nargin < 4
    stop = 0;
end
if nargin < 3
    method = 'lasso';
end
lasso = strcmpi(method, 'lasso');

if isempty(X)
    error('The code has been updated. Please input the X');
end


%% LARS variable setup
[n, p] = size(X);
% nvars = min(n-1,p); %
nvars = p; %

maxk = 512*nvars; % Maximum number of iterations

% Number of columns to preallocate for beta
if isempty(Cardi)
    if stop == 0
        nCols = 2*nvars;
    elseif stop < 0
        nCols = 2*round(-stop);
    else
        nCols = 100;
    end
else
    Cardi = unique(Cardi);
    Cardi(Cardi>nvars) = [];
    if isempty(Cardi)
        % previously this fell through and failed later with an obscure
        % indexing error
        error('lars:emptyCardi', ...
            'Every requested cardinality exceeds the number of variables (%d).', nvars);
    end
    stop = -max(Cardi);
    nCols = length(Cardi);
    betak = zeros(p,1); % coefficients of the current step
end
if bSparse
    beta = sparse(p,nCols);
else
    beta = zeros(p,nCols);
end

mu = zeros(n, 1); % current "position" as LARS travels towards lsq solution
I = 1:p; % inactive set
A = []; % active set

% The Gram matrix is no longer computed here; it must be passed in.
if isempty(Gram) && useGram
    error('The code has been updated. Please input the Gram');
end

if ~useGram
    R = []; % Cholesky factorization R'R = X'X where R is upper triangular
end


lassocond = 0; % LASSO condition boolean
stopcond = 0; % Early stopping condition boolean
k = 0; % Iteration count
vars = 0; % Current number of variables

if trace
    fprintf('Step\tAdded\tDropped\t\tActive set size\n');
end

%% LARS main loop
while vars < nvars && ~stopcond && k < maxk
    k = k + 1;
    c = X'*(y - mu);
    [C, j] = max(abs(c(I)));
    j = I(j);

    if ~lassocond % if a variable has been dropped, do one iteration with this configuration (don't add new one right away)
        if ~useGram
            diag_k = X(:,j)'*X(:,j); % diagonal element k in X'X matrix
            if isempty(R)
                R = sqrt(diag_k);
            else
                col_k = X(:,j)'*X(:,A); % elements of column k in X'X matrix
                R_k = R'\col_k'; % R'R_k = (X'X)_k, solve for R_k
                R_kk = sqrt(diag_k - R_k'*R_k); % norm(x'x) = norm(R'*R), find last element by exclusion
                R = [R R_k; [zeros(1,size(R,2)) R_kk]]; % update R
            end
        end
        A = [A j];
        I(I == j) = [];
        vars = vars + 1;
        if trace
            fprintf('%d\t\t%d\t\t\t\t\t%d\n', k, j, vars);
        end
    end

    s = sign(c(A)); % get the signs of the correlations

    if useGram
        if vars <= 200
            % small active set: refactorising from scratch is cheap
            R = chol(Gram(A,A));
        elseif lassocond
            % a variable was dropped in the previous iteration (position rJ)
            if (rJ <= 200) && vars <= 1000
                R = chol(Gram(A,A));
            else
                R(:,rJ) = []; % remove column j
                tmpn = size(R,2);
                for tmpk = rJ:tmpn
                    tmpp = tmpk:tmpk+1;
                    [G,R(tmpp,tmpk)] = planerot(R(tmpp,tmpk)); % remove extra element in column
                    if tmpk < tmpn
                        R(tmpp,tmpk+1:tmpn) = G*R(tmpp,tmpk+1:tmpn); % adjust rest of row
                    end
                end
                R(end,:) = []; % remove zero'ed out row
            end
        else
            % a variable was added: append one column to the factor
            R_k = R'\Gram(A(1:end-1),j);
            R_kk = sqrt(Gram(j,j)-R_k'*R_k);
            R = [R R_k; [zeros(1,size(R,2)) R_kk]]; % update R
        end
    end
    GA1 = R\(R'\s);
    AA = 1/sqrt(sum(GA1.*s));
    w = AA*GA1;
    u = X(:,A)*w; % equiangular direction (unit vector)

    if vars == nvars % if all variables active, go all the way to the lsq solution
        gamma = C/AA;
    else
        a = X'*u; % correlation between each variable and eqiangular vector
        temp = [(C - c(I))./(AA - a(I)); (C + c(I))./(AA + a(I))];
        gamma = min([temp(temp > 0); C/AA]);
    end

    % LASSO modification
    if lasso
        lassocond = 0;
        if isempty(Cardi)
            temp = -beta(A,k)./w;
        else
            temp = -betak(A)./w;
        end
        gamma_tilde = min([temp(temp > 0); gamma]);
        % Position (within A) of the coefficient that hits zero first. Only
        % the first match is taken: on an exact tie a vector-valued j would
        % corrupt the R / A / vars bookkeeping below.
        j = find(temp == gamma_tilde, 1);
        if gamma_tilde < gamma
            gamma = gamma_tilde;
            lassocond = 1;
        end
    end

    mu = mu + gamma*u;
    if isempty(Cardi)
        if size(beta,2) < k+1
            % out of preallocated columns: grow beta
            if bSparse
                beta = [beta sparse(p,size(beta,1))];
            else
                beta = [beta zeros(p,size(beta,1))];
            end
        end
        beta(A,k+1) = beta(A,k) + gamma*w;
    else
        tmpbetak = betak(A) + gamma*w;
        betak = zeros(p,1);
        betak(A) = tmpbetak;
        idx = find(Cardi==vars);
        if ~isempty(idx)
            beta(:,idx) = betak;
        end
    end

    % Early stopping at specified bound on L1 norm of beta
    if isempty(Cardi)
        if stop > 0
            t2 = sum(abs(beta(:,k+1)));
            if t2 >= stop
                t1 = sum(abs(beta(:,k)));
                frac = (stop - t1)/(t2 - t1); % interpolation factor 0 < frac < 1
                beta(:,k+1) = beta(:,k) + frac*(beta(:,k+1) - beta(:,k));
                stopcond = 1;
            end
        end
    end

    % If LASSO condition satisfied, drop variable from active set
    if lassocond == 1
        if ~useGram
            R(:,j) = []; % remove column j
            tmpn = size(R,2);
            for tmpk = j:tmpn
                tmpp = tmpk:tmpk+1;
                [G,R(tmpp,tmpk)] = planerot(R(tmpp,tmpk)); % remove extra element in column
                if tmpk < tmpn
                    R(tmpp,tmpk+1:tmpn) = G*R(tmpp,tmpk+1:tmpn); % adjust rest of row
                end
            end
            R(end,:) = []; % remove zero'ed out row
        end
        rJ = j; % remembered for the Gram-path downdate in the next iteration
        I = [I A(j)];
        A(j) = [];
        vars = vars - 1;
        if trace
            fprintf('%d\t\t\t\t%d\t\t\t%d\n', k, j, vars);
        end
    end

    % Early stopping at specified number of variables
    if stop < 0
        stopcond = vars >= -stop;
    end
end

if isempty(Cardi)
    % trim beta
    if size(beta,2) > k+1
        beta(:,k+2:end) = [];
    end
end

if k == maxk
    disp('LARS warning: Forced exit. Maximum number of iteration reached.');
end

%% To do
%
% There is a modification that turns least angle regression into stagewise
% (epsilon) regression. This has not been implemented.
--------------------------------------------------------------------------------