├── EuDist2.m
├── LICENSE
├── LaplacianScore.m
├── MCLS.m
├── NormalizeFea.m
├── README.md
├── build_label_manifold.m
├── constructW.m
├── constructW_PKN.m
├── estimate_top_struct.m
└── sample.mat

/EuDist2.m:
--------------------------------------------------------------------------------
1 | function D = EuDist2(fea_a,fea_b,bSqrt)
2 | %EUDIST2 Euclidean distance matrix computed with Matlab matrix operations.
3 | %
4 | %   D = EuDist2(fea_a)
5 | %   D = EuDist2(fea_a,fea_b)
6 | %   D = EuDist2(fea_a,fea_b,bSqrt)
7 | %
8 | %       fea_a: nSample_a * nFeature
9 | %       fea_b: nSample_b * nFeature; omit it or pass [] to compare fea_a with itself
10 | %       bSqrt: nonzero (default 1) returns distances, 0 returns squared distances
11 | %       D:     nSample_a * nSample_a
12 | %              or nSample_a * nSample_b
13 | %
14 | %   Examples:
15 | %
16 | %       a = rand(500,10);
17 | %       b = rand(1000,10);
18 | %
19 | %       A = EuDist2(a);      % A: 500*500
20 | %       D = EuDist2(a,b);    % D: 500*1000
21 | %
22 | %   version 2.1 --November/2011
23 | %   version 2.0 --May/2009
24 | %   version 1.0 --November/2005
25 | %
26 | %   Written by Deng Cai (dengcai AT gmail.com)
27 |
28 |
29 | if nargin < 3
30 |     bSqrt = 1;
31 | end
32 | hasB = (nargin >= 2) && ~isempty(fea_b);
33 |
34 | % Squared row norms of fea_a; converted to full storage for bsxfun.
35 | sqNormA = sum(fea_a.*fea_a,2);
36 | sparseNorms = issparse(sqNormA);
37 | if sparseNorms
38 |     sqNormA = full(sqNormA);
39 | end
40 |
41 | % ||a-b||^2 = ||a||^2 + ||b||^2 - 2*a*b'
42 | if hasB
43 |     sqNormB = sum(fea_b.*fea_b,2);
44 |     if sparseNorms
45 |         sqNormB = full(sqNormB);
46 |     end
47 |     D = bsxfun(@plus,sqNormA,sqNormB') - 2*(fea_a*fea_b');
48 | else
49 |     D = bsxfun(@plus,sqNormA,sqNormA') - 2*(fea_a*fea_a');
50 | end
51 |
52 | % Round-off can leave tiny negative values; clip them before the square root.
53 | D(D<0) = 0;
54 | if bSqrt
55 |     D = sqrt(D);
56 | end
57 |
58 | % The self-distance matrix is forced to be exactly symmetric.
59 | if ~hasB
60 |     D = max(D,D');
61 | end
62 |
63 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 RuiHuang2018
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software
without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LaplacianScore.m:
--------------------------------------------------------------------------------
1 | function [Y] = LaplacianScore(X, W)
2 | %LAPLACIANSCORE Score each feature by how well it respects a graph structure.
3 | %
4 | % Usage:
5 | %   [Y] = LaplacianScore(X, W)
6 | %
7 | %   X: nSmp x nFea data matrix, one data point per row.
8 | %   W: nSmp x nSmp affinity matrix (for example built with constructW).
9 | %   Y: nFea x 1 vector of (1-LaplacianScore), one entry per feature.
10 | %      The features with larger Y are more important.
11 | %
12 | % Examples:
13 | %
14 | %   fea = rand(50,70);
15 | %   options = [];
16 | %   options.NeighborMode = 'KNN';
17 | %   options.k = 5;
18 | %   options.WeightMode = 'Cosine';
19 | %   W = constructW(fea,options);
20 | %
21 | %   score = LaplacianScore(fea,W);
22 | %   [junk, index] = sort(-score);
23 | %
24 | %   newfea = fea(:,index);
25 | %   % the features in newfea are sorted by decreasing importance.
26 | %
27 | % Type "LaplacianScore" for a self-demo.
28 | %
29 | % See also constructW
30 | %
31 | % Reference:
32 | %
33 | %   Xiaofei He, Deng Cai and Partha Niyogi, "Laplacian Score for Feature Selection".
34 | %   Advances in Neural Information Processing Systems 18 (NIPS 2005),
35 | %   Vancouver, Canada, 2005.
36 | %
37 | % Deng Cai, 2004/08
38 |
39 | if nargin == 0, selfdemo; return; end
40 |
41 | nSmp = size(X,1);
42 |
43 | % W must be square and match the number of samples in X.
44 | if size(W,1) ~= nSmp || size(W,2) ~= nSmp
45 |     error('W is error');
46 | end
47 |
48 | % Node degrees, kept both as a column vector and as a diagonal matrix.
49 | degree = full(sum(W,2));
50 | totalDegree = sum(degree);
51 | D = sparse(1:nSmp,1:nSmp,degree,nSmp,nSmp);
52 |
53 | % Degree-weighted feature sums; used to remove the weighted mean below.
54 | tmp1 = degree'*X;
55 | meanTerm = tmp1.*tmp1/totalDegree;
56 |
57 | % Sum explicitly along dimension 1 so that a single-row X still yields one
58 | % value per feature instead of collapsing to a scalar.
59 | DPrime = sum((X'*D)'.*X,1)-meanTerm;
60 | LPrime = sum((X'*W)'.*X,1)-meanTerm;
61 |
62 | % Features with (almost) no weighted variance get a large denominator.
63 | DPrime(DPrime < 1e-12) = 10000;
64 |
65 | Y = LPrime./DPrime;
66 | Y = Y';
67 | Y = full(Y);
68 |
69 | %---------------------------------------------------
70 | function selfdemo
71 | % ====== Self demo using IRIS dataset
72 | % ====== Scatter plots of the IRIS data on the features ranked by LaplacianScore
73 | load iris.dat
74 |
75 | feaNorm = sqrt(sum(iris(:,1:4).^2,2)); % row norms; stands in for the helper mynorm, which is not shipped here (assumed to be the row-wise L2 norm)
76 | fea = iris(:,1:4) ./ repmat(max(1e-10,feaNorm),1,4);
77 |
78 | options = [];
79 | options.Metric = 'Cosine';
80 | options.NeighborMode = 'KNN';
81 | options.WeightMode = 'Cosine';
82 | options.k = 3;
83 |
84 | W = constructW(fea,options);
85 |
86 | [LaplacianScore] = feval(mfilename,iris(:,1:4),W);
87 | [junk, index] = sort(-LaplacianScore);
88 |
89 | index1 = find(iris(:,5)==1);
90 | index2 = find(iris(:,5)==2);
91 | index3 = find(iris(:,5)==3);
92 | figure;
93 | plot(iris(index1, index(1)), iris(index1, index(2)), '*', ...
94 |      iris(index2, index(1)), iris(index2, index(2)), 'o', ...
95 |      iris(index3, index(1)), iris(index3, index(2)), 'x');
96 | legend('Class 1', 'Class 2', 'Class 3');
97 | title('IRIS data onto the first and second feature (Laplacian Score)');
98 | axis equal; axis tight;
99 |
100 | figure;
101 | plot(iris(index1, index(3)), iris(index1, index(4)), '*', ...
102 |      iris(index2, index(3)), iris(index2, index(4)), 'o', ...
103 |      iris(index3, index(3)), iris(index3, index(4)), 'x');
104 | legend('Class 1', 'Class 2', 'Class 3');
105 | title('IRIS data onto the third and fourth feature (Laplacian Score)');
106 | axis equal; axis tight;
107 |
108 | disp('Laplacian Score:');
109 | for i = 1:length(LaplacianScore)
110 |     disp(num2str(LaplacianScore(i)));
111 | end
112 |
113 |
114 |
--------------------------------------------------------------------------------
/MCLS.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RuiHuang2018/MCLS-code/55cab70bd2851ede6fd69c34a0f58d9aeda761d8/MCLS.m
--------------------------------------------------------------------------------
/NormalizeFea.m:
--------------------------------------------------------------------------------
1 | function fea = NormalizeFea(fea,row)
2 | %NORMALIZEFEA Scale the rows or the columns of fea to unit Euclidean norm.
3 | %
4 | %   fea = NormalizeFea(fea)       normalizes each row (row defaults to 1)
5 | %   fea = NormalizeFea(fea,row)
6 | %       if row == 1, normalize each row of fea to have unit norm;
7 | %       if row == 0, normalize each column of fea to have unit norm;
8 | %
9 | %   Squared norms are floored at 1e-14 before scaling, so an all-zero row
10 | %   or column stays zero instead of turning into NaN.
11 | %
12 | %   version 3.0 --Jan/2012
13 | %   version 2.0 --Jan/2012
14 | %   version 1.0 --Oct/2003
15 | %
16 | %   Written by Deng Cai (dengcai AT gmail.com)
17 | %
18 |
19 | if ~exist('row','var')
20 |     row = 1;
21 | end
22 |
23 | % Scaling is done by multiplying with a sparse diagonal matrix of inverse
24 | % norms, from the left for rows and from the right for columns.
25 | if row
26 |     nSmp = size(fea,1);
27 |     feaNorm = max(1e-14,full(sum(fea.^2,2)));
28 |     fea = spdiags(feaNorm.^-.5,0,nSmp,nSmp)*fea;
29 | else
30 |     nSmp = size(fea,2);
31 |     feaNorm = max(1e-14,full(sum(fea.^2,1))');
32 |     fea = fea*spdiags(feaNorm.^-.5,0,nSmp,nSmp);
33 | end
34 |
35 | return;
36 |
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MCLS-code
2 | Manifold-based Constraint Laplacian Score for multi-label feature selection
3 |
4 | This code from our paper "Manifold-based Constraint Laplacian Score for multi-label feature selection".
5 |
6 | function idx = MCLS(fea, knear)
7 |
8 | Inputs:
9 |
10 | fea: struct data set
11 |
12 | data : nSample * nFeature
13 |
14 | target : nLabel * nSample
15 |
16 | knear: number of neighbors
17 |
18 | Output:
19 |
20 | idx: feature sorting by score
21 |
22 | Usage:
23 |
24 | fea = load('sample.mat');
25 |
26 | knear = 5;
27 |
28 | idx = MCLS(fea,knear);
29 |
--------------------------------------------------------------------------------
/build_label_manifold.m:
--------------------------------------------------------------------------------
1 | function MU = build_label_manifold(Y, W, lambda)
2 | %
3 | % BUILD_LABEL_MANIFOLD The label manifold building part of the algorithm ML^2.
4 | %
5 | % Description
6 | % MU = BUILD_LABEL_MANIFOLD(X, Y, K, lambda) is the label manifold building part of the algorithm ML^2.
7 | % It constructs the label manifold via L quadratic programming problems.
8 | %
9 | % Inputs:
10 | % Y: multi-label matrix corresponding to the training samples in X above (N x L). Note that each element
11 | % in this matrix can only take 1 or -1, where 1 represents the corresponding label is relevant and -1 represents
12 | % the corresponding label is irrelevant.
13 | % W: weight matrix
14 | % lambda: parameter in the constraint (3) in our paper.
15 | %
16 | % Output:
17 | % MU: constructed numerical labels.
18 | %
19 | % Copyright: Peng Hou (hpeng@seu.edu.cn), Xin Geng (xgeng@seu.edu.cn),
20 | % Min-Ling Zhang (mlzhang@seu.edu.cn)
21 | % School of Computer Science and Engineering, Southeast University
22 | % Nanjing 211189, P.R.China
23 | %
24 |
25 | fprintf(1,'Build the label manifold.\n');
26 |
27 | [N, L] = size(Y);
28 |
29 | % M = (I - W)' * (I - W). This is the closed form of accumulating, for every
30 | % row w = W(i,:), the updates M(i,:) -= w, M(:,i) -= w' and M += w'*w on an
31 | % identity matrix; building it in one product avoids N sparse updates.
32 | IW = speye(N) - W;
33 | M = IW' * IW;
34 |
35 | % For sparse datasets, we might end up with NaNs or Infs in M. We just set them to zero for now...
36 | M(isnan(M)) = 0;
37 | M(isinf(M)) = 0;
38 |
39 | % Quadratic programming: for every label k minimise mu'*M*mu subject to
40 | % Y(i,k)*mu(i) >= lambda, written as A*mu <= b with A = -diag(Y(:,k)).
41 | b = zeros(N,1)-lambda;
42 | options = optimoptions('quadprog',...
43 |     'Display', 'off');
44 | MU = zeros(N, L); % preallocated; also defines the output when L == 0
45 | for k=1:L
46 |     A = -diag(Y(:,k));
47 |     MU(:,k) = quadprog(2*M, [], A, b, [], [], [], [],[], options);
48 | end
49 |
50 | end
--------------------------------------------------------------------------------
/constructW.m:
--------------------------------------------------------------------------------
1 | function W = constructW(fea,options)
2 | % Usage:
3 | % W = constructW(fea,options)
4 | %
5 | % fea: Rows of vectors of data points. Each row is x_i
6 | % options: Struct value in Matlab. The fields in options that can be set:
7 | %
8 | % NeighborMode - Indicates how to construct the graph. Choices
9 | % are: [Default 'KNN']
10 | % 'KNN' - k = 0
11 | % Complete graph
12 | % k > 0
13 | % Put an edge between two nodes if and
14 | % only if they are among the k nearst
15 | % neighbors of each other. You are
16 | % required to provide the parameter k in
17 | % the options. Default k=5.
18 | % 'Supervised' - k = 0
19 | % Put an edge between two nodes if and
20 | % only if they belong to same class.
21 | % k > 0
22 | % Put an edge between two nodes if
23 | % they belong to same class and they
24 | % are among the k nearst neighbors of
25 | % each other.
26 | % Default: k=0
27 | % You are required to provide the label
28 | % information gnd in the options.
29 | %
30 | % WeightMode - Indicates how to assign weights for each edge
31 | % in the graph. Choices are:
32 | % 'Binary' - 0-1 weighting. Every edge receives weight
33 | % of 1.
34 | % 'HeatKernel' - If nodes i and j are connected, put weight
35 | % W_ij = exp(-norm(x_i - x_j)^2/(2t^2)).
36 | % This is the default WeightMode.
37 | % 'Cosine' - If nodes i and j are connected, put weight
38 | % cosine(x_i,x_j).
39 | %
40 | % k - The parameter needed under 'KNN' NeighborMode.
41 | % Default will be 5.
42 | % gnd - The parameter needed under 'Supervised'
43 | % NeighborMode. Column vector of the label
44 | % information for each data point.
45 | % bLDA - 0 or 1. Only effective under 'Supervised'
46 | % NeighborMode. If 1, the graph will be constructed
47 | % to make LPP exactly same as LDA. Default will be
48 | % 0.
49 | % t - The parameter needed under 'HeatKernel'
50 | % WeightMode. Default is the mean pairwise
51 | % Euclidean distance of fea (estimated on 3000
52 | % random samples when there are more than 3000).
53 | % bNormalized - 0 or 1. Only effective under 'Cosine' WeightMode.
54 | % Indicates whether the fea are already
55 | % normalized to 1. Default will be 0
56 | % bSelfConnected - 0 or 1. Indicates whether W(i,i) == 1. Default 0
57 | % if 'Supervised' NeighborMode & bLDA == 1,
58 | % bSelfConnected will always be 1.
59 | % bTrueKNN - 0 or 1. If 1, will construct a truly kNN graph
60 | % (Not symmetric!). Default will be 0. Only valid
61 | % for 'KNN' NeighborMode
62 | %
63 | %
64 | % Examples:
65 | %
66 | % fea = rand(50,15);
67 | % options = [];
68 | % options.NeighborMode = 'KNN';
69 | % options.k = 5;
70 | % options.WeightMode = 'HeatKernel';
71 | % options.t = 1;
72 | % W = constructW(fea,options);
73 | %
74 | %
75 | % fea = rand(50,15);
76 | % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4];
77 | % options = [];
78 | % options.NeighborMode = 'Supervised';
79 | % options.gnd = gnd;
80 | % options.WeightMode = 'HeatKernel';
81 | % options.t = 1;
82 | % W = constructW(fea,options);
83 | %
84 | %
85 | % fea = rand(50,15);
86 | % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4];
87 | % options = [];
88 | % options.NeighborMode = 'Supervised';
89 | % options.gnd = gnd;
90 | % options.bLDA = 1;
91 | % W = constructW(fea,options);
92 | %
93 | %
94 | % For more details about the different ways to construct the W, please
95 | % refer:
96 | % Deng Cai, Xiaofei He and Jiawei Han, "Document Clustering Using
97 | % Locality Preserving Indexing" IEEE TKDE, Dec. 2005.
98 | %
99 | %
100 | % Written by Deng Cai (dengcai2 AT cs.uiuc.edu), April/2004, Feb/2006,
101 | % May/2007
102 | %
103 |
104 | bSpeed = 1; % 1: pick neighbours by repeated min/max, 0: by a full sort
105 |
106 | if (~exist('options','var'))
107 |     options = [];
108 | end
109 |
110 | if isfield(options,'Metric')
111 |     warning('This function has been changed and the Metric is no longer be supported');
112 | end
113 |
114 |
115 | if ~isfield(options,'bNormalized')
116 |     options.bNormalized = 0;
117 | end
118 |
119 | %=================================================
120 | if ~isfield(options,'NeighborMode')
121 |     options.NeighborMode = 'KNN';
122 | end
123 |
124 | switch lower(options.NeighborMode)
125 |     case {lower('KNN')} %For simplicity, we include the data point itself in the kNN
126 |         if ~isfield(options,'k')
127 |             options.k = 5;
128 |         end
129 |     case {lower('Supervised')}
130 |         if ~isfield(options,'bLDA')
131 |             options.bLDA = 0;
132 |         end
133 |         if options.bLDA
134 |             options.bSelfConnected = 1;
135 |         end
136 |         if ~isfield(options,'k')
137 |             options.k = 0;
138 |         end
139 |         if ~isfield(options,'gnd')
140 |             error('Label(gnd) should be provided under ''Supervised'' NeighborMode!');
141 |         end
142 |         if ~isempty(fea) && length(options.gnd) ~= size(fea,1)
143 |             error('gnd doesn''t match with fea!');
144 |         end
145 |     otherwise
146 |         error('NeighborMode does not exist!');
147 | end
148 |
149 | %=================================================
150 |
151 | if ~isfield(options,'WeightMode')
152 |     options.WeightMode = 'HeatKernel';
153 | end
154 |
155 | bBinary = 0;
156 | bCosine = 0;
157 | switch lower(options.WeightMode)
158 |     case {lower('Binary')}
159 |         bBinary = 1;
160 |     case {lower('HeatKernel')}
161 |         if ~isfield(options,'t')
162 |             % Default t: mean pairwise distance, estimated on a random
163 |             % subset of 3000 samples when the data set is larger than that.
164 |             nSmp = size(fea,1);
165 |             if nSmp > 3000
166 |                 D = EuDist2(fea(randsample(nSmp,3000),:));
167 |             else
168 |                 D = EuDist2(fea);
169 |             end
170 |             options.t = mean(mean(D));
171 |         end
172 |     case {lower('Cosine')}
173 |         bCosine = 1;
174 |     otherwise
175 |         error('WeightMode does not exist!');
176 | end
177 |
178 | %=================================================
179 |
180 | if ~isfield(options,'bSelfConnected')
181 |     options.bSelfConnected = 0;
182 | end
183 |
184 | %=================================================
185 |
186 | if isfield(options,'gnd')
187 |     nSmp = length(options.gnd);
188 | else
189 |     nSmp = size(fea,1);
190 | end
191 | maxM = 62500000; %500M
192 | % Number of samples handled per block; never below 1 so that the block loop
193 | % further down stays finite for very large nSmp.
194 | BlockSize = max(1,floor(maxM/(nSmp*3)));
195 |
196 |
197 | if strcmpi(options.NeighborMode,'Supervised')
198 |     Label = unique(options.gnd);
199 |     nLabel = length(Label);
200 |     if options.bLDA
201 |         G = zeros(nSmp,nSmp);
202 |         for idx=1:nLabel
203 |             classIdx = options.gnd==Label(idx);
204 |             G(classIdx,classIdx) = 1/sum(classIdx);
205 |         end
206 |         W = sparse(G);
207 |         return;
208 |     end
209 |
210 |     switch lower(options.WeightMode)
211 |         case {lower('Binary')}
212 |             if options.k > 0
213 |                 G = zeros(nSmp*(options.k+1),3);
214 |                 idNow = 0;
215 |                 for i=1:nLabel
216 |                     classIdx = find(options.gnd==Label(i));
217 |                     D = EuDist2(fea(classIdx,:),[],0);
218 |                     [dump idx] = sort(D,2); % sort each row
219 |                     clear D dump;
220 |                     idx = idx(:,1:options.k+1);
221 |
222 |                     nSmpClass = length(classIdx)*(options.k+1);
223 |                     G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]);
224 |                     G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:));
225 |                     G(idNow+1:nSmpClass+idNow,3) = 1;
226 |                     idNow = idNow+nSmpClass;
227 |                     clear idx
228 |                 end
229 |                 G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp);
230 |                 G = max(G,G');
231 |             else
232 |                 G = zeros(nSmp,nSmp);
233 |                 for i=1:nLabel
234 |                     classIdx = find(options.gnd==Label(i));
235 |                     G(classIdx,classIdx) = 1;
236 |                 end
237 |             end
238 |
239 |             if ~options.bSelfConnected
240 |                 for i=1:size(G,1)
241 |                     G(i,i) = 0;
242 |                 end
243 |             end
244 |
245 |             W = sparse(G);
246 |         case {lower('HeatKernel')}
247 |             if options.k > 0
248 |                 G = zeros(nSmp*(options.k+1),3);
249 |                 idNow = 0;
250 |                 for i=1:nLabel
251 |                     classIdx = find(options.gnd==Label(i));
252 |                     D = EuDist2(fea(classIdx,:),[],0);
253 |                     [dump idx] = sort(D,2); % sort each row
254 |                     clear D;
255 |                     idx = idx(:,1:options.k+1);
256 |                     dump = dump(:,1:options.k+1);
257 |                     dump = exp(-dump/(2*options.t^2));
258 |
259 |                     nSmpClass = length(classIdx)*(options.k+1);
260 |                     G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]);
261 |                     G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:));
262 |                     G(idNow+1:nSmpClass+idNow,3) = dump(:);
263 |                     idNow = idNow+nSmpClass;
264 |                     clear dump idx
265 |                 end
266 |                 G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp);
267 |             else
268 |                 G = zeros(nSmp,nSmp);
269 |                 for i=1:nLabel
270 |                     classIdx = find(options.gnd==Label(i));
271 |                     D = EuDist2(fea(classIdx,:),[],0);
272 |                     D = exp(-D/(2*options.t^2));
273 |                     G(classIdx,classIdx) = D;
274 |                 end
275 |             end
276 |
277 |             if ~options.bSelfConnected
278 |                 for i=1:size(G,1)
279 |                     G(i,i) = 0;
280 |                 end
281 |             end
282 |
283 |             W = sparse(max(G,G'));
284 |         case {lower('Cosine')}
285 |             if ~options.bNormalized
286 |                 fea = NormalizeFea(fea);
287 |             end
288 |
289 |             if options.k > 0
290 |                 G = zeros(nSmp*(options.k+1),3);
291 |                 idNow = 0;
292 |                 for i=1:nLabel
293 |                     classIdx = find(options.gnd==Label(i));
294 |                     D = fea(classIdx,:)*fea(classIdx,:)';
295 |                     [dump idx] = sort(-D,2); % sort each row
296 |                     clear D;
297 |                     idx = idx(:,1:options.k+1);
298 |                     dump = -dump(:,1:options.k+1);
299 |
300 |                     nSmpClass = length(classIdx)*(options.k+1);
301 |                     G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]);
302 |                     G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:));
303 |                     G(idNow+1:nSmpClass+idNow,3) = dump(:);
304 |                     idNow = idNow+nSmpClass;
305 |                     clear dump idx
306 |                 end
307 |                 G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp);
308 |             else
309 |                 G = zeros(nSmp,nSmp);
310 |                 for i=1:nLabel
311 |                     classIdx = find(options.gnd==Label(i));
312 |                     G(classIdx,classIdx) = fea(classIdx,:)*fea(classIdx,:)';
313 |                 end
314 |             end
315 |
316 |             if ~options.bSelfConnected
317 |                 for i=1:size(G,1)
318 |                     G(i,i) = 0;
319 |                 end
320 |             end
321 |
322 |             W = sparse(max(G,G'));
323 |         otherwise
324 |             error('WeightMode does not exist!');
325 |     end
326 |     return;
327 | end
328 |
329 |
330 | % Normfea is needed by every cosine-weighted graph below. When the caller
331 | % states that fea is already normalized it is used as is; leaving Normfea
332 | % undefined in that case made the complete-graph branch fail.
333 | if bCosine
334 |     if options.bNormalized
335 |         Normfea = fea;
336 |     else
337 |         Normfea = NormalizeFea(fea);
338 |     end
339 | end
340 |
341 | if strcmpi(options.NeighborMode,'KNN') && (options.k > 0)
342 |     nNeighbor = options.k+1; % the data point itself is included
343 |     nBlock = ceil(nSmp/BlockSize);
344 |     % With pre-normalized features and cosine weights the neighbours are the
345 |     % largest inner products; otherwise the smallest squared distances.
346 |     bInnerProduct = bCosine && options.bNormalized;
347 |
348 |     G = zeros(nSmp*nNeighbor,3);
349 |     for i = 1:nBlock
350 |         % The last block may hold fewer than BlockSize samples.
351 |         lastSmp = min(i*BlockSize,nSmp);
352 |         smpIdx = (i-1)*BlockSize+1:lastSmp;
353 |         rowIdx = (i-1)*BlockSize*nNeighbor+1:lastSmp*nNeighbor;
354 |         nSmpNow = length(smpIdx);
355 |
356 |         if bInnerProduct
357 |             dist = fea(smpIdx,:)*fea';
358 |             dist = full(dist);
359 |
360 |             if bSpeed
361 |                 dump = zeros(nSmpNow,nNeighbor);
362 |                 idx = dump;
363 |                 for j = 1:nNeighbor
364 |                     [dump(:,j),idx(:,j)] = max(dist,[],2);
365 |                     temp = (idx(:,j)-1)*nSmpNow+[1:nSmpNow]';
366 |                     dist(temp) = 0; % exclude this match from the next pass
367 |                 end
368 |             else
369 |                 [dump idx] = sort(-dist,2); % sort each row
370 |                 idx = idx(:,1:nNeighbor);
371 |                 dump = -dump(:,1:nNeighbor);
372 |             end
373 |         else
374 |             dist = EuDist2(fea(smpIdx,:),fea,0);
375 |
376 |             if bSpeed
377 |                 dump = zeros(nSmpNow,nNeighbor);
378 |                 idx = dump;
379 |                 for j = 1:nNeighbor
380 |                     [dump(:,j),idx(:,j)] = min(dist,[],2);
381 |                     temp = (idx(:,j)-1)*nSmpNow+[1:nSmpNow]';
382 |                     dist(temp) = 1e100; % exclude this match from the next pass
383 |                 end
384 |             else
385 |                 [dump idx] = sort(dist,2); % sort each row
386 |                 idx = idx(:,1:nNeighbor);
387 |                 dump = dump(:,1:nNeighbor);
388 |             end
389 |
390 |             if ~bBinary
391 |                 if bCosine
392 |                     % Neighbours were chosen by distance; weights are cosines.
393 |                     dist = Normfea(smpIdx,:)*Normfea';
394 |                     dist = full(dist);
395 |                     linidx = [1:size(idx,1)]';
396 |                     dump = dist(sub2ind(size(dist),linidx(:,ones(1,size(idx,2))),idx));
397 |                 else
398 |                     dump = exp(-dump/(2*options.t^2));
399 |                 end
400 |             end
401 |         end
402 |
403 |         % One (row, column, weight) triplet per sample/neighbour pair.
404 |         G(rowIdx,1) = repmat(smpIdx',[nNeighbor,1]);
405 |         G(rowIdx,2) = idx(:);
406 |         if ~bBinary
407 |             G(rowIdx,3) = dump(:);
408 |         else
409 |             G(rowIdx,3) = 1;
410 |         end
411 |     end
412 |
413 |     W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp);
414 |
415 |     if bBinary
416 |         W(logical(W)) = 1;
417 |     end
418 |
419 |     if isfield(options,'bSemiSupervised') && options.bSemiSupervised
420 |         tmpgnd = options.gnd(options.semiSplit);
421 |
422 |         Label = unique(tmpgnd);
423 |         nLabel = length(Label);
424 |         G = zeros(sum(options.semiSplit),sum(options.semiSplit));
425 |         for idx=1:nLabel
426 |             classIdx = tmpgnd==Label(idx);
427 |             G(classIdx,classIdx) = 1;
428 |         end
429 |         Wsup = sparse(G);
430 |         if ~isfield(options,'SameCategoryWeight')
431 |             options.SameCategoryWeight = 1;
432 |         end
433 |         W(options.semiSplit,options.semiSplit) = (Wsup>0)*options.SameCategoryWeight;
434 |     end
435 |
436 |     if ~options.bSelfConnected
437 |         W = W - diag(diag(W));
438 |     end
439 |
440 |     if isfield(options,'bTrueKNN') && options.bTrueKNN
441 |         % keep the directed (non-symmetric) kNN graph
442 |     else
443 |         W = max(W,W');
444 |     end
445 |
446 |     return;
447 | end
448 |
449 |
450 | % strcmpi(options.NeighborMode,'KNN') & (options.k == 0)
451 | % Complete Graph
452 |
453 | switch lower(options.WeightMode)
454 |     case {lower('Binary')}
455 |         error('Binary weight can not be used for complete graph!');
456 |     case {lower('HeatKernel')}
457 |         W = EuDist2(fea,[],0);
458 |         W = exp(-W/(2*options.t^2));
459 |     case {lower('Cosine')}
460 |         W = full(Normfea*Normfea');
461 |     otherwise
462 |         error('WeightMode does not exist!');
463 | end
464 |
465 | if ~options.bSelfConnected
466 |     for i=1:size(W,1)
467 |         W(i,i) = 0;
468 |     end
469 | end
470 |
471 | W = max(W,W');
472 |
473 |
--------------------------------------------------------------------------------
/constructW_PKN.m:
--------------------------------------------------------------------------------
1 | % construct similarity matrix with probabilistic k-nearest neighbors. It is a parameter free, distance consistent similarity.
2 | function W = constructW_PKN(X, k, issymmetric)
3 | % X: each column is a data point
4 | % k: number of neighbors (default 5); X must hold at least k+2 data points
5 | % issymmetric: set W = (W+W')/2 if issymmetric=1 (default 1)
6 | % W: similarity matrix (n x n, where n is the number of columns of X)
7 |
8 | if nargin < 3
9 |     issymmetric = 1;
10 | end
11 | if nargin < 2
12 |     k = 5;
13 | end
14 |
15 | n = size(X,2);
16 |
17 | % Every row reads columns 2..k+2 of its sorted distances, so fewer than k+2
18 | % points cannot work; report that instead of failing on the index below.
19 | if n < k+2
20 |     error('constructW_PKN:tooFewPoints', ...
21 |         'X must contain at least k+2 = %d data points, but has %d.', k+2, n);
22 | end
23 |
24 | D = L2_distance_1(X, X);
25 | [dumb, idx] = sort(D, 2); % sort each row
26 |
27 | W = zeros(n);
28 | for i = 1:n
29 |     % Column 1 of the sorted row is skipped (presumably the point itself at
30 |     % distance 0); the next k+1 entries are used.
31 |     id = idx(i,2:k+2);
32 |     di = D(i, id);
33 |     % Weights decrease linearly with distance and are 0 for the (k+1)-th
34 |     % entry; eps guards against a zero denominator.
35 |     W(i,id) = (di(k+1)-di)/(k*di(k+1)-sum(di(1:k))+eps);
36 | end
37 |
38 | if issymmetric == 1
39 |     W = (W+W')/2;
40 | end
41 | W=full(W);
42 |
43 |
44 |
45 | % compute squared Euclidean distance
46 | % ||A-B||^2 = ||A||^2 + ||B||^2 - 2*A'*B
47 | function d = L2_distance_1(a,b)
48 | % a,b: two matrices. each column is a data
49 | % d: distance matrix of a and b
50 |
51 | % One-dimensional data gets a zero row appended so that sum() below still
52 | % reduces over the feature dimension rather than over the points.
53 | if (size(a,1) == 1)
54 |     a = [a; zeros(1,size(a,2))];
55 |     b = [b; zeros(1,size(b,2))];
56 | end
57 |
58 | aa=sum(a.*a); bb=sum(b.*b); ab=a'*b;
59 | d = repmat(aa',[1 size(bb,2)]) + repmat(bb,[size(aa,2) 1]) - 2*ab;
60 |
61 | % Remove imaginary parts and negative round-off.
62 | d = real(d);
63 | d = max(d,0);
64 |
65 |
66 | % % force 0 on the diagonal?
67 | % if (df==1)
68 | % d = d.*(1-eye(size(d)));
69 | % end
70 |
71 |
--------------------------------------------------------------------------------
/estimate_top_struct.m:
--------------------------------------------------------------------------------
1 |
2 | function W = estimate_top_struct(X, K)
3 | %
4 | % ESTIMATE_TOP_STRUCT Estimate the topological structure in the feature space.
5 | %
6 | % Description
7 | % W = ESTIMATE_TOP_STRUCT(X, K) estimate the topological structure as ML^2.
8 | % It includes two main steps. First, find K nearest neighbors for each training example.
9 | % Second, approximate the topological structure of the feature manifold via N standard least square programming problems,
10 | % where N is the number of training examples.
11 | %
12 | % Inputs:
13 | % X: data matrix with training samples in rows and features in columns (N x D)
14 | % K: number of selected nearest neighbors.
15 | %
16 | % Output:
17 | % W: weight matrix (N x N, sparse). Row i holds the weights of sample i over
18 | % its K neighbors, scaled to sum to 1.
19 | %
20 | % Copyright: Peng Hou (hpeng@seu.edu.cn), Xin Geng (xgeng@seu.edu.cn),
21 | % Min-Ling Zhang (mlzhang@seu.edu.cn)
22 | % School of Computer Science and Engineering, Southeast University
23 | % Nanjing 211189, P.R.China
24 | %
25 |
26 | fprintf(1,'Estimate the topological structure.\n');
27 |
28 | [N,D] = size(X);
29 |
30 | % K+1 neighbors are requested and the first column is dropped; the closest
31 | % match of every sample is expected to be the sample itself.
32 | neighborhood = knnsearch(X, X, 'K', K+1);
33 | neighborhood = neighborhood(:, 2:end);
34 |
35 | if(K>D)
36 |     fprintf(1,' [note: K>D; regularization will be used]\n');
37 |     tol=1e-3; % regularizer in case constrained fits are ill conditioned
38 | else
39 |     tol=0;
40 | end
41 |
42 | % Least square programming
43 | % The weights are collected in a dense N x K array and converted to a sparse
44 | % matrix once at the end; assigning into a sparse matrix row by row is slow.
45 | weights = zeros(N, K);
46 | onesK = ones(K,1);
47 | for i=1:N
48 |     neighbors = neighborhood(i,:);
49 |     z = X(neighbors,:)-repmat(X(i,:),K,1); % shift ith pt to origin
50 |     Z = z*z'; % local covariance
51 |     Z = Z + eye(K,K)*tol*trace(Z); % regularization (K>D)
52 |     w = Z\onesK; % solve Zw=1
53 |     weights(i,:) = (w/sum(w))'; % enforce sum(w)=1
54 | end
55 | rowIdx = repmat((1:N)', 1, K);
56 | W = sparse(rowIdx(:), neighborhood(:), weights(:), N, N);
57 |
58 | end
--------------------------------------------------------------------------------
/sample.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RuiHuang2018/MCLS-code/55cab70bd2851ede6fd69c34a0f58d9aeda761d8/sample.mat
--------------------------------------------------------------------------------