function [data] = loadmatfile(path)
%LOADMATFILE Load a MAT-file and return the variable named 'obj' stored in it.
%   data = LOADMATFILE(path) calls load(path) and returns the contents of
%   the variable 'obj' found in that file.
%
%   Input:
%     path - file name, passed unchanged to load().
%
%   Output:
%     data - value of the variable 'obj' saved in the file.
%
%   Errors:
%     loadmatfile:missingObj - the file was loaded but holds no variable
%     called 'obj' (previously this surfaced as a bare field-reference
%     error that did not name the offending file).

contents = load(path);

% Fail with a message that names the file instead of an anonymous
% "reference to non-existent field" error.
if ~isfield(contents, 'obj')
    error('loadmatfile:missingObj', ...
        'File "%s" does not contain a variable named ''obj''.', path);
end

data = contents.obj;
end
function[acc] = compute_acc_test(X,Z,E,para)
%COMPUTE_ACC_TEST Score a solution (Z,E) with the evaluation chosen by para.er.
%
%   para.er == "knn":
%     E is mapped to E*para.Q2', the training data X and para.Test are
%     projected with it, and for every neighbour count k in para.nknn a
%     k-NN classifier (fitcknn, 'Standardize' on) is fitted on the projected
%     training data with labels para.yTrain. acc(j) is the fraction of test
%     predictions within 0.1 of para.yTest.
%
%   para.er == "ncut":
%     Z is mapped to para.Q1*Z and passed to Cluster(Z, para.K, para.yTrain).
%     An all-zero Z is scored 0 without calling Cluster.
%
%   Any other value of para.er raises compute_acc_test:unknownMode. The
%   original code fell through both branches and failed with an
%   "output argument not assigned" error.
%
%   Inputs:
%     X    - training data matrix (used by the "knn" mode only).
%     Z, E - solver outputs.
%     para - struct; fields read depend on the mode (see above).
%   Output:
%     acc  - 1-by-numel(para.nknn) row for "knn", scalar for "ncut".

switch string(para.er)
    case "knn"
        E = E*para.Q2';
        Train = E*X;
        Test = E*para.Test;
        ks = para.nknn;
        acc = zeros(1,length(ks));
        for j = 1:length(ks)
            k = ks(j);
            Mdl = fitcknn(Train',para.yTrain,'NumNeighbors',k,'Standardize',1);
            ypred = predict(Mdl,Test');
            % Labels are compared with a tolerance rather than ==.
            acc(j) = sum(abs(ypred-para.yTest)<0.1)/length(ypred);
        end

    case "ncut"
        Z = para.Q1*Z;
        % Test for a genuinely empty representation. The previous check,
        % sum(sum(Z))==0, would also trigger when non-zero entries cancel.
        if ~any(Z(:))
            acc = 0;
        else
            acc = Cluster(Z,para.K,para.yTrain);
        end

    otherwise
        error('compute_acc_test:unknownMode', ...
            'Unknown evaluation mode para.er = "%s" (expected "knn" or "ncut").', ...
            string(para.er));
end
end
% RUN_NCUT  Run iADMM (with and without extrapolation) and linearizedADMM on
% each data set, score the resulting representations with the "ncut"
% evaluation of compute_acc_test, and plot error and objective-vs-time.

addpath(genpath('./data'));

% Display names (used for plot titles) and the MAT-files they are read from.
% The repository ships data/yaleb10.mat in lower case; loading 'Yaleb10.mat'
% fails on case-sensitive file systems, so the file name is kept separately.
dataset  = {'Yaleb10'};
datafile = {'yaleb10.mat'};

%%%%%%%%%%%%set parameters
para.maxtime = 500;      % CPU-time budget (seconds) checked by the solvers
para.g = "NN";           % penalty on the singular values of Z: nuclear norm
para.p = "exp21";        % penalty on E: sum_i (1 - exp(-theta*||E_i||_2))
para.r = "l2";           % penalty on Y: squared Frobenius norm
% NOTE: the solvers preallocate several arrays of length maxIter+1, so this
% value drives their memory use; the run normally stops on para.maxtime.
para.maxIter = 1e8;
para.maxinner = 20;
para.cf = 1;
para.lambda = 1;
para.gamma = 1;
para.beta = 1;
para.theta = 5;
para.epsilon = 0.1;
para.rho = 1;
para.nuC = 1-1e-15;
para.C1 = 0.999999;

% Penalty parameter mu computed from rho and C1.
tmp = 3*para.rho/(1-abs(1-para.rho))^2;
para.mu = 2*(2+para.C1)*tmp/para.C1;

para.kappa = 1;
para.kappa_non = 1.1;

for i = 1:numel(dataset)

    data = loadmatfile(datafile{i});
    X = data.X;
    gnd = data.cids;
    [~, n] = size(X);
    gnd = gnd';
    K = max(gnd');

    % Orthonormal bases of the row and column spaces of X; the solvers work
    % with the reduced dictionaries A = X*Q1 and B = Q2'*X.
    Q1 = orth(X');
    Q2 = orth(X);
    A = X*Q1;
    B = Q2'*X;

    para.Q1 = Q1;
    para.Q2 = Q2;

    para.yTrain = gnd;
    para.nknn = [1];
    para.er = "ncut";
    para.iter_acc = 0;      % do not evaluate accuracy inside the solvers
    para.K = K;


    timeStamp = strcat(datestr(clock,'yyyy-mm-dd_HH-MM-ss'));


    para.inertial = 1; %iADMM with extrapolation
    out{1} = iADMM(X, A, B, para );
    para.inertial = 0;% iADMM without extrapolation
    out{2} = iADMM(X, A, B, para );
    out{3} = linearizedADMM(X, A, B, para );


    close all;
    LEGEND = categorical({'iADMM-mm','ADMM-mm','linearizedADMM'});
    ERROR = [];
    for j = 1:3
        ERROR = [ERROR,1-compute_acc_test(X,out{j}.Z,out{j}.E,para)];
    end

    % Clustering error of the three methods.
    figure;
    subplot(1, 2, 1);
    hold on;
    bar(LEGEND,ERROR)
    xlabel('Method');
    ylabel('Error');
    title(dataset{i})

    % Objective value against cumulative CPU time.
    figure;
    subplot(1, 2, 1);
    hold on;
    plot(out{1}.Time, out{1}.obj, 'r', 'LineWidth',3);

    hold on;
    plot(out{2}.Time, out{2}.obj, 'b', 'LineWidth',3);
    hold on;
    plot(out{3}.Time, out{3}.obj, 'g', 'LineWidth',3);
    xlabel('Time (s)');
    ylabel('Objective value');
    legend('iADMM-mm','ADMM-mm','linearizedADMM');
    title(dataset{i})


end


% save(['result/', dataset{i}, strcat(timeStamp,'.mat')], 'out');
function[output] = linearizedADMM( X, A, B, para )
% linearizedADMM solves the following low-rank representation optimization problem,
%           min sum_ig(\sigma_i(Z))+ p(E) + r(Y)
%           s.t., X = AZ+ EB + cf*Y
% inputs:
%        X -- D*N data matrix, D is the data dimension, and N is the number
%             of data vectors.
%        A -- D*m left dictionary, B -- q*N right dictionary.
%        para -- struct with the fields
%           g        : penalty on the singular values of Z
%                      'NN'  : nuclear norm
%                      'exp' : sum_i(1-exp(-\theta\sigma_i))
%                      'log' : sum_i log(\sigma_i + \epsilon)
%           p        : penalty on E, one of
%                      'exp12','l21','exp21','log21','log','exp','NN','l1'
%                      e.g. 'exp21' : sum_i(1-exp(-\theta\|E_i\|_2))
%           r        : penalty on Y, 'l2' (\beta\|Y\|^2/2) or 'l1' (\beta\|Y\|_1)
%           lambda, gamma, beta : weights of g, p and r
%           theta, epsilon      : shape parameters of the exp / log penalties
%           mu, rho  : penalty parameter and multiplier step factor
%           cf       : coefficient of Y in the constraint (Y is frozen if 0)
%           maxIter, maxinner, maxtime : outer / inner iteration limits and
%                      CPU-time budget in seconds
%           kappa_non: factor applied to the step constants for 'exp'/'log'
%           nknn, iter_acc : accuracy tracking; when iter_acc == 1,
%                      compute_acc_test is called every iteration
%           Z, E, Y, W (optional) : starting point; defaults are zeros
% outputs (fields of output):
%        obj  -- augmented Lagrangian value after each iteration
%        Obj  -- g + p + r after each iteration
%        RMSE -- values returned by compute_acc_test (zeros if iter_acc ~= 1)
%        Time -- cumulative CPU time after each iteration
%        Z, E, Y, W -- final iterates, Rank -- nnz of the singular values of Z
%
% Written by Duy Nhat Phan.
% Latest update February 2021
output.method = 'linearizedADMM';

lambda = para.lambda;
maxIter = para.maxIter;
cf = para.cf;

[d, n] = size(X);
m = size(A,2);
q = size(B,1);
if(isfield(para, 'Z'))
    Z = para.Z;
else
    Z = zeros(m,n);
end
if(isfield(para, 'E'))
    E = para.E;
else
    E = zeros(d,q);
end
if(isfield(para, 'Y'))
    Y = para.Y;
else
    Y = sparse(d,n);
end
if(isfield(para, 'W'))
    W = para.W;
else
    W = Y;
end

AX = A'*X;

obj = zeros(maxIter+1, 1);
Obj = zeros(maxIter+1, 1);
RMSE = zeros(maxIter+1, length(para.nknn));
Time = zeros(maxIter+1, 1);

AZ = A*Z;
mu = para.mu;

EB = E*B;
XB = X*B';

[~, S, ~] = svd(Z, 'econ');
sigma = diag(S);

objVal = compute_g(sigma,para) + compute_p(E,para) + compute_r(Y,para);
Obj(1) = objVal;

objVal = objVal + (mu/2)*sum(sum((AZ + EB + cf*Y - X).^2)) + sum(sum((AZ + EB + cf*Y - X).*W));

obj(1) = objVal;

if para.iter_acc == 1
    RMSE(1,:) = compute_acc_test(X,Z,E,para);
end

% Step constant of the Z block: mu * largest singular value of A'*A.
[~, tmp, ~] = svd(A'*A, 'econ');
L1 = max(diag(tmp));

L1 = mu*L1;
if para.g == "exp" || para.g == "log"
    L1 = L1*para.kappa_non;
end

% Step constant of the E block: mu * largest singular value of B*B'.
[~, tmp, ~] = svd(B*B', 'econ');
L2 = max(diag(tmp));
L2 = mu*L2;

% The E block is governed by para.p. The original tested para.g == "log"
% here, a copy of the Z-block condition above.
if para.p == "exp" || para.p == "log"
    L2 = L2*para.kappa_non;
end

rho = para.rho;

% Running CPU-time total. The original evaluated sum(Time) over the whole
% (maxIter+1)-element array on every iteration.
elapsed = 0;

for i = 1:maxIter
    tt = cputime;

    % update Z (the extrapolation weight of the original code was always
    % zero in this solver, so the plain gradient is used)
    AAZ = A'*AZ;
    grad = mu*(AAZ + A'*EB + cf*A'*Y - AX) + A'*W;

    Z = Prox(Z,Z - grad/L1,lambda,L1,para.g,para);
    AZ = A*Z;
    [~, S, ~] = svd(Z, 'econ');
    sigma = diag(S);

    %update E
    EBB = EB*B';
    grad = mu*(EBB + AZ*B' + cf*Y*B' - XB) + W*B';

    E = Prox(E,E - grad/L2,para.gamma,L2,para.p,para);

    EB = E*B;

    %update Y
    if cf~=0
        grad = mu*(AZ + EB +cf*Y- X)+ W;
        Y = update_Y(Y - grad/(mu*cf),para,mu*cf);
    end

    % multiplier update
    W = W + rho*mu*(AZ + EB + cf*Y - X);

    objVal = compute_g(sigma,para) + compute_p(E,para) + compute_r(Y,para);
    Obj(i+1) = objVal;

    objVal = objVal + (mu/2)*sum(sum((AZ + EB + cf*Y - X).^2)) + sum(sum((AZ + EB + cf*Y - X).*W));

    % Accuracy evaluation and printing below are not included in the timing.
    Time(i+1) = cputime - tt;
    elapsed = elapsed + Time(i+1);

    obj(i+1) = objVal;

    if para.iter_acc == 1
        % Evaluate first so the printed accuracy belongs to this iteration
        % (the original printed the previous iteration's value).
        RMSE(i+1,:) = compute_acc_test(X,Z,E,para);
        fprintf('iter: %d; obj : %0.4d; diff : %0.4d; acc : %0.4d \n',i,objVal, obj(i) - obj(i+1),max(RMSE(i+1,:),[],2));
    else
        fprintf('iter: %d; obj : %0.4d; diff : %0.4d \n',i,objVal, obj(i) - obj(i+1));
    end

    if elapsed > para.maxtime
        break;
    end
end

output.obj = obj(2:(i+1));
output.Obj = Obj(2:(i+1));
output.RMSE = RMSE(2:(i+1),:);
% Only the used prefix needs to be accumulated.
Time = cumsum(Time(1:(i+1)));
output.Time = Time(2:(i+1));


output.Z = Z;
output.E = E;
output.Rank = nnz(sigma);
output.Y = Y;
output.W = W;

end

function[g] = compute_g(sigma,para)
% Value of the penalty on the singular values sigma of Z, selected by para.g.
switch para.g
    case "exp"
        g = para.lambda*sum(1-exp(-para.theta*sigma));
    case "NN"
        g = para.lambda*sum(sigma);
    case "log"
        % log penalty; its derivative 1/(sigma+epsilon) is the weight used
        % by Prox. The original summed sigma+epsilon without the log.
        g = para.lambda*sum(log(sigma + para.epsilon));
    otherwise
        error('linearizedADMM:unknownPenalty', 'Unknown para.g = "%s".', string(para.g));
end
end

function[p] = compute_p(E, para)
% Value of the penalty on E, selected by para.p.
switch para.p
    case "exp12"
        % row-wise groups
        sqrt_E = sqrt(sum(E.^2,2));
        p = para.gamma*sum(1-exp(-para.theta*sqrt_E));
    case "l21"
        % column-wise groups
        sqrt_E = sqrt(sum(E.^2,1));
        p = para.gamma*sum(sqrt_E);
    case "exp21"
        sqrt_E = sqrt(sum(E.^2,1));
        p = para.gamma*sum(1-exp(-para.theta*sqrt_E));
    case "log21"
        % the log was missing in the original (see compute_g)
        sqrt_E = sqrt(sum(E.^2,1));
        p = para.gamma*sum(log(sqrt_E + para.epsilon));
    case "log"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        p = para.gamma*sum(log(sigma + para.epsilon));
    case "exp"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        p = para.gamma*sum(1-exp(-para.theta*sigma));
    case "NN"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        p = para.gamma*sum(sigma);
    case "l1"
        p = para.gamma*sum(sum(abs(E)));
    otherwise
        error('linearizedADMM:unknownPenalty', 'Unknown para.p = "%s".', string(para.p));
end
end

function[r] = compute_r(Y,para)
% Value of the penalty on Y, selected by para.r.
switch para.r
    case "l2"
        r = (para.beta/2)*sum(sum(Y.^2));
    case "l1"
        r = para.beta*sum(sum(abs(Y)));
    otherwise
        error('linearizedADMM:unknownPenalty', 'Unknown para.r = "%s".', string(para.r));
end
end


function[Z,sigma] = prox_NN(grad,w)
% Singular-value soft-thresholding of grad. w is a scalar threshold or a
% vector with one threshold per singular value. Returns the thresholded
% matrix Z and the thresholded singular values sigma.
[U, S, V] = svd(grad, 'econ');
sigma = max(diag(S)-w,0);

% Keep at least one triplet so the product below has the full matrix size
% even when every singular value was thresholded to zero.
svp = max(nnz(sigma),1);
Z = U(:,1:svp)*diag(sigma(1:svp))*V(:,1:svp)';
end


function[Y] = update_Y(grad,para,L)
% Minimizer of L/2*\|Y-grad\|^2 + r(Y) for the penalty chosen by para.r.

if para.r == "l2"
    % L/2\|Y-grad\|^2 + beta/2*\|Y\|^2
    Y = L*grad/(L+para.beta);
end

if para.r=="l1"
    % L/2\|Y-grad\|^2 + beta*\|Y\|_1
    gammaL = para.beta/L;
    Y = max(abs(grad)-gammaL,0).*sign(grad);
end
end


function[E] = Prox(E,grad,gamma,L,type,para)
% Iteratively reweighted proximal step.
%   E     - current iterate; the weights of the exp / log penalties are
%           evaluated at it
%   grad  - point at which the proximal map is evaluated
%   gamma - penalty weight, L - step constant
%   type  - penalty name (same names as para.g / para.p)
% At most para.maxinner passes are made; the loop stops as soon as two
% consecutive iterates differ by less than 1e-5 in Frobenius norm.
gammaL = gamma/L;
for inner = 1:para.maxinner
    Ep = E;
    switch type
        case "exp12"
            % groups are the rows of E
            sqrt_D = sqrt(sum(grad.^2,2));
            sqrt_E = sqrt(sum(E.^2,2));
            w = para.theta*exp(-para.theta*sqrt_E);
            thr = gammaL*w;
            keep = sqrt_D > thr;
            kill = sqrt_D <= thr;
            E(keep,:) = grad(keep,:).*repmat(1-thr(keep)./sqrt_D(keep),1,size(E,2));
            E(kill,:) = 0;

        case "l21"
            % groups are the columns of E, uniform threshold
            sqrt_D = sqrt(sum(grad.^2,1));
            keep = sqrt_D > gammaL;
            kill = sqrt_D <= gammaL;
            E(:,keep) = grad(:,keep).*repmat(1-gammaL./sqrt_D(keep),size(E,1),1);
            E(:,kill) = 0;

        case "exp21"
            sqrt_D = sqrt(sum(grad.^2,1));
            sqrt_E = sqrt(sum(E.^2,1));
            w = para.theta*exp(-para.theta*sqrt_E);
            thr = gammaL*w;
            keep = sqrt_D > thr;
            kill = sqrt_D <= thr;
            E(:,keep) = grad(:,keep).*repmat(1-thr(keep)./sqrt_D(keep),size(E,1),1);
            E(:,kill) = 0;

        case "log21"
            sqrt_D = sqrt(sum(grad.^2,1));
            sqrt_E = sqrt(sum(E.^2,1));
            w = 1./(sqrt_E + para.epsilon);
            thr = gammaL*w;
            keep = sqrt_D > thr;
            kill = sqrt_D <= thr;
            % The mask selects columns. The original indexed grad(mask,:),
            % i.e. rows, which picks the wrong entries or fails on size.
            E(:,keep) = grad(:,keep).*repmat(1-thr(keep)./sqrt_D(keep),size(E,1),1);
            E(:,kill) = 0;

        case "exp"
            [~, S, ~] = svd(E, 'econ');
            sigma = diag(S);
            w = para.theta*exp(-para.theta*sigma);
            [E,~] = prox_NN(grad,gamma*w/L);

        case "NN"
            [E,~] = prox_NN(grad,gamma/L);

        case "log"
            [~, S, ~] = svd(E, 'econ');
            sigma = diag(S);
            w = 1./(sigma + para.epsilon);
            [E,~] = prox_NN(grad,gamma*w/L);

        case "l1"
            E = max(abs(grad)-gammaL,0).*sign(grad);
    end

    if norm(Ep-E,'fro')<1e-5
        break;
    end
end
end
function[output] = iADMM( X, A, B, para )
% iADMM solves the following low-rank representation optimization problem,
%           min sum_ig(\sigma_i(Z))+ p(E) + r(Y)
%           s.t., X = AZ+ EB + cf*Y
% inputs:
%        X -- D*N data matrix, D is the data dimension, and N is the number
%             of data vectors.
%        A -- D*m left dictionary, B -- q*N right dictionary.
%        para -- struct with the fields
%           g        : penalty on the singular values of Z
%                      'NN': nuclear norm, 'exp', 'log'
%           p        : penalty on E, e.g.
%                      'exp21' : sum_i(1-exp(-\theta\|E_i\|_2))
%           r        : penalty on Y, e.g. 'l2': \beta\|Y\|^2/2
%           inertial : non-zero enables extrapolation; otherwise the
%                      extrapolation weight is zero in every iteration
%           nuC, kappa_non : caps on the extrapolation weight
%           lambda, gamma, beta, theta, epsilon : penalty weights / shapes
%           mu, rho  : penalty parameter and multiplier step factor
%           cf       : coefficient of Y in the constraint (Y is frozen if 0)
%           maxIter, maxtime : iteration limit and CPU-time budget (seconds)
%           nknn, iter_acc : accuracy tracking; when iter_acc == 1,
%                      compute_acc_test is called every iteration
%           Z, E, Y, W (optional) : starting point; defaults are zeros
% outputs (fields of output):
%        obj  -- augmented Lagrangian value after each iteration
%        Obj  -- g + p + r after each iteration
%        RMSE -- values returned by compute_acc_test (zeros if iter_acc ~= 1)
%        Time -- cumulative CPU time after each iteration
%        Z, E, Y, W -- final iterates, Rank -- nnz of the singular values of Z
%
% Reference: LTK Hien, DN Phan, N Gillis. "A Framework of Inertial Alternating Direction Method of Multipliers for
% Non-Convex Non-Smooth Optimization".
%
% Written by Duy Nhat Phan.
% Latest update February 2021

output.method = 'iADMM';

lambda = para.lambda;
maxIter = para.maxIter;
cf = para.cf;

[d, n] = size(X);
m = size(A,2);
q = size(B,1);
if(isfield(para, 'Z'))
    Z = para.Z;
else
    Z = zeros(m,n);
end
if(isfield(para, 'E'))
    E = para.E;
else
    E = zeros(d,q);
end
if(isfield(para, 'Y'))
    Y = para.Y;
else
    Y = sparse(d,n);
end
if(isfield(para, 'W'))
    W = para.W;
else
    W = Y;
end

AX = A'*X;

obj = zeros(maxIter+1, 1);
Obj = zeros(maxIter+1, 1);
RMSE = zeros(maxIter+1, length(para.nknn));
Time = zeros(maxIter+1, 1);

% Previous iterates (and their products) used by the extrapolation.
Ep = E;
AZ = A*Z;
AAZ = A'*AZ;
AAZp = AAZ;
mu = para.mu;
Zp=Z;

EB = E*B;
EBB = EB*B';
EBBp = EBB;
XB = X*B';

[~, S, ~] = svd(Z, 'econ');
sigma = diag(S);

objVal = compute_g(sigma,para) + compute_p(E,para) + compute_r(Y,para);
Obj(1) = objVal;

objVal = objVal + (mu/2)*sum(sum((AZ + EB + cf*Y - X).^2)) + sum(sum((AZ + EB + cf*Y - X).*W));

obj(1) = objVal;

if para.iter_acc == 1
    RMSE(1,:) = compute_acc_test(X,Z,E,para);
end

% Step constant of the Z block: mu * largest singular value of A'*A.
[~, tmp, ~] = svd(A'*A, 'econ');
L1 = max(diag(tmp));
% L1 = min(1,max(diag(tmp)));

L1 = mu*L1;
if para.g == "exp" || para.g == "log"
    L1 = L1*para.kappa_non;
end

% Step constant of the E block: mu * largest singular value of B*B'.
[~, tmp, ~] = svd(B*B', 'econ');
L2 = max(diag(tmp));
% L2 = min(1,max(diag(tmp)));
L2 = mu*L2;

% The E block is governed by para.p. The original tested para.g == "log"
% here, a copy of the Z-block condition above.
if para.p == "exp" || para.p == "log"
    L2 = L2*para.kappa_non;
end

rho = para.rho;

% Running CPU-time total. The original evaluated sum(Time) over the whole
% (maxIter+1)-element array on every iteration.
elapsed = 0;

c = 1;
for i = 1:maxIter
    tt = cputime;

    % Extrapolation sequence; with para.inertial off, cp stays 1 and the
    % weight bi below is zero.
    if para.inertial
        cp = c;
    else
        cp = 1;
    end
    c = (1 + sqrt(1+4*cp^2))/2;

    % update Z: cap the weight, extrapolate, then weighted singular-value
    % thresholding with weights taken at the current singular values
    bi = (cp - 1)/c;
    if para.g == "exp" || para.g == "log"
        bi = min(bi,(para.kappa_non-1)/(para.kappa_non+1)*0.5*sqrt(para.nuC));
    else
        bi = min(bi,sqrt(para.nuC));
    end

    AAZ = A'*AZ;
    grad = mu*((1+bi)*AAZ - bi*AAZp + A'*EB + cf*A'*Y - AX) + A'*W;
    P = (1+bi)*Z - bi*Zp;
    w = compute_w(sigma,para);
    Zp = Z;
    [Z,sigma] = prox_NN(P - grad/L1,lambda*w/L1);
    AZ = A*Z;

    AAZp = AAZ;

    %update E
    bi = (cp - 1)/c;
    if para.p == "l21" || para.p == "l1"
        bi = min(bi,sqrt(para.nuC));
    else
        bi = min(bi,(para.kappa_non-1)/(para.kappa_non+1)*0.5*sqrt(para.nuC));
    end

    EBB = EB*B';
    grad = mu*((1+bi)*EBB - bi*EBBp + AZ*B' + cf*Y*B' - XB) + W*B';
    P = (1+bi)*E - bi*Ep;

    Ep = E;
    E = update_E(E,P - grad/L2,para,L2);

    EB = E*B;
    EBBp = EBB;


    %update Y
    if cf~=0
        grad = mu*(AZ + EB +cf*Y- X)+ W;
        Y = update_Y(Y - grad/(mu*cf),para,mu*cf);
    end

    % multiplier update
    W = W + rho*mu*(AZ + EB + cf*Y - X);

    objVal = compute_g(sigma,para) + compute_p(E,para) + compute_r(Y,para);
    Obj(i+1) = objVal;

    objVal = objVal + (mu/2)*sum(sum((AZ + EB + cf*Y - X).^2)) + sum(sum((AZ + EB + cf*Y - X).*W));

    % Accuracy evaluation and printing below are not included in the timing.
    Time(i+1) = cputime - tt;
    elapsed = elapsed + Time(i+1);

    obj(i+1) = objVal;
    if para.iter_acc == 1
        % Evaluate first so the printed accuracy belongs to this iteration
        % (the original printed the previous iteration's value).
        RMSE(i+1,:) = compute_acc_test(X,Z,E,para);
        fprintf('iter: %d; obj : %0.4d; diff : %0.4d; acc : %0.4d \n',i,objVal, obj(i) - obj(i+1),max(RMSE(i+1,:),[],2));
    else
        fprintf('iter: %d; obj : %0.4d; diff : %0.4d \n',i,objVal, obj(i) - obj(i+1));
    end

    if elapsed > para.maxtime
        break;
    end
end

output.obj = obj(2:(i+1));
output.Obj = Obj(2:(i+1));
output.RMSE = RMSE(2:(i+1),:);
% Only the used prefix needs to be accumulated.
Time = cumsum(Time(1:(i+1)));
output.Time = Time(2:(i+1));

output.Z = Z;
output.E = E;
output.Rank = nnz(sigma);
output.Y = Y;
output.W = W;

end
function[g] = compute_g(sigma,para)
% Value of the penalty on the singular values sigma of Z, selected by para.g.
switch para.g
    case "exp"
        g = para.lambda*sum(1-exp(-para.theta*sigma));
    case "NN"
        g = para.lambda*sum(sigma);
    case "log"
        % log penalty; its derivative 1/(sigma+epsilon) is the weight that
        % compute_w returns. The original summed sigma+epsilon without the log.
        g = para.lambda*sum(log(sigma + para.epsilon));
    otherwise
        error('iADMM:unknownPenalty', 'Unknown para.g = "%s".', string(para.g));
end
end

function[p] = compute_p(E, para)
% Value of the penalty on E, selected by para.p.
switch para.p
    case "exp12"
        % row-wise groups
        sqrt_E = sqrt(sum(E.^2,2));
        p = para.gamma*sum(1-exp(-para.theta*sqrt_E));
    case "l21"
        % column-wise groups
        sqrt_E = sqrt(sum(E.^2,1));
        p = para.gamma*sum(sqrt_E);
    case "exp21"
        sqrt_E = sqrt(sum(E.^2,1));
        p = para.gamma*sum(1-exp(-para.theta*sqrt_E));
    case "log21"
        % the log was missing in the original (see compute_g)
        sqrt_E = sqrt(sum(E.^2,1));
        p = para.gamma*sum(log(sqrt_E + para.epsilon));
    case "log"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        p = para.gamma*sum(log(sigma + para.epsilon));
    case "exp"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        p = para.gamma*sum(1-exp(-para.theta*sigma));
    case "NN"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        p = para.gamma*sum(sigma);
    case "l1"
        p = para.gamma*sum(sum(abs(E)));
    otherwise
        error('iADMM:unknownPenalty', 'Unknown para.p = "%s".', string(para.p));
end
end

function[r] = compute_r(Y,para)
% Value of the penalty on Y, selected by para.r.
switch para.r
    case "l2"
        r = (para.beta/2)*sum(sum(Y.^2));
    case "l1"
        r = para.beta*sum(sum(abs(Y)));
    otherwise
        error('iADMM:unknownPenalty', 'Unknown para.r = "%s".', string(para.r));
end
end

function[w] = compute_w(sigma,para)
% Per-singular-value thresholding weights for the penalty para.g,
% evaluated at the singular values sigma.
switch para.g
    case "exp"
        w = para.theta*exp(-para.theta*sigma);
    case "NN"
        w = ones(length(sigma),1);
    case "log"
        w = 1./(sigma + para.epsilon);
    otherwise
        error('iADMM:unknownPenalty', 'Unknown para.g = "%s".', string(para.g));
end
end

function[Z,sigma] = prox_NN(grad,w)
% Singular-value soft-thresholding of grad. w is a scalar threshold or a
% vector with one threshold per singular value. Returns the thresholded
% matrix Z and the thresholded singular values sigma.
[U, S, V] = svd(grad, 'econ');
sigma = max(diag(S)-w,0);

% Keep at least one triplet so the product below has the full matrix size
% even when every singular value was thresholded to zero.
svp = max(nnz(sigma),1);
Z = U(:,1:svp)*diag(sigma(1:svp))*V(:,1:svp)';
end

function[E] = update_E(E,grad,para,L)
% One reweighted proximal step for the E block.
%   E    - current iterate; the weights of the exp / log penalties are
%          evaluated at it
%   grad - point at which the proximal map is evaluated
%   L    - step constant; the thresholds scale with para.gamma/L
% The penalty is selected by para.p. The group penalties shrink whole rows
% ("exp12") or whole columns ("l21", "exp21", "log21") of grad.
gammaL = para.gamma/L;

switch para.p
    case "exp12"
        sqrt_D = sqrt(sum(grad.^2,2));
        sqrt_E = sqrt(sum(E.^2,2));
        w = para.theta*exp(-para.theta*sqrt_E);
        thr = gammaL*w;
        keep = sqrt_D > thr;
        kill = sqrt_D <= thr;
        E(keep,:) = grad(keep,:).*repmat(1-thr(keep)./sqrt_D(keep),1,size(E,2));
        E(kill,:) = 0;

    case "l21"
        sqrt_D = sqrt(sum(grad.^2,1));
        keep = sqrt_D > gammaL;
        kill = sqrt_D <= gammaL;
        E(:,keep) = grad(:,keep).*repmat(1-gammaL./sqrt_D(keep),size(E,1),1);
        E(:,kill) = 0;

    case "exp21"
        sqrt_D = sqrt(sum(grad.^2,1));
        % Weights are taken at the current iterate E, as in the "exp12" and
        % "log21" branches. The original computed them from grad here.
        sqrt_E = sqrt(sum(E.^2,1));
        w = para.theta*exp(-para.theta*sqrt_E);
        thr = gammaL*w;
        keep = sqrt_D > thr;
        kill = sqrt_D <= thr;
        E(:,keep) = grad(:,keep).*repmat(1-thr(keep)./sqrt_D(keep),size(E,1),1);
        E(:,kill) = 0;

    case "log21"
        sqrt_D = sqrt(sum(grad.^2,1));
        sqrt_E = sqrt(sum(E.^2,1));
        w = 1./(sqrt_E + para.epsilon);
        thr = gammaL*w;
        keep = sqrt_D > thr;
        kill = sqrt_D <= thr;
        % The mask selects columns. The original indexed grad(mask,:),
        % i.e. rows, which picks the wrong entries or fails on size.
        E(:,keep) = grad(:,keep).*repmat(1-thr(keep)./sqrt_D(keep),size(E,1),1);
        E(:,kill) = 0;

    case "exp"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        w = para.theta*exp(-para.theta*sigma);
        [E,~] = prox_NN(grad,para.gamma*w/L);

    case "NN"
        [E,~] = prox_NN(grad,para.gamma/L);

    case "log"
        [~, S, ~] = svd(E, 'econ');
        sigma = diag(S);
        w = 1./(sigma + para.epsilon);
        [E,~] = prox_NN(grad,para.gamma*w/L);

    case "l1"
        E = max(abs(grad)-gammaL,0).*sign(grad);
end
end

function[Y] = update_Y(grad,para,L)
% Minimizer of L/2*\|Y-grad\|^2 + r(Y) for the penalty chosen by para.r.

if para.r == "l2"
    % L/2\|Y-grad\|^2 + beta/2*\|Y\|^2
    Y = L*grad/(L+para.beta);
end

if para.r=="l1"
    % L/2\|Y-grad\|^2 + beta*\|Y\|_1
    gammaL = para.beta/L;
    Y = max(abs(grad)-gammaL,0).*sign(grad);
end
end