├── sigm.m
├── tanh_opt.m
├── .gitattributes
├── rproptoolbox
├── minFunc_2012
│   ├── minFunc
│   │   ├── precondDiag.m
│   │   ├── precondTriu.m
│   │   ├── precondTriuDiag.m
│   │   ├── isLegal.m
│   │   ├── compiled
│   │   │   ├── lbfgsC.mexa64
│   │   │   ├── lbfgsC.mexglx
│   │   │   ├── lbfgsC.mexmac
│   │   │   ├── lbfgsC.mexmaci
│   │   │   ├── lbfgsC.mexw32
│   │   │   ├── lbfgsC.mexw64
│   │   │   ├── mcholC.mexa64
│   │   │   ├── mcholC.mexglx
│   │   │   ├── mcholC.mexmac
│   │   │   ├── mcholC.mexw32
│   │   │   ├── mcholC.mexw64
│   │   │   ├── lbfgsAddC.mexa64
│   │   │   ├── lbfgsAddC.mexw64
│   │   │   ├── lbfgsC.mexmaci64
│   │   │   ├── lbfgsProdC.mexa64
│   │   │   ├── lbfgsProdC.mexw64
│   │   │   ├── mcholC.mexmaci64
│   │   │   ├── lbfgsAddC.mexmaci64
│   │   │   └── lbfgsProdC.mexmaci64
│   │   ├── mcholinc.m
│   │   ├── lbfgsUpdate.m
│   │   ├── lbfgsAdd.m
│   │   ├── lbfgsProd.m
│   │   ├── taylorModel.m
│   │   ├── mex
│   │   │   ├── lbfgsAddC.c
│   │   │   ├── lbfgsProdC.c
│   │   │   ├── lbfgsC.c
│   │   │   └── mcholC.c
│   │   ├── lbfgs.m
│   │   ├── dampedUpdate.m
│   │   ├── mchol.m
│   │   ├── conjGrad.m
│   │   ├── polyinterp.m
│   │   ├── minFunc_processInputOptions.m
│   │   ├── ArmijoBacktrack.m
│   │   ├── WolfeLineSearch.m
│   │   └── minFunc.m
│   ├── logisticExample
│   │   ├── LogisticHv.m
│   │   ├── mylogsumexp.m
│   │   ├── LogisticDiagPrecond.m
│   │   ├── LogisticLoss.m
│   │   └── example_minFunc_LR.m
│   ├── mexAll.m
│   ├── autoDif
│   │   ├── autoHv.m
│   │   ├── autoHess.m
│   │   ├── derivativeCheck.m
│   │   ├── autoGrad.m
│   │   ├── autoTensor.m
│   │   └── fastDerivativeCheck.m
│   ├── example_derivativeCheck.m
│   └── example_minFunc.m
├── +Utils
│   └── indent.m
├── Rprop
│   ├── onehump.m
│   ├── costfunction.m
│   ├── rosenbrock.m
│   ├── costfunction_gpu.m
│   ├── Demo_rprop_1.m
│   ├── Demo_rprop_2.m
│   ├── Demo_rprop_3.m
│   └── rprop.m
├── RPROP.txt
├── Contents.txt
└── +GPU
│   └── GPUsupport.m
├── README.md
├── softmax.m
├── DMF_example.m
└── MC_DMF.m

/sigm.m:
--------------------------------------------------------------------------------
1 | function X = sigm(P) 2 | X = 1./(1+exp(-P)); 3 | end
--------------------------------------------------------------------------------
/tanh_opt.m:
--------------------------------------------------------------------------------
1 | function f=tanh_opt(A) 2 | f=1.7159*tanh(2/3.*A); 3 | end
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/precondDiag.m:
--------------------------------------------------------------------------------
1 | function [y] = precondDiag(r,D) 2 | y = D.*r;
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/precondTriu.m:
--------------------------------------------------------------------------------
1 | function [y] = precondTriu(r,U) 2 | y = U \ (U' \ r);
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/precondTriuDiag.m:
--------------------------------------------------------------------------------
1 | function [y] = precondTriuDiag(r,U,D) 2 | y = U \ (D .* (U' \ r));
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/isLegal.m:
--------------------------------------------------------------------------------
1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0;
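% (Annotation, not part of the original file: isLegal is the guard used by
% minFunc's line searches to reject trial values that are complex, NaN, or
% Inf. A quick illustration:
%   isLegal([0.5; -2])   % true
%   isLegal([NaN; 1])    % false - contains NaN
%   isLegal(1/0)         % false - contains Inf
%   isLegal(sqrt(-1))    % false - complex value)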
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexglx -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmac -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw32 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexglx -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmac -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw32: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw32 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmaci64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexmaci64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexmaci64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexmaci64 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Matrix-completion-by-deep-matrix-factorization 2 | The MATLAB code for the algorithm proposed in the following paper: 3 | J Fan, J Cheng. Matrix completion by deep matrix factorization. Neural Networks 98, 34-41 4 | -------------------------------------------------------------------------------- /softmax.m: -------------------------------------------------------------------------------- 1 | function mu = softmax(eta) 2 | % Softmax function 3 | % mu(i,c) = exp(eta(i,c))/sum_c' exp(eta(i,c')) 4 | 5 | c = 3; 6 | 7 | tmp = exp(c*eta); 8 | denom = sum(tmp, 2); 9 | mu = bsxfun(@rdivide, tmp, denom); 10 | 11 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/mylogsumexp.m: -------------------------------------------------------------------------------- 1 | function lse = mylogsumexp(b) 2 | % does logsumexp across columns 3 | B = max(b,[],2); 4 | lse = log(sum(exp(b-repmat(B,[1 size(b,2)])),2))+B; 5 | 6 | % Old version that used repmatC 7 | %lse = log(sum(exp(b-repmatC(B,[1 size(b,2)])),2))+B; 8 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/mexAll.m: -------------------------------------------------------------------------------- 1 | % minFunc 2 | fprintf('Compiling minFunc files...\n'); 3 | mex -outdir minFunc/compiled minFunc/mex/mcholC.c 4 | mex -outdir minFunc/compiled minFunc/mex/lbfgsC.c 5 | mex -outdir minFunc/compiled minFunc/mex/lbfgsAddC.c 6 | mex -outdir minFunc/compiled minFunc/mex/lbfgsProdC.c 7 | 8 | -------------------------------------------------------------------------------- /rproptoolbox/+Utils/indent.m: -------------------------------------------------------------------------------- 1 | % INDENT Indent text 2 | % This function is used to indent a text that will be printed to the 3 | % standard output, depending on the verbose level. 4 | % 5 | 6 | % Copyright (c) 2011 Roberto Calandra 7 | % $Revision: 0.11 $ 8 | 9 | 10 | function indent(verbose) 11 | 12 | for i=1:verbose 13 | fprintf(' ') 14 | end 15 | 16 | end -------------------------------------------------------------------------------- /rproptoolbox/Rprop/onehump.m: -------------------------------------------------------------------------------- 1 | function [f,gf] = onehump(x) 2 | % ONEHUMP Helper function for Tutorial for the Optimization Toolbox demo 3 | 4 | % Copyright 2008-2009 The MathWorks, Inc. 
5 | % $Revision: 1.1.6.2 $ $Date: 2009/05/07 18:25:30 $ 6 | 7 | r = x(1)^2 + x(2)^2; 8 | s = exp(-r); 9 | f = x(1)*s+r/20; 10 | 11 | if nargout > 1 12 | gf = [(1-2*x(1)^2)*s+x(1)/10; 13 | -2*x(1)*x(2)*s+x(2)/10]; 14 | end 15 |
--------------------------------------------------------------------------------
/rproptoolbox/RPROP.txt:
--------------------------------------------------------------------------------
1 | 2 | References: 3 | [1] Igel, C. and Hüsken, M., Improving the Rprop learning algorithm, 2000 4 | [2] Igel, C. and Hüsken, M., Empirical evaluation of the improved Rprop learning algorithms, 2003 5 | [3] Riedmiller, M., Rprop-description and implementation details, 1994 6 | [4] Riedmiller, M., Advanced supervised learning in multi-layer perceptrons-from backpropagation to adaptive learning algorithms, 1994 7 | 8 | 9 | 10 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/autoHv.m:
--------------------------------------------------------------------------------
1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 3 | % 4 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 5 | % based on gradient values 6 | 7 | if useComplex 8 | mu = 1e-150i; 9 | else 10 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 11 | end 12 | [f,finDif] = funObj(x + v*mu,varargin{:}); 13 | Hv = (finDif-g)/mu;
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/logisticExample/LogisticDiagPrecond.m:
--------------------------------------------------------------------------------
1 | function [m] = LogisticDiagPrecond(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/mcholinc.m:
--------------------------------------------------------------------------------
1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/lbfgsUpdate.m:
--------------------------------------------------------------------------------
1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s];
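% (Annotation, not in the original file: the two assignments at lines 11-12
% are the "limited memory" step - both buffers drop their oldest column and
% append the newest pair. For example, with corrections = 3:
%   old_dirs = [s1 s2 s3]  ->  old_dirs = [s2 s3 s_new]
%   old_stps = [y1 y2 y3]  ->  old_stps = [y2 y3 y_new])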
12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /rproptoolbox/Contents.txt: -------------------------------------------------------------------------------- 1 | File name Revision 2 | ---------------------------------------------------- 3 | +GPU/GPUsupport.m 0.10 4 | +Utils/indent.m 0.11 5 | RPROP.txt 6 | Rprop/costfunction.m 7 | Rprop/costfunction_gpu.m 8 | Rprop/onehump.m 9 | Rprop/rosenbrock.m 10 | Rprop/rprop.m 0.96 11 | Rprop/Demo_rprop_1.m 0.55 12 | Rprop/Demo_rprop_2.m 0.55 13 | Rprop/Demo_rprop_3.m 0.60 14 | Contents.txt 15 | 16 | 17 | Automatically generated by Rpackage 0.26 on 04-Jun-2012 18 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/lbfgsAdd.m: -------------------------------------------------------------------------------- 1 | function [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(y,s,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex) 2 | ys = y'*s; 3 | skipped = 0; 4 | corrections = size(S,2); 5 | if ys > 1e-10 6 | if lbfgs_end < corrections 7 | lbfgs_end = lbfgs_end+1; 8 | if lbfgs_start ~= 1 9 | if lbfgs_start == corrections 10 | lbfgs_start = 1; 11 | else 12 | lbfgs_start = lbfgs_start+1; 13 | end 14 | end 15 | else 16 | lbfgs_start = min(2,corrections); 17 | lbfgs_end = 1; 18 | end 19 | 20 | if useMex 21 | lbfgsAddC(y,s,Y,S,ys,int32(lbfgs_end)); 22 | else 23 | S(:,lbfgs_end) = s; 24 | Y(:,lbfgs_end) = y; 25 | end 26 | YS(lbfgs_end) = ys; 27 | 28 | % Update scale of initial Hessian approximation 29 | Hdiag = ys/(y'*y); 30 | else 31 | skipped = 1; 32 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/lbfgsProd.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgsProd(g,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the negative gradient 6 | 7 | % Set up indexing 8 | [nVars,maxCorrections] = size(S); 9 | if lbfgs_start == 1 10 | ind = 1:lbfgs_end; 11 | nCor = lbfgs_end-lbfgs_start+1; 12 | else 13 | ind = [lbfgs_start:maxCorrections 1:lbfgs_end]; 14 | nCor = maxCorrections; 15 | end 16 | al = zeros(nCor,1); 17 | be = zeros(nCor,1); 18 | 19 | d = -g; 20 | for j = 1:length(ind) 21 | i = ind(end-j+1); 22 | al(i) = (S(:,i)'*d)/YS(i); 23 | d = d-al(i)*Y(:,i); 24 | end 25 | 26 | % Multiply by Initial Hessian 27 | d = Hdiag*d; 28 | 29 | for i = ind 30 | be(i) = (Y(:,i)'*d)/YS(i); 31 | d = d + S(:,i)*(al(i)-be(i)); 32 | end 33 | -------------------------------------------------------------------------------- /rproptoolbox/Rprop/costfunction.m: -------------------------------------------------------------------------------- 1 | % Cost function 2 | % 3 | % Cost function present in the Matlab Help for fminunc function 4 | 5 | function [f,g] = costfunction(x) 6 | % BROWNFG Nonlinear minimization test problem 7 | % 8 | % Evaluate the function 9 | n = length(x); 10 | y = zeros(n,1); 11 | i = 1:(n-1); 12 | y(i)=(x(i).^2).^(x(i+1).^2+1) + (x(i+1).^2).^(x(i).^2+1); 13 | 14 | f=sum(y); 15 | 16 | % Evaluate the gradient if nargout > 1 17 | if nargout > 1 18 | i=1:(n-1); 19 | g = zeros(n,1); 20 | g(i) = 2*(x(i+1).^2+1).*x(i).* ... 
21 | ((x(i).^2).^(x(i+1).^2))+ ... 22 | 2*x(i).*((x(i+1).^2).^(x(i).^2+1)).* ... 23 | log(x(i+1).^2); 24 | g(i+1) = g(i+1) + ... 25 | 2*x(i+1).*((x(i).^2).^(x(i+1).^2+1)).* ... 26 | log(x(i).^2) + ... 27 | 2*(x(i).^2+1).*x(i+1).* ... 28 | ((x(i+1).^2).^(x(i).^2)); 29 | end 30 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum(mylogsumexp([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | %g = -X.'*(y./(1+exp(yXw))); 19 | g = -(X.'*(y./(1+exp(yXw)))); 20 | end 21 | end 22 | 23 | if nargout > 2 24 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 25 | end 26 | 27 | if nargout > 3 28 | T = zeros(p,p,p); 29 | for j1 = 1:p 30 | for j2 = 1:p 31 | for j3 = 1:p 32 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 33 | end 34 | end 35 | end 36 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /DMF_example.m: -------------------------------------------------------------------------------- 1 | % This is a toy example of DMF 2 | clc 3 | clear all 4 | % Generate synthetic data 5 | missrate=0.5;% missing rate 6 | m=20; 7 | n=100; 8 | r=2; 9 | x=unifrnd(-1,1,[r,n]); 10 | X=randn(m,r)*x+(randn(m,r)*x.^2+randn(m,r)*x.^3);% polynomial function 11 | % mask 12 | N=size(X,2); 13 | [nr,nc]=size(X); 14 | M=ones(nr,nc); 15 | for i=1:N 16 | temp=randperm(nr,ceil(nr*missrate));% 1 17 | M(temp,i)=0; 18 | end 19 | X0=X;% complete data (original) 20 | X=X.*M;% incomplete data masked by M (binary matrix) 21 | % DMF setup 22 | s=[r 10 m];% input size, hidden size 1, ..., output size 23 | options.Wp=0.01; 24 | options.Zp=0.01; 25 | options.maxiter=1000; 26 | options.activation_func={'tanh_opt','linear'}; 27 | [X_DMF,NN_MF]=MC_DMF(X',M',s,options); 28 | Xr=X_DMF'; 29 | % compute recovery error 30 | re_error=norm((X0-Xr).*(1-M),'fro')/norm(X0.*(1-M),'fro'); 31 | disp(['Relative recovery error is ' num2str(re_error)]) 32 | -------------------------------------------------------------------------------- /rproptoolbox/Rprop/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the 
function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end -------------------------------------------------------------------------------- /rproptoolbox/Rprop/costfunction_gpu.m: -------------------------------------------------------------------------------- 1 | % Cost function (using gpu) 2 | % 3 | % Cost function present in the Matlab Help for fminunc function 4 | 5 | function [f,g] = costfunction_gpu(x) 6 | % BROWNFG Nonlinear minimization test problem 7 | % 8 | % Evaluate the function 9 | n = length(x); 10 | y = parallel.gpu.GPUArray.zeros(n,1); 11 | i = 1:(n-1); 12 | y(i)=(x(i).^2).^(x(i+1).^2+1) + (x(i+1).^2).^(x(i).^2+1); 13 | 14 | f=sum(y); 15 | 16 | % Evaluate the gradient if nargout > 1 17 | if nargout > 1 18 | i=1:(n-1); 19 | g = parallel.gpu.GPUArray.zeros(n,1); 20 | g(i) = 2*(x(i+1).^2+1).*x(i).* ... 21 | ((x(i).^2).^(x(i+1).^2))+ ... 22 | 2*x(i).*((x(i+1).^2).^(x(i).^2+1)).* ... 23 | log(x(i+1).^2); 24 | g(i+1) = g(i+1) + ... 25 | 2*x(i+1).*((x(i).^2).^(x(i+1).^2+1)).* ... 26 | log(x(i).^2) + ... 27 | 2*(x(i).^2+1).*x(i+1).* ... 28 | ((x(i+1).^2).^(x(i).^2)); 29 | end 30 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/lbfgsAddC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgsAdd.m for details */ 5 | /* This function will not exit gracefully on bad input! 
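   (Annotation, not in the original source: this MEX routine is the in-place
   counterpart of the pure-MATLAB branch in lbfgsAdd.m - it copies s and y
   into column lbfgs_end of S and Y without allocating new arrays.)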
*/ 6 | 7 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 8 | { 9 | /* Variable Declarations */ 10 | 11 | double *s,*y,*S, *Y, ys; 12 | int i,j,nVars,lbfgs_end; 13 | 14 | /* Get Input Pointers */ 15 | 16 | y = mxGetPr(prhs[0]); 17 | s = mxGetPr(prhs[1]); 18 | Y = mxGetPr(prhs[2]); 19 | S = mxGetPr(prhs[3]); 20 | ys= mxGetScalar(prhs[4]); 21 | lbfgs_end = (int)mxGetScalar(prhs[5]); 22 | 23 | if (!mxIsClass(prhs[5],"int32")) 24 | mexErrMsgTxt("lbfgs_end must be int32"); 25 | 26 | /* Compute number of variables, maximum number of corrections */ 27 | 28 | nVars = mxGetDimensions(prhs[2])[0]; 29 | 30 | for(j=0;j<nVars;j++) { 31 | Y[(lbfgs_end-1)*nVars+j] = y[j]; 32 | S[(lbfgs_end-1)*nVars+j] = s[j]; 33 | } 34 | }
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/derivativeCheck.m:
--------------------------------------------------------------------------------
1 | function derivativeCheck(funObj,x,order,type,varargin) 2 | % derivativeCheck(funObj,x,order,type,varargin) 3 | % 4 | % Checks the user-supplied gradient (order = 1) or Hessian (order = 2) of 5 | % funObj against a numerical approximation of the given type 6 | 7 | if nargin < 4 8 | type = 2; % Use central-differencing by default 9 | if nargin < 3 10 | order = 1; % Only check gradient by default 11 | end 12 | end 13 | 14 | if order == 2 15 | [f,g,H] = funObj(x,varargin{:}); 16 | 17 | fprintf('Checking Hessian...\n'); 18 | [f2,g2,H2] = autoHess(x,type,funObj,varargin{:}); 19 | 20 | fprintf('Max difference between user and numerical Hessian: %e\n',max(abs(H(:)-H2(:)))); 21 | 22 | if max(abs(H(:)-H2(:))) > 1e-4 23 | H 24 | H2 25 | diff = abs(H-H2) 26 | pause; 27 | end 28 | else 29 | [f,g] = funObj(x,varargin{:}); 30 | 31 | fprintf('Checking Gradient...\n'); 32 | [f2,g2] = autoGrad(x,type,funObj,varargin{:}); 33 | 34 | fprintf('Max difference between user and numerical gradient: %e\n',max(abs(g-g2))); 35 | if max(abs(g-g2)) > 1e-4 36 | fprintf('User NumDif:\n'); 37 | [g g2] 38 | diff = abs(g-g2) 39 | pause 40 | end 41 | end 42 | 43 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/autoGrad.m:
--------------------------------------------------------------------------------
1 | function [f,g] = autoGrad(x,type,funObj,varargin) 2 | % [f,g] = autoGrad(x,type,funObj,varargin) 3 | % 4 | % Numerically compute gradient of objective function from function values 5 | % 6 | % type = 7 | % 1 - forward-differencing (p+1 evaluations) 8 | % 2 - central-differencing (more accurate, but requires 2p evaluations) 9 | % 3 - complex-step derivative (most accurate and only requires p evaluations, but only works for certain objectives) 10 | 11 | p = length(x); 12 | 13 | if type == 1 % Use Finite Differencing 14 | f = funObj(x,varargin{:}); 15 | mu = 2*sqrt(1e-12)*(1+norm(x)); 16 | diff = zeros(p,1); 17 | for j = 1:p 18 | e_j = zeros(p,1); 19 | e_j(j) = 1; 20 | diff(j,1) = funObj(x + mu*e_j,varargin{:}); 21 | end 22 | g = (diff-f)/mu; 23 | elseif type == 3 % Use Complex Differentials 24 | mu = 1e-150; 25 | diff = zeros(p,1); 26 | for j = 1:p 27 | e_j = zeros(p,1); 28 | e_j(j) = 1; 29 | diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); 30 | end 31 | f = mean(real(diff)); 32 | g = imag(diff)/mu; 33 | else % Use Central Differencing 34 | mu = 2*sqrt(1e-12)*(1+norm(x)); 35 | diff1 = zeros(p,1); 36 | diff2 = zeros(p,1); 37 | for j = 1:p 38 | e_j = zeros(p,1); 39 | e_j(j) = 1; 40 | diff1(j,1) = funObj(x + mu*e_j,varargin{:}); 41 | diff2(j,1) = funObj(x - mu*e_j,varargin{:}); 42 | end 43 | f = mean([diff1;diff2]); 44 | g = (diff1 - diff2)/(2*mu); 45 | end 46 | 47 | if 0 % DEBUG CODE 48 | [fReal gReal] = funObj(x,varargin{:}); 49 | [fReal f] 50 | [gReal g] 51 | diff 52 | pause; 53 | end
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/autoTensor.m:
--------------------------------------------------------------------------------
1 | function [f,g,H,T] = autoTensor(x,type,funObj,varargin) 2 | % [f,g,H,T] = autoTensor(x,type,funObj,varargin) 3 | % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values 4 | 5 | p = length(x); 6 | 7 | if type == 2 % Use Central Differencing 8 | mu = 2*sqrt(1e-12)*(1+norm(x)); 9 | f1 = zeros(p,1); 10 | f2 = zeros(p,1); 11 | g1 = zeros(p); 12 | g2 = zeros(p); 13 | diff = zeros(p,p,p); 14 | for j = 1:p 15 | e_j = zeros(p,1); 16 | e_j(j) = 1; 17 | [f1(j) g1(:,j) diff1(:,:,j)] = funObj(x + mu*e_j,varargin{:}); 18 | [f2(j) g2(:,j) diff2(:,:,j)] = funObj(x - mu*e_j,varargin{:}); 19 | end 20 | f = mean([f1;f2]); 21 | g = mean([g1 g2],2); 22 | H = mean(cat(3,diff1,diff2),3); 23 | T = (diff1-diff2)/(2*mu); 24 | elseif type == 3 % Use Complex Differentials 25 | mu = 1e-150; 26 | f = zeros(p,1); 27 | g = zeros(p); 28 | diff = zeros(p,p,p); 29 | for j = 1:p 30 | e_j = zeros(p,1); 31 | e_j(j) = 1; 32 | [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); 33 | end 34 | f = mean(real(f)); 35 | g = mean(real(g),2); 36 | H = mean(real(diff),3); 37 | T = imag(diff)/mu; 38 | else % Use finite differencing 39 | mu = 2*sqrt(1e-12)*(1+norm(x)); 40 | [f,g,H] = funObj(x,varargin{:}); 41 | diff = zeros(p,p,p); 42 | for j = 1:p 43 | e_j = zeros(p,1); 44 | e_j(j) = 1; 45 | [~,~,diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); 46 | end 47 | T = (diff-repmat(H,[1 1 p]))/mu; 48 | end
--------------------------------------------------------------------------------
/rproptoolbox/Rprop/Demo_rprop_1.m:
--------------------------------------------------------------------------------
1 | % Compare Rprop to fminunc 2 | % 3 | % Copyright (c) 2012 Roberto Calandra 4 | % $Revision: 0.55 $ 5 | 6 | 7 | %% Init 8 | 9 | funcgrad = @onehump; 10 | 11 | minfunc = 10e-6; 12 | niter = 10; 13 | 14 | p.verbosity = 0; % Increase verbosity to print something 15 | p.MaxIter = 10000; % Maximum number of iterations 16 | p.d_Obj = minfunc; 17 | p.method = 'IRprop-'; % Use IRprop- algorithm 18 | p.display = 0; 19 | 20 | p2.length = 20; 21 | 22 | options = optimset('GradObj','on','TolFun', minfunc,'Display','off'); 23 | 24 | 25 | %% Compute 26 | 27 | for iter = 1:niter 28 | 29 | 30 | a.max = 3; 31 | a.min = 0; 32 | x0 = Utils.rrand([2,1],a); % Randomize initial point 33 | 34 | tic 35 | [x1,~,~,stats1] = rprop(funcgrad,x0,p); 36 | t1(iter)=toc; 37 | 38 | tic 39 | [x2,~,~,stats2] = fminunc(funcgrad,x0,options); 40 | t2(iter)=toc; 41 | 42 | %tic 43 | %[X, stats3, i] = minimize(x0, funcgrad,p2); 44 | %t3(iter)=toc; 45 | 46 | end 47 | 48 | 49 | %% Plot results 50 | 51 | fprintf('Average Running time to reach an Obj. value of %2.0e:\n',minfunc) 52 | fprintf('Rprop: %f\n',mean(t1)); 53 | fprintf('Fminunc: %f\n',mean(t2)); 54 | 55 | figure() 56 | Utils.rplot(@plot,{t1,t2}) 57 | legend(p.method,'fminunc') 58 | xlabel('Experiment number') 59 | ylabel('Time (sec)') 60 | 61 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/mchol.m:
--------------------------------------------------------------------------------
1 | function [l,d,perm] = mchol(A,mu) 2 | % [l,d,perm] = mchol(A,mu) 3 | % Compute the Gill-Murray modified LDL factorization of A 4 | 5 | if nargin < 2 6 | mu = 1e-12; 7 | end 8 | 9 | n = size(A,1); 10 | l = eye(n); 11 | d = zeros(n,1); 12 | perm = 1:n; 13 | 14 | for i = 1:n 15 | c(i,i) = A(i,i); 16 | end 17 | 18 | % Compute modification parameters 19 | gamma = max(abs(diag(A))); 20 | xi = max(max(abs(A - diag(diag(A))))); 21 | delta = mu*max(gamma+xi,1); 22 | if n > 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end
--------------------------------------------------------------------------------
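A minimal usage sketch for mchol, added by the editor (not a file from this repository; the matrix H and gradient g below are made-up values): even when the Hessian is indefinite, solving with the factors of the Gill-Murray modification still yields a descent direction.

    H = [1 2; 2 1];               % indefinite Hessian: eigenvalues are 3 and -1
    g = [1; -2];                  % gradient at the current iterate
    [l,d,perm] = mchol(H);        % factors of a positive-definite modification of H(perm,perm)
    rhs = -g(perm);               % permute the right-hand side
    z = l' \ ((l \ rhs) ./ d);    % solve L*diag(d)*L' * z = rhs by substitution
    dk = zeros(2,1);
    dk(perm) = z;                 % undo the symmetric permutation
    assert(g'*dk < 0)             % descent direction despite the indefinite H

This mirrors how a modified-Newton method would use the factorization: the diagonal modification guarantees positive definiteness, so the resulting Newton-like direction cannot point uphill.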
/rproptoolbox/Rprop/Demo_rprop_2.m: -------------------------------------------------------------------------------- 1 | % DEMO_RPROP_2 Compare the four Rprop methods 2 | % 3 | % 4 | 5 | % Copyright (c) 2011 Roberto Calandra 6 | % $Revision: 0.55 $ 7 | 8 | 9 | %% Init 10 | 11 | funcgrad = @costfunction; % Function to optimize 12 | 13 | a.max = 3; 14 | a.min = 0; 15 | x0 = Utils.rrand([5000,1],a); 16 | 17 | p.verbosity = 1; % Increase verbosity to print something 18 | p.MaxIter = 100; % Maximum number of iterations 19 | p.display = 0; 20 | 21 | 22 | %% Compute 23 | 24 | p.method = 'Rprop-'; % Define algorithm to use 25 | [x1,~,~,stats1] = rprop(funcgrad,x0,p); 26 | 27 | p.method = 'Rprop+'; % Define algorithm to use 28 | [x2,~,~,stats2] = rprop(funcgrad,x0,p); 29 | 30 | p.method = 'IRprop-'; % Define algorithm to use 31 | [x3,~,~,stats3] = rprop(funcgrad,x0,p); 32 | 33 | p.method = 'IRprop+'; % Define algorithm to use 34 | [x4,~,~,stats4] = rprop(funcgrad,x0,p); 35 | 36 | 37 | %% Plot results 38 | 39 | figure() 40 | Utils.rplot(@semilogy,{stats1.error, stats2.error, stats3.error, stats4.error}) 41 | legend('Rprop-','Rprop+','IRprop-','IRprop+','Location','SouthWest') 42 | xlabel('Number of iterations') 43 | ylabel('Obj. Value') 44 | 45 | figure() 46 | Utils.rplot(@semilogy,{stats1.time, stats2.time, stats3.time, stats4.time},... 47 | {stats1.error, stats2.error, stats3.error, stats4.error}) 48 | legend('Rprop-','Rprop+','IRprop-','IRprop+','Location','SouthWest') 49 | xlabel('Time (s)') 50 | ylabel('Obj. Value') 51 | 52 | drawnow 53 | 54 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/example_derivativeCheck.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 250; 4 | nVars = 10; 5 | X = randn(nInst,nVars); 6 | w = randn(nVars,1); 7 | y = sign(X*w + randn(nInst,1)); 8 | 9 | wTest = randn(nVars,1); 10 | 11 | fprintf('Testing gradient using forward-differencing...\n'); 12 | order = 1; 13 | derivativeCheck(@LogisticLoss,wTest,order,1,X,y); 14 | 15 | fprintf('Testing gradient using central-differencing...\n'); 16 | derivativeCheck(@LogisticLoss,wTest,order,2,X,y); 17 | 18 | fprintf('Testing gradient using complex-step derivative...\n'); 19 | derivativeCheck(@LogisticLoss,wTest,order,3,X,y); 20 | 21 | fprintf('\n\n\n'); 22 | pause 23 | 24 | fprintf('Testing Hessian using forward-differencing\n'); 25 | order = 2; 26 | derivativeCheck(@LogisticLoss,wTest,order,1,X,y); 27 | 28 | fprintf('Testing Hessian using central-differencing\n'); 29 | order = 2; 30 | derivativeCheck(@LogisticLoss,wTest,order,2,X,y); 31 | 32 | fprintf('Testing Hessian using complex-step derivative\n'); 33 | order = 2; 34 | derivativeCheck(@LogisticLoss,wTest,order,3,X,y); 35 | 36 | fprintf('\n\n\n'); 37 | pause 38 | 39 | fprintf('Testing gradient using fastDerivativeCheck...\n'); 40 | order = 1; 41 | fastDerivativeCheck(@LogisticLoss,wTest,order,1,X,y); 42 | fastDerivativeCheck(@LogisticLoss,wTest,order,2,X,y); 43 | fastDerivativeCheck(@LogisticLoss,wTest,order,3,X,y); 44 | 45 | fprintf('\n\n\n'); 46 | pause 47 | 48 | fprintf('Testing Hessian using fastDerivativeCheck...\n'); 49 | order = 2; 50 | fastDerivativeCheck(@LogisticLoss,wTest,order,1,X,y); 51 | fastDerivativeCheck(@LogisticLoss,wTest,order,2,X,y); 52 | fastDerivativeCheck(@LogisticLoss,wTest,order,3,X,y); 53 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/autoDif/fastDerivativeCheck.m: 
--------------------------------------------------------------------------------
1 | function diff = fastDerivativeCheck(funObj,x,order,type,varargin) 2 | % diff = fastDerivativeCheck(funObj,x,order,type,varargin) 3 | 4 | if nargin < 3 5 | order = 1; % Only check gradient by default 6 | end 7 | if nargin < 4 8 | type = 2; % Use central-differencing by default 9 | end 10 | 11 | p = length(x); 12 | d = sign(randn(p,1)); 13 | 14 | if order == 2 15 | fprintf('Checking Hessian-vector product along random direction:\n'); 16 | [f,g,H] = funObj(x,varargin{:}); 17 | Hv = H*d; 18 | if type == 1 % Use Finite Differencing 19 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 20 | [diff,diffa] = funObj(x+d*mu,varargin{:}); 21 | Hv2 = (diffa-g)/mu; 22 | elseif type == 3 % Use Complex Differentials 23 | mu = 1e-150; 24 | [diff,diffa] = funObj(x+d*mu*i,varargin{:}); 25 | Hv2 = imag(diffa-g)/mu; 26 | else % Use Central Differencing 27 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 28 | [diff1,diffa] = funObj(x+d*mu,varargin{:}); 29 | [diff2,diffb] = funObj(x-d*mu,varargin{:}); 30 | Hv2 = (diffa-diffb)/(2*mu); 31 | end 32 | fprintf('Max difference between user and numerical Hessian-vector product: %e\n',max(abs(Hv-Hv2))); 33 | else 34 | fprintf('Checking Gradient along random direction:\n'); 35 | [f,g] = funObj(x,varargin{:}); 36 | gtd = g'*d; 37 | if type == 1 % Use Finite Differencing 38 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 39 | diff = funObj(x+d*mu,varargin{:}); 40 | gtd2 = (diff-f)/mu; 41 | elseif type == 3 % Use Complex Differentials 42 | mu = 1e-150; 43 | [diff,diffa] = funObj(x+d*mu*i,varargin{:}); 44 | gtd2 = imag(diff)/mu; 45 | else % Use Central Differencing 46 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 47 | diff1 = funObj(x+d*mu,varargin{:}); 48 | diff2 = funObj(x-d*mu,varargin{:}); 49 | gtd2 = (diff1-diff2)/(2*mu); 50 | end 51 | fprintf('Max difference between user and numerical directional-derivative: %e\n',max(abs(gtd-gtd2))); 52 | end
--------------------------------------------------------------------------------
/rproptoolbox/Rprop/Demo_rprop_3.m:
--------------------------------------------------------------------------------
1 | % DEMO_RPROP_3 Rprop with GPU acceleration 2 | % Show the use of GPU acceleration for the Rprop function and compare its 3 | % performance with the normal CPU-computed version 4 | % 5 | 6 | % Copyright (c) 2011 Roberto Calandra 7 | % $Revision: 0.60 $ 8 | 9 | 10 | %% Init 11 | 12 | numdim = [1000000 500000 100000 50000 10000 5000 1000]; 13 | 14 | p.verbosity = 1; % Increase verbosity to print something 15 | p.MaxIter = 300; % Maximum number of iterations 16 | p.d_Obj = 10e-12; % Desired objective value 17 | 18 | 19 | %% Compute 20 | 21 | t = 1; 22 | 23 | for i = numdim 24 | 25 | a.max = 3; 26 | a.min = 0; 27 | x0 = Utils.rrand([i,1],a); 28 | 29 | % with GPU 30 | funcgrad = @costfunction_gpu; % Function to optimize 31 | p.useGPU = true; % use GPU acceleration if possible? 32 | p.funcgradgpu = true; % does the cost function accept and 33 | % return variables as gpuArray? 34 | [x1,~,~,stats1] = rprop(funcgrad,x0,p); 35 | 36 | 37 | % with CPU 38 | funcgrad = @costfunction; % Function to optimize 39 | p.useGPU = false; % use GPU acceleration if possible? 40 | [x2,~,~,stats2] = rprop(funcgrad,x0,p); 41 | 42 | 43 | res.time1(t) = stats1.time(end); 44 | res.time2(t) = stats2.time(end); 45 | 46 | t = t+1; 47 | 48 | end 49 | 50 | 51 | %% Plot results 52 | 53 | figure() 54 | Utils.rplot(@loglog,{numdim,numdim},...
55 | {res.time1, res.time2}) 56 | legend('GPU','CPU','Location','SouthEast') 57 | ylabel('Time (s)') 58 | xlabel('Number of parameters') 59 | 60 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/conjGrad.m: -------------------------------------------------------------------------------- 1 | function [x,k,res,negCurv] = cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVectArgs) 2 | % [x,k,res,negCurv] = 3 | % cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVect 4 | % Args) 5 | % Linear Conjugate Gradient, where optionally we use 6 | % - preconditioner on vector v with precFunc(v,precArgs{:}) 7 | % - matrix multipled by vector with matrixVectFunc(v,matrixVectArgs{:}) 8 | 9 | if nargin <= 4 10 | verbose = 0; 11 | end 12 | 13 | x = zeros(size(b)); 14 | r = -b; 15 | 16 | % Apply preconditioner (if supplied) 17 | if nargin >= 7 && ~isempty(precFunc) 18 | y = precFunc(r,precArgs{:}); 19 | else 20 | y = r; 21 | end 22 | 23 | ry = r'*y; 24 | p = -y; 25 | k = 0; 26 | 27 | res = norm(r); 28 | done = 0; 29 | negCurv = []; 30 | while res > optTol & k < maxIter & ~done 31 | % Compute Matrix-vector product 32 | if nargin >= 9 33 | Ap = matrixVectFunc(p,matrixVectArgs{:}); 34 | else 35 | Ap = A*p; 36 | end 37 | pAp = p'*Ap; 38 | 39 | % Check for negative Curvature 40 | if pAp <= 1e-16 41 | if verbose 42 | fprintf('Negative Curvature Detected!\n'); 43 | end 44 | 45 | if nargout == 4 46 | if pAp < 0 47 | negCurv = p; 48 | return 49 | end 50 | end 51 | 52 | if k == 0 53 | if verbose 54 | fprintf('First-Iter, Proceeding...\n'); 55 | end 56 | done = 1; 57 | else 58 | if verbose 59 | fprintf('Stopping\n'); 60 | end 61 | break; 62 | end 63 | end 64 | 65 | % Conjugate Gradient 66 | alpha = ry/(pAp); 67 | x = x + alpha*p; 68 | r = r + alpha*Ap; 69 | 70 | % If supplied, apply preconditioner 71 | if nargin >= 7 && ~isempty(precFunc) 72 | y = precFunc(r,precArgs{:}); 73 | else 74 | y = r; 75 | end 76 | 77 | ry_new = r'*y; 78 | beta = ry_new/ry; 79 | p = -y + beta*p; 80 | k = k + 1; 81 | 82 | % Update variables 83 | ry = ry_new; 84 | res = norm(r); 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/lbfgsProdC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgsProd.m for details */ 5 | /* This function will not exit gracefully on bad input! 
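   (Annotation, not in the original source: this is the C counterpart of the
   two-loop recursion in lbfgsProd.m, applied to d = -g over the stored
   corrections -
     backward pass:  alpha(i) = (s_i'*d)/(y_i'*s_i);  d = d - alpha(i)*y_i
     scaling:        d = Hdiag*d
     forward pass:   beta(i)  = (y_i'*d)/(y_i'*s_i);  d = d + (alpha(i)-beta(i))*s_i)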
*/ 6 | 7 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 8 | { 9 | /* Variable Declarations */ 10 | 11 | double *S, *Y, *YS, *g, Hdiag, *d, *alpha, *beta; 12 | int i,j,nVars,nCor,maxCor,lbfgs_start,lbfgs_end; 13 | 14 | /* Get Input Pointers */ 15 | 16 | g = mxGetPr(prhs[0]); 17 | S = mxGetPr(prhs[1]); 18 | Y = mxGetPr(prhs[2]); 19 | YS= mxGetPr(prhs[3]); 20 | lbfgs_start = (int)mxGetScalar(prhs[4]); 21 | lbfgs_end = (int)mxGetScalar(prhs[5]); 22 | Hdiag = mxGetScalar(prhs[6]); 23 | 24 | if (!mxIsClass(prhs[4],"int32")||!mxIsClass(prhs[5],"int32")) 25 | mexErrMsgTxt("lbfgs_start and lbfgs_end must be int32"); 26 | 27 | /* Compute number of variables, maximum number of corrections */ 28 | 29 | nVars = mxGetDimensions(prhs[1])[0]; 30 | maxCor = mxGetDimensions(prhs[1])[1]; 31 | 32 | /* Compute number of corrections available */ 33 | if (lbfgs_start == 1) 34 | nCor = lbfgs_end-lbfgs_start+1; 35 | else 36 | nCor = maxCor; 37 | 38 | /* Allocate Memory for Local Variables */ 39 | alpha = mxCalloc(nCor,sizeof(double)); 40 | beta = mxCalloc(nCor,sizeof(double)); 41 | 42 | /* Set-up Output Vector */ 43 | plhs[0] = mxCreateDoubleMatrix(nVars,1,mxREAL); 44 | d = mxGetPr(plhs[0]); 45 | 46 | for(j=0;j= 0;i--) { 50 | alpha[i] = 0; 51 | for(j=0;j= lbfgs_start-1;i--) { 59 | alpha[i] = 0; 60 | for(j=0;j2 30 | fprintf ('NVIDIA driver version: %s\n',driverver) 31 | end 32 | 33 | catch 34 | 35 | % No GPU? 36 | if verbose 37 | warning('Impossible to Identify GPU(s)') 38 | end 39 | support = false; 40 | return 41 | 42 | end 43 | 44 | 45 | %% Analyze GPU(s) 46 | 47 | gpucapable = zeros([ngpu 1]); 48 | for ii = 1:ngpu 49 | try 50 | m(ii) = gpuDevice(ii); 51 | 52 | gpucapable(ii)=m(ii).DeviceSupported; 53 | 54 | if verbose>1 55 | if gpucapable(ii) 56 | 57 | fprintf('GPU %d: %s with CUDA support (v.%s)\n',... 58 | ii,m(ii).Name,m(ii).ComputeCapability) 59 | 60 | else 61 | 62 | fprintf('GPU %d: %s does NOT have CUDA support >1.3 (v.%s)\n',... 63 | ii,m(ii).Name,m(ii).ComputeCapability) 64 | 65 | end 66 | end 67 | 68 | catch 69 | warning(['GPU ' num2str(ii) ' doesn"t respond']) 70 | 71 | end 72 | end 73 | 74 | 75 | %% Is there a GPU supported? 
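% (Annotation, not in the original file: the equivalent manual check in a
% MATLAB session is
%   gpu = gpuDevice; gpu.DeviceSupported
% which queries the same per-device flag that is accumulated below.)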
76 | 77 | ngpusupported = sum(gpucapable); 78 | 79 | if ngpusupported 80 | support = true; 81 | if verbose 82 | fprintf ('Supported GPU found\n',ngpu) 83 | end 84 | else 85 | support = false; 86 | 87 | if verbose 88 | warning('No supported GPU found') 89 | end 90 | end 91 | 92 | 93 | %% Select best GPU for computations 94 | 95 | if ngpusupported>1 96 | % based either on Gflops or Memory (and support) 97 | if verbose>1 98 | %fprintf('Selected GPU %i') 99 | end 100 | 101 | end 102 | 103 | 104 | end 105 | 106 | 107 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 200; 5 | X = randn(nInst,nVars); 6 | w = randn(nVars,1); 7 | y = sign(X*w + randn(nInst,1)); 8 | 9 | w_init = zeros(nVars,1); 10 | funObj = @(w)LogisticLoss(w,X,y); 11 | 12 | fprintf('\nRunning Steepest Descent\n'); 13 | options.Method = 'sd'; 14 | minFunc(@LogisticLoss,w_init,options,X,y); 15 | pause; 16 | 17 | fprintf('\nRunning Cyclic Steepest Descent\n'); 18 | options.Method = 'csd'; 19 | minFunc(@LogisticLoss,w_init,options,X,y); 20 | pause; 21 | 22 | fprintf('\nRunning Conjugate Gradient\n'); 23 | options.Method = 'cg'; 24 | minFunc(@LogisticLoss,w_init,options,X,y); 25 | pause; 26 | 27 | fprintf('\nRunning Scaled Conjugate Gradient\n'); 28 | options.Method = 'scg'; 29 | minFunc(@LogisticLoss,w_init,options,X,y); 30 | pause; 31 | 32 | fprintf('\nRunning Preconditioned Conjugate Gradient (Diagonal preconditioner)\n'); 33 | options.Method = 'pcg'; 34 | options.precFunc = @LogisticDiagPrecond; 35 | minFunc(@LogisticLoss,w_init,options,X,y); 36 | pause; 37 | 38 | fprintf('\nRunning Preconditioned Conjugate Gradient (L-BFGS preconditioner)\n'); 39 | options.Method = 'pcg'; 40 | options.precFunc = []; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('\nRunning Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 45 | options.Method = 'newton0'; 46 | minFunc(@LogisticLoss,w_init,options,X,y); 47 | pause; 48 | 49 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 50 | options.Method = 'pnewton0'; 51 | options.precFunc = @LogisticDiagPrecond; 52 | minFunc(@LogisticLoss,w_init,options,X,y); 53 | pause; 54 | 55 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 56 | options.Method = 'pnewton0'; 57 | options.precFunc = []; 58 | minFunc(@LogisticLoss,w_init,options,X,y); 59 | pause; 60 | 61 | fprintf('\nRunning Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 62 | options.Method = 'newton0'; 63 | options.HvFunc = @LogisticHv; 64 | minFunc(@LogisticLoss,w_init,options,X,y); 65 | pause; 66 | 67 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 68 | options.Method = 'pnewton0'; 69 | options.HvFunc = @LogisticHv; 70 | options.precFunc = @LogisticDiagPrecond; 71 | minFunc(@LogisticLoss,w_init,options,X,y); 72 | pause; 73 | 74 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 75 | options.Method = 'pnewton0'; 76 | options.precFunc = []; 77 | options.HvFunc = @LogisticHv; 78 | minFunc(@LogisticLoss,w_init,options,X,y); 79 | pause; 
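% (Annotation, not in the original script: all runs above minimize the same
% objective, so the printed final function values are directly comparable.
% To suppress the per-iteration trace for any of them, set
%   options.Display = 'none';
% before the corresponding minFunc call.)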
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/example_minFunc.m: -------------------------------------------------------------------------------- 1 | % Runs various limited-memory solvers on 2D rosenbrock function for 25 2 | % function evaluations 3 | maxFunEvals = 25; 4 | 5 | fprintf('Result after %d evaluations of limited-memory solvers on 2D rosenbrock:\n',maxFunEvals); 6 | 7 | fprintf('---------------------------------------\n'); 8 | fprintf('x1 = %.4f, x2 = %.4f (starting point)\n',0,0); 9 | fprintf('x1 = %.4f, x2 = %.4f (optimal solution)\n',1,1); 10 | fprintf('---------------------------------------\n'); 11 | 12 | if exist('minimize') == 2 13 | % Minimize.m - conjugate gradient method 14 | x = minimize([0 0]', 'rosenbrock', -maxFunEvals); 15 | fprintf('x1 = %.4f, x2 = %.4f (minimize.m by C. Rasmussen)\n',x(1),x(2)); 16 | end 17 | 18 | options = []; 19 | options.display = 'none'; 20 | options.maxFunEvals = maxFunEvals; 21 | 22 | % Steepest Descent 23 | options.Method = 'sd'; 24 | x = minFunc(@rosenbrock,[0 0]',options); 25 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with steepest descent)\n',x(1),x(2)); 26 | 27 | % Cyclic Steepest Descent 28 | options.Method = 'csd'; 29 | x = minFunc(@rosenbrock,[0 0]',options); 30 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with cyclic steepest descent)\n',x(1),x(2)); 31 | 32 | % Barzilai & Borwein 33 | options.Method = 'bb'; 34 | options.bbType = 1; 35 | x = minFunc(@rosenbrock,[0 0]',options); 36 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with spectral gradient descent)\n',x(1),x(2)); 37 | 38 | % Hessian-Free Newton 39 | options.Method = 'newton0'; 40 | x = minFunc(@rosenbrock,[0 0]',options); 41 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with Hessian-free Newton)\n',x(1),x(2)); 42 | 43 | % Hessian-Free Newton w/ L-BFGS preconditioner 44 | options.Method = 'pnewton0'; 45 | x = minFunc(@rosenbrock,[0 0]',options); 46 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned Hessian-free Newton)\n',x(1),x(2)); 47 | 48 | % Conjugate Gradient 49 | options.Method = 'cg'; 50 | x = minFunc(@rosenbrock,[0 0]',options); 51 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with conjugate gradient)\n',x(1),x(2)); 52 | 53 | % Scaled conjugate Gradient 54 | options.Method = 'scg'; 55 | x = minFunc(@rosenbrock,[0 0]',options); 56 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with scaled conjugate gradient)\n',x(1),x(2)); 57 | 58 | % Preconditioned Conjugate Gradient 59 | options.Method = 'pcg'; 60 | x = minFunc(@rosenbrock,[0 0]',options); 61 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned conjugate gradient)\n',x(1),x(2)); 62 | 63 | % Default: L-BFGS (default) 64 | options.Method = 'lbfgs'; 65 | x = minFunc(@rosenbrock,[0 0]',options); 66 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with limited-memory BFGS - default)\n',x(1),x(2)); 67 | 68 | p.MaxIter = maxFunEvals; 69 | x = rprop(@rosenbrock,[0 0]',p); 70 | fprintf('x1 = %.4f, x2 = %.4f (Rprop - default)\n',x(1),x(2)); 71 | 72 | fprintf('---------------------------------------\n'); 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/lbfgsC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgs.m for details! */ 5 | /* This function may not exit gracefully on bad input! 
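   (Annotation, not in the original source: this MEX file is optional - with
   options.useMex set to 0, minFunc uses the pure-MATLAB implementation
   instead; see lbfgs.m, which computes the same two-loop search direction.)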
*/ 6 | 7 | 8 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 9 | { 10 | /* Variable Declarations */ 11 | 12 | double *s, *y, *g, *H, *d, *ro, *alpha, *beta, *q, *r; 13 | int nVars,nSteps,lhs_dims[2]; 14 | double temp; 15 | int i,j; 16 | 17 | /* Get Input Pointers */ 18 | 19 | g = mxGetPr(prhs[0]); 20 | s = mxGetPr(prhs[1]); 21 | y = mxGetPr(prhs[2]); 22 | H = mxGetPr(prhs[3]); 23 | 24 | /* Compute number of variables (p), rank of update (d) */ 25 | 26 | nVars = mxGetDimensions(prhs[1])[0]; 27 | nSteps = mxGetDimensions(prhs[1])[1]; 28 | 29 | /* Allocated Memory for Function Variables */ 30 | ro = mxCalloc(nSteps,sizeof(double)); 31 | alpha = mxCalloc(nSteps,sizeof(double)); 32 | beta = mxCalloc(nSteps,sizeof(double)); 33 | q = mxCalloc(nVars*(nSteps+1),sizeof(double)); 34 | r = mxCalloc(nVars*(nSteps+1),sizeof(double)); 35 | 36 | /* Set-up Output Vector */ 37 | 38 | lhs_dims[0] = nVars; 39 | lhs_dims[1] = 1; 40 | 41 | plhs[0] = mxCreateNumericArray(2,lhs_dims,mxDOUBLE_CLASS,mxREAL); 42 | d = mxGetPr(plhs[0]); 43 | 44 | /* ro = 1/(y(:,i)'*s(:,i)) */ 45 | for(i=0;i=0;i--) 62 | { 63 | /* alpha(i) = ro(i)*s(:,i)'*q(:,i+1) */ 64 | alpha[i] = 0; 65 | for(j=0;j= xminBound && xCP <= xmaxBound 106 | fCP = polyval(params,xCP); 107 | if imag(fCP)==0 && fCP < fmin 108 | minPos = real(xCP); 109 | fmin = real(fCP); 110 | end 111 | end 112 | end 113 | 114 | % Plot Situation 115 | if doPlot 116 | clf; hold on; 117 | 118 | % Plot Points 119 | plot(points(:,1),points(:,2),'b*'); 120 | 121 | % Plot Derivatives 122 | for i = 1:nPoints 123 | if isreal(points(i,3)) 124 | m = points(i,3); 125 | b = points(i,2) - m*points(i,1); 126 | plot([points(i,1)-.05 points(i,1)+.05],... 127 | [(points(i,1)-.05)*m+b (points(i,1)+.05)*m+b],'c.-'); 128 | end 129 | end 130 | 131 | % Plot Function 132 | x = min(xmin,xminBound)-.1:(max(xmax,xmaxBound)+.1-min(xmin,xminBound)+.1)/100:max(xmax,xmaxBound)+.1; 133 | for i = 1:length(x) 134 | f(i) = polyval(params,x(i)); 135 | end 136 | plot(x,f,'y'); 137 | axis([x(1)-.1 x(end)+.1 min(f)-.1 max(f)+.1]); 138 | 139 | % Plot Minimum 140 | plot(minPos,fmin,'g+'); 141 | if doPlot == 1 142 | pause(1); 143 | end 144 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/minFunc_processInputOptions.m: -------------------------------------------------------------------------------- 1 | 2 | function [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,optTol,progTol,method,... 3 | corrections,c1,c2,LS_init,cgSolve,qnUpdate,cgUpdate,initialHessType,... 4 | HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 5 | Damped,HvFunc,bbType,cycle,... 6 | HessianIter,outputFcn,useMex,useNegCurv,precFunc,... 7 | LS_type,LS_interp,LS_multi,DerivativeCheck] = ... 
8 | minFunc_processInputOptions(o) 9 | 10 | % Constants 11 | SD = 0; 12 | CSD = 1; 13 | BB = 2; 14 | CG = 3; 15 | PCG = 4; 16 | LBFGS = 5; 17 | QNEWTON = 6; 18 | NEWTON0 = 7; 19 | NEWTON = 8; 20 | TENSOR = 9; 21 | 22 | verbose = 1; 23 | verboseI= 1; 24 | debug = 0; 25 | doPlot = 0; 26 | method = LBFGS; 27 | cgSolve = 0; 28 | 29 | o = toUpper(o); 30 | 31 | if isfield(o,'DISPLAY') 32 | switch(upper(o.DISPLAY)) 33 | case 0 34 | verbose = 0; 35 | verboseI = 0; 36 | case 'FINAL' 37 | verboseI = 0; 38 | case 'OFF' 39 | verbose = 0; 40 | verboseI = 0; 41 | case 'NONE' 42 | verbose = 0; 43 | verboseI = 0; 44 | case 'FULL' 45 | debug = 1; 46 | case 'EXCESSIVE' 47 | debug = 1; 48 | doPlot = 1; 49 | end 50 | end 51 | 52 | DerivativeCheck = 0; 53 | if isfield(o,'DERIVATIVECHECK') 54 | switch(upper(o.DERIVATIVECHECK)) 55 | case 1 56 | DerivativeCheck = 1; 57 | case 'ON' 58 | DerivativeCheck = 1; 59 | end 60 | end 61 | 62 | LS_init = 0; 63 | LS_type = 1; 64 | LS_interp = 2; 65 | LS_multi = 0; 66 | Fref = 1; 67 | Damped = 0; 68 | HessianIter = 1; 69 | c2 = 0.9; 70 | if isfield(o,'METHOD') 71 | m = upper(o.METHOD); 72 | switch(m) 73 | case 'TENSOR' 74 | method = TENSOR; 75 | case 'NEWTON' 76 | method = NEWTON; 77 | case 'MNEWTON' 78 | method = NEWTON; 79 | HessianIter = 5; 80 | case 'PNEWTON0' 81 | method = NEWTON0; 82 | cgSolve = 1; 83 | case 'NEWTON0' 84 | method = NEWTON0; 85 | case 'QNEWTON' 86 | method = QNEWTON; 87 | Damped = 1; 88 | case 'LBFGS' 89 | method = LBFGS; 90 | case 'BB' 91 | method = BB; 92 | LS_type = 0; 93 | Fref = 20; 94 | case 'PCG' 95 | method = PCG; 96 | c2 = 0.2; 97 | LS_init = 2; 98 | case 'SCG' 99 | method = CG; 100 | c2 = 0.2; 101 | LS_init = 4; 102 | case 'CG' 103 | method = CG; 104 | c2 = 0.2; 105 | LS_init = 2; 106 | case 'CSD' 107 | method = CSD; 108 | c2 = 0.2; 109 | Fref = 10; 110 | LS_init = 2; 111 | case 'SD' 112 | method = SD; 113 | LS_init = 2; 114 | end 115 | end 116 | 117 | maxFunEvals = getOpt(o,'MAXFUNEVALS',1000); 118 | maxIter = getOpt(o,'MAXITER',500); 119 | optTol = getOpt(o,'OPTTOL',1e-5); 120 | progTol = getOpt(o,'PROGTOL',1e-9); 121 | corrections = getOpt(o,'CORRECTIONS',100); 122 | corrections = getOpt(o,'CORR',corrections); 123 | c1 = getOpt(o,'C1',1e-4); 124 | c2 = getOpt(o,'C2',c2); 125 | LS_init = getOpt(o,'LS_INIT',LS_init); 126 | cgSolve = getOpt(o,'CGSOLVE',cgSolve); 127 | qnUpdate = getOpt(o,'QNUPDATE',3); 128 | cgUpdate = getOpt(o,'CGUPDATE',2); 129 | initialHessType = getOpt(o,'INITIALHESSTYPE',1); 130 | HessianModify = getOpt(o,'HESSIANMODIFY',0); 131 | Fref = getOpt(o,'FREF',Fref); 132 | useComplex = getOpt(o,'USECOMPLEX',0); 133 | numDiff = getOpt(o,'NUMDIFF',0); 134 | LS_saveHessianComp = getOpt(o,'LS_SAVEHESSIANCOMP',1); 135 | Damped = getOpt(o,'DAMPED',Damped); 136 | HvFunc = getOpt(o,'HVFUNC',[]); 137 | bbType = getOpt(o,'BBTYPE',0); 138 | cycle = getOpt(o,'CYCLE',3); 139 | HessianIter = getOpt(o,'HESSIANITER',HessianIter); 140 | outputFcn = getOpt(o,'OUTPUTFCN',[]); 141 | useMex = getOpt(o,'USEMEX',1); 142 | useNegCurv = getOpt(o,'USENEGCURV',1); 143 | precFunc = getOpt(o,'PRECFUNC',[]); 144 | LS_type = getOpt(o,'LS_type',LS_type); 145 | LS_interp = getOpt(o,'LS_interp',LS_interp); 146 | LS_multi = getOpt(o,'LS_multi',LS_multi); 147 | end 148 | 149 | function [v] = getOpt(options,opt,default) 150 | if isfield(options,opt) 151 | if ~isempty(getfield(options,opt)) 152 | v = getfield(options,opt); 153 | else 154 | v = default; 155 | end 156 | else 157 | v = default; 158 | end 159 | end 160 | 161 | function [o] = toUpper(o) 162 | if 
~isempty(o)
163 | fn = fieldnames(o);
164 | for i = 1:length(fn)
165 | o = setfield(o,upper(fn{i}),getfield(o,fn{i}));
166 | end
167 | end
168 | end
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/mcholC.c: --------------------------------------------------------------------------------
1 | #include <math.h>
2 | #include "mex.h"
3 | 
4 | double mymax(double x, double y)
5 | {
6 | if (x > y)
7 | return x;
8 | else
9 | return y;
10 | }
11 | 
12 | double absolute(double x)
13 | {
14 | if (x >= -x)
15 | return x;
16 | else
17 | return -x;
18 | }
19 | 
20 | void permuteInt(int *x, int p, int q)
21 | {
22 | int temp;
23 | temp = x[p];
24 | x[p] = x[q];
25 | x[q] = temp;
26 | }
27 | 
28 | void permute(double *x, int p, int q)
29 | {
30 | double temp;
31 | temp = x[p];
32 | x[p] = x[q];
33 | x[q] = temp;
34 | }
35 | 
36 | void permuteRows(double *x, int p, int q,int n)
37 | {
38 | int i;
39 | double temp;
40 | for(i = 0; i < n; i++)
41 | {
42 | temp = x[p+i*n];
43 | x[p+i*n] = x[q+i*n];
44 | x[q+i*n] = temp;
45 | }
46 | }
47 | 
48 | void permuteCols(double *x, int p, int q,int n)
49 | {
50 | int i;
51 | double temp;
52 | for(i = 0; i < n; i++)
53 | {
54 | temp = x[i+p*n];
55 | x[i+p*n] = x[i+q*n];
56 | x[i+q*n] = temp;
57 | }
58 | }
59 | 
60 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
61 | {
62 | int n,sizL[2],sizD[2],i,j,q,s,
63 | *P;
64 | 
65 | double mu,gamma,xi,delta,beta,maxVal,theta,
66 | *c, *H, *L, *D, *A;
67 | 
68 | /* Input */
69 | H = mxGetPr(prhs[0]);
70 | if (nrhs == 1)
71 | {
72 | mu = 1e-12;
73 | }
74 | else
75 | {
76 | mu = mxGetScalar(prhs[1]);
77 | }
78 | 
79 | /* Compute Sizes */
80 | n = mxGetDimensions(prhs[0])[0];
81 | 
82 | /* Form Output */
83 | sizL[0] = n;
84 | sizL[1] = n;
85 | plhs[0] = mxCreateNumericArray(2,sizL,mxDOUBLE_CLASS,mxREAL);
86 | L = mxGetPr(plhs[0]);
87 | sizD[0] = n;
88 | sizD[1] = 1;
89 | plhs[1] = mxCreateNumericArray(2,sizD,mxDOUBLE_CLASS,mxREAL);
90 | D = mxGetPr(plhs[1]);
91 | plhs[2] = mxCreateNumericArray(2,sizD,mxINT32_CLASS,mxREAL);
92 | P = (int*)mxGetData(plhs[2]);
93 | 
94 | /* Initialize */
95 | c = mxCalloc(n*n,sizeof(double));
96 | A = mxCalloc(n*n,sizeof(double));
97 | 
98 | for (i = 0; i < n; i++)
99 | {
100 | P[i] = i;
101 | for (j = 0;j < n; j++)
102 | {
103 | A[i+n*j] = H[i+n*j];
104 | }
105 | }
106 | 
107 | gamma = 0;
108 | for (i = 0; i < n; i++)
109 | {
110 | L[i+n*i] = 1;
111 | c[i+n*i] = A[i+n*i];
112 | }
113 | 
114 | /* Compute modification parameters */
115 | gamma = -1;
116 | xi = -1;
117 | for (i = 0; i < n; i++)
118 | {
119 | gamma = mymax(gamma,absolute(A[i+n*i]));
120 | for (j = 0;j < n; j++)
121 | {
122 | /*printf("A(%d,%d) = %f, %f\n",i,j,A[i+n*j],absolute(A[i+n*j]));*/
123 | if (i != j)
124 | xi = mymax(xi,absolute(A[i+n*j]));
125 | }
126 | }
127 | delta = mu*mymax(gamma+xi,1);
128 | 
129 | if (n > 1)
130 | {
131 | beta = sqrt(mymax(gamma,mymax(mu,xi/sqrt(n*n-1))));
132 | }
133 | else
134 | {
135 | beta = sqrt(mymax(gamma,mu));
136 | }
137 | 
138 | for (j = 0; j < n; j++)
139 | {
140 | 
141 | /* Find q that results in Best Permutation with j */
142 | maxVal = -1;
143 | q = 0;
144 | for(i = j; i < n; i++)
145 | {
146 | if (absolute(c[i+n*i]) > maxVal)
147 | {
148 | maxVal = mymax(maxVal,absolute(c[i+n*i]));
149 | q = i;
150 | }
151 | }
152 | 
153 | /* Permute D,c,L,A,P */
154 | permute(D,j,q);
155 | permuteInt(P,j,q);
156 | permuteRows(c,j,q,n);
157 | permuteCols(c,j,q,n);
158 | permuteRows(L,j,q,n);
159 | permuteCols(L,j,q,n);
160 | 
permuteRows(A,j,q,n);
161 | permuteCols(A,j,q,n);
162 | 
163 | for(s = 0; s <= j-1; s++)
164 | L[j+n*s] = c[j+n*s]/D[s];
165 | 
166 | for(i = j+1; i < n; i++)
167 | {
168 | c[i+j*n] = A[i+j*n];
169 | for(s = 0; s <= j-1; s++)
170 | {
171 | c[i+j*n] -= L[j+n*s]*c[i+n*s];
172 | }
173 | }
174 | 
175 | theta = 0;
176 | if (j < n-1)
177 | {
178 | for(i = j+1;i < n; i++)
179 | theta = mymax(theta,absolute(c[i+n*j]));
180 | }
181 | 
182 | D[j] = mymax(absolute(c[j+n*j]),mymax(delta,theta*theta/(beta*beta)));
183 | 
184 | if (j < n-1)
185 | {
186 | for(i = j+1; i < n; i++)
187 | {
188 | c[i+n*i] = c[i+n*i] - c[i+n*j]*c[i+n*j]/D[j];
189 | }
190 | }
191 | 
192 | }
193 | 
194 | for(i = 0; i < n; i++)
195 | P[i]++;
196 | 
197 | mxFree(c);
198 | mxFree(A);
199 | }
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/ArmijoBacktrack.m: --------------------------------------------------------------------------------
1 | function [t,x_new,f_new,g_new,funEvals,H] = ArmijoBacktrack(...
2 | x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,funObj,varargin)
3 | % [t,x_new,f_new,g_new,funEvals,H] = ArmijoBacktrack(...
4 | % x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,funObj,varargin)
5 | %
6 | % Backtracking linesearch to satisfy Armijo condition
7 | %
8 | % Inputs:
9 | % x: starting location
10 | % t: initial step size
11 | % d: descent direction
12 | % f: function value at starting location
13 | % fr: reference function value (usually funObj(x))
14 | % gtd: directional derivative at starting location
15 | % c1: sufficient decrease parameter
16 | % debug: display debugging information
17 | % LS_interp: type of interpolation
18 | % progTol: minimum allowable step length
19 | % doPlot: do a graphical display of interpolation
20 | % funObj: objective function
21 | % varargin: parameters of objective function
22 | %
23 | % Outputs:
24 | % t: step length
25 | % f_new: function value at x+t*d
26 | % g_new: gradient value at x+t*d
27 | % funEvals: number of function evaluations performed by line search
28 | % H: Hessian at initial guess (only computed if requested)
29 | %
30 | % recent change: LS changed to LS_interp and LS_multi
31 | 
32 | % Evaluate the Objective and Gradient at the Initial Step
33 | if nargout == 6
34 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
35 | else
36 | [f_new,g_new] = funObj(x+t*d,varargin{:});
37 | end
38 | funEvals = 1;
39 | 
40 | while f_new > fr + c1*t*gtd || ~isLegal(f_new)
41 | temp = t;
42 | 
43 | if LS_interp == 0 || ~isLegal(f_new)
44 | % Ignore value of new point
45 | if debug
46 | fprintf('Fixed BT\n');
47 | end
48 | t = 0.5*t;
49 | elseif LS_interp == 1 || ~isLegal(g_new)
50 | % Use function value at new point, but not its derivative
51 | if funEvals < 2 || LS_multi == 0 || ~isLegal(f_prev)
52 | % Backtracking w/ quadratic interpolation based on two points
53 | if debug
54 | fprintf('Quad BT\n');
55 | end
56 | t = polyinterp([0 f gtd; t f_new sqrt(-1)],doPlot,0,t);
57 | else
58 | % Backtracking w/ cubic interpolation based on three points
59 | if debug
60 | fprintf('Cubic BT\n');
61 | end
62 | t = polyinterp([0 f gtd; t f_new sqrt(-1); t_prev f_prev sqrt(-1)],doPlot,0,t);
63 | end
64 | else
65 | % Use function value and derivative at new point
66 | 
67 | if funEvals < 2 || LS_multi == 0 || ~isLegal(f_prev)
68 | % Backtracking w/ cubic interpolation w/ derivative
69 | if debug
70 | fprintf('Grad-Cubic BT\n');
71 | end
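% (Note on the convention used in the polyinterp calls below and throughout
% minFunc: each row passed to polyinterp is [x f(x) f'(x)], and an entry of
% sqrt(-1) marks that value as unknown, so the polynomial fit uses only the
% values that are actually available.)
72 | t = polyinterp([0 f gtd; t f_new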
g_new'*d],doPlot,0,t);
73 | elseif ~isLegal(g_prev)
74 | % Backtracking w/ quartic interpolation of 3 points and derivative
75 | % of two
76 | if debug
77 | fprintf('Grad-Quartic BT\n');
78 | end
79 | t = polyinterp([0 f gtd; t f_new g_new'*d; t_prev f_prev sqrt(-1)],doPlot,0,t);
80 | else
81 | % Backtracking w/ quintic interpolation of 3 points and derivative
82 | % of two
83 | if debug
84 | fprintf('Grad-Quintic BT\n');
85 | end
86 | t = polyinterp([0 f gtd; t f_new g_new'*d; t_prev f_prev g_prev'*d],doPlot,0,t);
87 | end
88 | end
89 | 
90 | % Adjust if change in t is too small/large
91 | if t < temp*1e-3
92 | if debug
93 | fprintf('Interpolated Value Too Small, Adjusting\n');
94 | end
95 | t = temp*1e-3;
96 | elseif t > temp*0.6
97 | if debug
98 | fprintf('Interpolated Value Too Large, Adjusting\n');
99 | end
100 | t = temp*0.6;
101 | end
102 | 
103 | % Store old point if doing three-point interpolation
104 | if LS_multi
105 | f_prev = f_new;
106 | t_prev = temp;
107 | if LS_interp == 2
108 | g_prev = g_new;
109 | end
110 | end
111 | 
112 | if ~saveHessianComp && nargout == 6
113 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
114 | else
115 | [f_new,g_new] = funObj(x + t*d,varargin{:});
116 | end
117 | funEvals = funEvals+1;
118 | 
119 | % Check whether step size has become too small
120 | if max(abs(t*d)) <= progTol
121 | if debug
122 | fprintf('Backtracking Line Search Failed\n');
123 | end
124 | t = 0;
125 | f_new = f;
126 | g_new = g;
127 | break;
128 | end
129 | end
130 | 
131 | % Evaluate Hessian at new point
132 | if nargout == 6 && funEvals > 1 && saveHessianComp
133 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
134 | funEvals = funEvals+1;
135 | end
136 | 
137 | x_new = x + t*d;
138 | 
139 | end
140 | 
-------------------------------------------------------------------------------- /MC_DMF.m: --------------------------------------------------------------------------------
1 | function [Xr,NN_MF]=MC_DMF(X,M,s,options)%%% X:[n m]
2 | % Code for Matrix completion by deep matrix factorization
3 | % When using this code, please cite the following paper:
4 | % "Matrix completion by deep matrix factorization"
5 | % Jicong Fan, Jieyu Cheng. Neural Networks, 2018(98):34-41
6 | % inputs
7 | % -- X: an n by m matrix with missing entries (m variables, n samples)
8 | % -- M: binary mask matrix for X, 1 indicates observed, 0 indicates missing
9 | % -- s: network structure, a row vector with L elements, indicating one
10 | % input layer, L-2 hidden layers, and one output layer;
11 | % e.g. [r 5*r 10*r m], the last value must equal 'm'; r<size(NN_MF.W{1},1)
180 | NN_MF.dZ=d{2}(:,2:end)*NN_MF.W{1}(:,2:end);
181 | else
182 | NN_MF.dZ=d{2}(:,1:end)*NN_MF.W{1}(:,2:end);
183 | end
184 | NN_MF.dZ=NN_MF.dZ/NN_MF.n;
185 | end
186 | 
187 | 
188 | 
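A minimal usage sketch for MC_DMF on synthetic data (illustrative only: the fields of 'options' are documented in the portion of this file lost above, so the empty struct below is an assumption; DMF_example.m in this repository shows the intended call):

nTrue = randn(200,4)*randn(4,20);      % low-rank ground truth, n = 200, m = 20
M = double(rand(200,20) > 0.3);        % binary mask, 1 = observed
r = 4; s = [r 5*r 10*r 20];            % network structure, last entry = m
options = struct();                    % assumed: defaults used when empty
[Xr,NN_MF] = MC_DMF(nTrue.*M, M, s, options);

-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/WolfeLineSearch.m: --------------------------------------------------------------------------------
1 | function [t,f_new,g_new,funEvals,H] = WolfeLineSearch(...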
2 | x,t,d,f,g,gtd,c1,c2,LS_interp,LS_multi,maxLS,progTol,debug,doPlot,saveHessianComp,funObj,varargin)
3 | %
4 | % Bracketing Line Search to Satisfy Wolfe Conditions
5 | %
6 | % Inputs:
7 | % x: starting location
8 | % t: initial step size
9 | % d: descent direction
10 | % f: function value at starting location
11 | % g: gradient at starting location
12 | % gtd: directional derivative at starting location
13 | % c1: sufficient decrease parameter
14 | % c2: curvature parameter
15 | % debug: display debugging information
16 | % LS_interp: type of interpolation
17 | % maxLS: maximum number of iterations
18 | % progTol: minimum allowable step length
19 | % doPlot: do a graphical display of interpolation
20 | % funObj: objective function
21 | % varargin: parameters of objective function
22 | %
23 | % Outputs:
24 | % t: step length
25 | % f_new: function value at x+t*d
26 | % g_new: gradient value at x+t*d
27 | % funEvals: number of function evaluations performed by line search
28 | % H: Hessian at initial guess (only computed if requested)
29 | 
30 | % Evaluate the Objective and Gradient at the Initial Step
31 | if nargout == 5
32 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
33 | else
34 | [f_new,g_new] = funObj(x+t*d,varargin{:});
35 | end
36 | funEvals = 1;
37 | gtd_new = g_new'*d;
38 | 
39 | % Bracket an Interval containing a point satisfying the
40 | % Wolfe criteria
41 | 
42 | LSiter = 0;
43 | t_prev = 0;
44 | f_prev = f;
45 | g_prev = g;
46 | gtd_prev = gtd;
47 | nrmD = max(abs(d));
48 | done = 0;
49 | 
50 | while LSiter < maxLS
51 | 
52 | %% Bracketing Phase
53 | if ~isLegal(f_new) || ~isLegal(g_new)
54 | if debug
55 | fprintf('Extrapolated into illegal region, switching to Armijo line-search\n');
56 | end
57 | t = (t + t_prev)/2;
58 | % Do Armijo
59 | if nargout == 5
60 | [t,x_new,f_new,g_new,armijoFunEvals,H] = ArmijoBacktrack(...
61 | x,t,d,f,f,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,...
62 | funObj,varargin{:});
63 | else
64 | [t,x_new,f_new,g_new,armijoFunEvals] = ArmijoBacktrack(...
65 | x,t,d,f,f,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,...
66 | funObj,varargin{:});
67 | end
68 | funEvals = funEvals + armijoFunEvals;
69 | return;
70 | end
71 | 
72 | 
73 | if f_new > f + c1*t*gtd || (LSiter > 1 && f_new >= f_prev)
74 | bracket = [t_prev t];
75 | bracketFval = [f_prev f_new];
76 | bracketGval = [g_prev g_new];
77 | break;
78 | elseif abs(gtd_new) <= -c2*gtd
79 | bracket = t;
80 | bracketFval = f_new;
81 | bracketGval = g_new;
82 | done = 1;
83 | break;
84 | elseif gtd_new >= 0
85 | bracket = [t_prev t];
86 | bracketFval = [f_prev f_new];
87 | bracketGval = [g_prev g_new];
88 | break;
89 | end
90 | temp = t_prev;
91 | t_prev = t;
92 | minStep = t + 0.01*(t-temp);
93 | maxStep = t*10;
94 | if LS_interp <= 1
95 | if debug
96 | fprintf('Extending Bracket\n');
97 | end
98 | t = maxStep;
99 | elseif LS_interp == 2
100 | if debug
101 | fprintf('Cubic Extrapolation\n');
102 | end
103 | t = polyinterp([temp f_prev gtd_prev; t f_new gtd_new],doPlot,minStep,maxStep);
104 | elseif LS_interp == 3
105 | t = mixedExtrap(temp,f_prev,gtd_prev,t,f_new,gtd_new,minStep,maxStep,debug,doPlot);
106 | end
107 | 
108 | f_prev = f_new;
109 | g_prev = g_new;
110 | gtd_prev = gtd_new;
111 | if ~saveHessianComp && nargout == 5
112 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
113 | else
114 | [f_new,g_new] = funObj(x + t*d,varargin{:});
115 | end
116 | funEvals = funEvals + 1;
117 | gtd_new = g_new'*d;
118 | LSiter = LSiter+1;
119 | end
120 | 
121 | if LSiter == maxLS
122 | bracket = [0 t];
123 | bracketFval = [f f_new];
124 | bracketGval = [g g_new];
125 | end
126 | 
127 | %% Zoom Phase
128 | 
129 | % We now either have a point satisfying the criteria, or a bracket
130 | % surrounding a point satisfying the criteria
131 | % Refine the bracket until we find a point satisfying the criteria
132 | insufProgress = 0;
133 | Tpos = 2;
134 | LOposRemoved = 0;
135 | while ~done && LSiter < maxLS
136 | 
137 | % Find High and Low Points in bracket
138 | [f_LO LOpos] = min(bracketFval);
139 | HIpos = -LOpos + 3;
140 | 
141 | % Compute new trial value
142 | if LS_interp <= 1 || ~isLegal(bracketFval) || ~isLegal(bracketGval)
143 | if debug
144 | fprintf('Bisecting\n');
145 | end
146 | t = mean(bracket);
147 | elseif LS_interp == 2
148 | if debug
149 | fprintf('Grad-Cubic Interpolation\n');
150 | end
151 | t = polyinterp([bracket(1) bracketFval(1) bracketGval(:,1)'*d
152 | bracket(2) bracketFval(2) bracketGval(:,2)'*d],doPlot);
153 | else
154 | % Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
155 | nonTpos = -Tpos+3;
156 | if LOposRemoved == 0
157 | oldLOval = bracket(nonTpos);
158 | oldLOFval = bracketFval(nonTpos);
159 | oldLOGval = bracketGval(:,nonTpos);
160 | end
161 | t = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot);
162 | end
163 | 
164 | 
165 | % Test that we are making sufficient progress
166 | if min(max(bracket)-t,t-min(bracket))/(max(bracket)-min(bracket)) < 0.1
167 | if debug
168 | fprintf('Interpolation close to boundary');
169 | end
170 | if insufProgress || t>=max(bracket) || t <= min(bracket)
171 | if debug
172 | fprintf(', Evaluating at 0.1 away from boundary\n');
173 | end
174 | if abs(t-max(bracket)) < abs(t-min(bracket))
175 | t = max(bracket)-0.1*(max(bracket)-min(bracket));
176 | else
177 | t = min(bracket)+0.1*(max(bracket)-min(bracket));
178 | end
179 | insufProgress = 0;
180 | else
181 | if debug
182 | fprintf('\n');
183 | end
184 | insufProgress = 1;
185 | end
186 | else
187 | insufProgress = 0;
188 | end
189 | 
190 | % Evaluate new point
191 | if ~saveHessianComp && nargout == 5
192 | [f_new,g_new,H] 
= funObj(x + t*d,varargin{:}); 193 | else 194 | [f_new,g_new] = funObj(x + t*d,varargin{:}); 195 | end 196 | funEvals = funEvals + 1; 197 | gtd_new = g_new'*d; 198 | LSiter = LSiter+1; 199 | 200 | armijo = f_new < f + c1*t*gtd; 201 | if ~armijo || f_new >= f_LO 202 | % Armijo condition not satisfied or not lower than lowest 203 | % point 204 | bracket(HIpos) = t; 205 | bracketFval(HIpos) = f_new; 206 | bracketGval(:,HIpos) = g_new; 207 | Tpos = HIpos; 208 | else 209 | if abs(gtd_new) <= - c2*gtd 210 | % Wolfe conditions satisfied 211 | done = 1; 212 | elseif gtd_new*(bracket(HIpos)-bracket(LOpos)) >= 0 213 | % Old HI becomes new LO 214 | bracket(HIpos) = bracket(LOpos); 215 | bracketFval(HIpos) = bracketFval(LOpos); 216 | bracketGval(:,HIpos) = bracketGval(:,LOpos); 217 | if LS_interp == 3 218 | if debug 219 | fprintf('LO Pos is being removed!\n'); 220 | end 221 | LOposRemoved = 1; 222 | oldLOval = bracket(LOpos); 223 | oldLOFval = bracketFval(LOpos); 224 | oldLOGval = bracketGval(:,LOpos); 225 | end 226 | end 227 | % New point becomes new LO 228 | bracket(LOpos) = t; 229 | bracketFval(LOpos) = f_new; 230 | bracketGval(:,LOpos) = g_new; 231 | Tpos = LOpos; 232 | end 233 | 234 | if ~done && abs(bracket(1)-bracket(2))*nrmD < progTol 235 | if debug 236 | fprintf('Line-search bracket has been reduced below progTol\n'); 237 | end 238 | break; 239 | end 240 | 241 | end 242 | 243 | %% 244 | if LSiter == maxLS 245 | if debug 246 | fprintf('Line Search Exceeded Maximum Line Search Iterations\n'); 247 | end 248 | end 249 | 250 | [f_LO LOpos] = min(bracketFval); 251 | t = bracket(LOpos); 252 | f_new = bracketFval(LOpos); 253 | g_new = bracketGval(:,LOpos); 254 | 255 | 256 | 257 | % Evaluate Hessian at new point 258 | if nargout == 5 && funEvals > 1 && saveHessianComp 259 | [f_new,g_new,H] = funObj(x + t*d,varargin{:}); 260 | funEvals = funEvals + 1; 261 | end 262 | 263 | end 264 | 265 | 266 | %% 267 | function [t] = mixedExtrap(x0,f0,g0,x1,f1,g1,minStep,maxStep,debug,doPlot); 268 | alpha_c = polyinterp([x0 f0 g0; x1 f1 g1],doPlot,minStep,maxStep); 269 | alpha_s = polyinterp([x0 f0 g0; x1 sqrt(-1) g1],doPlot,minStep,maxStep); 270 | if alpha_c > minStep && abs(alpha_c - x1) < abs(alpha_s - x1) 271 | if debug 272 | fprintf('Cubic Extrapolation\n'); 273 | end 274 | t = alpha_c; 275 | else 276 | if debug 277 | fprintf('Secant Extrapolation\n'); 278 | end 279 | t = alpha_s; 280 | end 281 | end 282 | 283 | %% 284 | function [t] = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot); 285 | 286 | % Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 287 | nonTpos = -Tpos+3; 288 | 289 | gtdT = bracketGval(:,Tpos)'*d; 290 | gtdNonT = bracketGval(:,nonTpos)'*d; 291 | oldLOgtd = oldLOGval'*d; 292 | if bracketFval(Tpos) > oldLOFval 293 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd 294 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot); 295 | alpha_q = polyinterp([oldLOval oldLOFval oldLOgtd 296 | bracket(Tpos) bracketFval(Tpos) sqrt(-1)],doPlot); 297 | if abs(alpha_c - oldLOval) < abs(alpha_q - oldLOval) 298 | if debug 299 | fprintf('Cubic Interpolation\n'); 300 | end 301 | t = alpha_c; 302 | else 303 | if debug 304 | fprintf('Mixed Quad/Cubic Interpolation\n'); 305 | end 306 | t = (alpha_q + alpha_c)/2; 307 | end 308 | elseif gtdT'*oldLOgtd < 0 309 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd 310 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot); 311 | alpha_s = polyinterp([oldLOval oldLOFval oldLOgtd 312 | bracket(Tpos) sqrt(-1) gtdT],doPlot); 313 | if abs(alpha_c - 
bracket(Tpos)) >= abs(alpha_s - bracket(Tpos))
314 | if debug
315 | fprintf('Cubic Interpolation\n');
316 | end
317 | t = alpha_c;
318 | else
319 | if debug
320 | fprintf('Quad Interpolation\n');
321 | end
322 | t = alpha_s;
323 | end
324 | elseif abs(gtdT) <= abs(oldLOgtd)
325 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd
326 | bracket(Tpos) bracketFval(Tpos) gtdT],...
327 | doPlot,min(bracket),max(bracket));
328 | alpha_s = polyinterp([oldLOval sqrt(-1) oldLOgtd
329 | bracket(Tpos) bracketFval(Tpos) gtdT],...
330 | doPlot,min(bracket),max(bracket));
331 | if alpha_c > min(bracket) && alpha_c < max(bracket)
332 | if abs(alpha_c - bracket(Tpos)) < abs(alpha_s - bracket(Tpos))
333 | if debug
334 | fprintf('Bounded Cubic Extrapolation\n');
335 | end
336 | t = alpha_c;
337 | else
338 | if debug
339 | fprintf('Bounded Secant Extrapolation\n');
340 | end
341 | t = alpha_s;
342 | end
343 | else
344 | if debug
345 | fprintf('Bounded Secant Extrapolation\n');
346 | end
347 | t = alpha_s;
348 | end
349 | 
350 | if bracket(Tpos) > oldLOval
351 | t = min(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
352 | else
353 | t = max(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
354 | end
355 | else
356 | t = polyinterp([bracket(nonTpos) bracketFval(nonTpos) gtdNonT
357 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot);
358 | end
359 | end
-------------------------------------------------------------------------------- /rproptoolbox/Rprop/rprop.m: --------------------------------------------------------------------------------
1 | % RPROP Unconstrained local minimization using Rprop
2 | % [X,E,EXITFLAG,STATS] = RPROP(FUNCGRAD,X0,PARAMETERS,VARARGIN) Minimize a
3 | % function FUNCGRAD starting from the parameters X0. Optionally a
4 | % structure PARAMETERS can be used to override the default parameters.
5 | % Each additional parameter VARARGIN will be passed to function FUNCGRAD.
6 | % The function returns the optimized parameters X, the final objective
7 | % value E, a flag EXITFLAG that encodes the condition that triggered the
8 | % end of the optimization process, and lastly a structure STATS that
9 | % contains various information about the optimization process itself.
10 | %
11 | % FUNCGRAD: Function or handle to function. Must take the form
12 | % [F,G] = FUNCGRAD(X) where
13 | % X: Parameters
14 | % F: [1 x 1] Objective value
15 | % G: [size(X)] Gradient
16 | %
17 | % X0: Can be either a matrix or a cell of matrices
18 | %
19 | % PARAMETERS:
20 | % method: Rprop method used, accepts {'Rprop+','Rprop-',
21 | % 'IRprop+','IRprop-'}.
22 | % [Default = 'Rprop+']
23 | % MaxIter: Stop criterion 0: Maximum number of iterations, accepts
24 | % numeric values.
25 | % [Default = 100]
26 | % d_Obj: Stop criterion 3: Minimum Objective value, accepts
27 | % numeric values.
28 | % [Default = 10e-12]
29 | % d_time: Stop criterion 4: Maximum time, accepts numeric values
30 | % or {inf}.
31 | % [Default = inf]
32 | % Tolfun: Stop criterion 1: Minimum decrease of the Objective
33 | % value, accepts numeric values (p.Tolfun triggers only
34 | % while the objective still improves, i.e. if it gets
35 | % better really slowly, but not if it gets worse).
36 | % [Default = 10e-9]
37 | % TolX: Stop criterion 2: Minimum magnitude of the gradient,
38 | % accepts numeric values (triggers when the largest
39 | % absolute entry of the gradient of the parameters
40 | % falls below TolX).
41 | % [Default = 10e-9]
42 | % mu_neg: Rprop's decrease factor.
43 | % [Default = 0.5]
44 | % mu_pos: Rprop's increase factor.
45 | % [Default = 1.2]
46 | % delta0: Rprop's initial update-value.
47 | % [Default = 0.0123]
48 | % delta_min: Rprop's lower bound for step size.
49 | % [Default = 0]
50 | % delta_max: Rprop's upper bound for step size.
51 | % [Default = 50]
52 | % verbosity: Determines the amount of information to print during the
53 | % optimization process, accepts numeric values [0-3].
54 | % [Default = 0]
55 | % display: Plot the Objective value during the optimization
56 | % process. NOTE: SLOWS DOWN THE OPTIMIZATION CONSIDERABLY!
57 | % It's preferable to plot stats.error once optimized.
58 | % [Default = false]
59 | % indent: Base indentation level for printing.
60 | % [Default = 0]
61 | % useGPU: If true, enables GPU acceleration when available (GPU
62 | % support is checked via GPU.GPUsupport). NOTE: FOR SMALL
63 | % OPTIMIZATION PROBLEMS MIGHT SLOW DOWN THE COMPUTATION!
64 | % [Default = false]
65 | % funcgradgpu: If true the function FUNCGRAD will be fed with
66 | % GPUArray data (speeds up the computation when using GPU
67 | % acceleration, but requires a compatible FUNCGRAD).
68 | % [Default = false]
69 | % outputgpu: If true X will be returned as GPUArray (whenever it
70 | % has been computed as GPUArray).
71 | % [Default = false]
72 | % full_hist: If true STATS.full_hist will include all the parameters
73 | % throughout the optimization process.
74 | % NOTE: IT MIGHT GET REALLY BIG! ([p.MaxIter x size(X)])
75 | % [Default = false]
76 | %
77 | % VARARGIN: Will be passed as argument to FUNCGRAD.
78 | %
79 | % EXITFLAG:
80 | % 0 = Maximum number of iterations PARAMETERS.MaxIter reached.
81 | % 1 = Minimum variation of Obj value PARAMETERS.Tolfun reached.
82 | % 2 = Minimum variation of the gradient of parameters PARAMETERS.TolX
83 | % reached.
84 | % 3 = Minimum Objective value PARAMETERS.d_Obj reached.
85 | % 4 = Maximum computational time PARAMETERS.d_time reached.
86 | %
87 | %
88 | % STATS:
89 | % error: [N_ITER x 1] Objective values during the optimization
90 | % process.
91 | % time: [N_ITER x 1] Time spent since the beginning of the
92 | % optimization process.
93 | % full_hist: {N_ITER x 1} If PARAMETERS.full_hist is 'true', each
94 | % cell contains X for that particular iteration.
95 | % FunEvals: Number of function evaluations (always N_ITER+1).
96 | %
97 | %
98 | % References:
99 | % [1] C. Igel and M. Hüsken. Improving the Rprop Learning Algorithm.
100 | % Neural Computation, pp. 115-121, 2000.
101 | % [2] C. Igel and M. Hüsken. Empirical Evaluation of the Improved
102 | % Rprop Learning Algorithm. Neurocomputing 50, pp. 105-123, 2003.
103 | % [3] M. Riedmiller and H. Braun. A direct adaptive method for faster
104 | % backpropagation learning: the RPROP algorithm. International
105 | % Conference on Neural Networks, pp. 586-591, IEEE Press, 1993.
106 | % [4] M. Riedmiller. Advanced supervised learning in multilayer
107 | % perceptrons-from backpropagation to adaptive learning
108 | % techniques. International Journal of Computer Standards and
109 | % Interfaces 16(3), pp. 265-278, 1994.
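%
% Example (an illustrative sketch; it assumes rosenbrock.m, which ships in
% this folder, follows the [F,G] = FUNCGRAD(X) interface described above):
%   opts.method = 'IRprop-';
%   opts.MaxIter = 1000;
%   opts.verbosity = 1;
%   [x,E,exitflag,stats] = rprop(@rosenbrock,[-1.2;1],opts);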
110 | %
111 | %
112 | % Toolbox website:
113 | % http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox
114 | %
115 | %
116 | % If used for scientific publications please cite explicitly:
117 | % -----------------------------------------------------------------------
118 | % @MISC{rproptoolbox,
119 | % author = {Calandra, Roberto},
120 | % title = {Rprop Toolbox for {MATLAB}},
121 | % year = {2011},
122 | % howpublished = {\url{http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox}}
123 | % }
124 | % -----------------------------------------------------------------------
125 | %
126 | 
127 | % Copyright (c) 2011 Roberto Calandra
128 | % $Revision: 0.96 $
129 | 
130 | % TODO: what happens when x0 is a gpuArray
131 | % TODO: change d_Obj to 0(-inf), default
132 | 
133 | 
134 | function [x,E,exitflag,stats] = rprop(funcgrad,x0,parameters,varargin)
135 | %% Input validation
136 | 
137 | assert(isa(funcgrad,'function_handle'),'Invalid format of FUNCGRAD')
138 | 
139 | if exist('parameters','var')
140 | assert(isstruct(parameters),'PARAMETERS is not a structure')
141 | end
142 | 
143 | % Start the timer
144 | rpropclock = tic;
145 | 
146 | 
147 | %% Parameters
148 | 
149 | % Default Parameters
150 | p.method = 'Rprop+'; % Rprop method used
151 | p.MaxIter = 100; % Stop 0: Maximum number of iterations
152 | p.Tolfun = 10e-9; % Stop 1: Minimum Delta of value
153 | p.TolX = 10e-9; % Stop 2: Minimum Delta of parameters
154 | p.d_Obj = 10e-12; % Stop 3: Minimum value
155 | p.d_time = inf; % Stop 4: Maximum time
156 | p.mu_neg = 0.5; % Decrease factor
157 | p.mu_pos = 1.2; % Increase factor
158 | p.delta0 = 0.0123; % Initial update-value
159 | p.delta_min = 0; % Lower bound for step size
160 | p.delta_max = 50; % Upper bound for step size
161 | p.verbosity = 0; % [0-3] verbosity mode
162 | p.display = false; % Plot optimization process
163 | p.indent = 0; % Base for indentation
164 | p.useGPU = false; % Use GPU if possible
165 | p.funcgradgpu = false; % Enable if funcgrad accepts gpuArray
166 | p.outputgpu = false; % Enable if you want x to be a gpuArray
167 | p.full_hist = false; % Return the full history of parameters
168 | 
169 | % Override default parameters with any passed ones
170 | if exist('parameters','var')
171 | t_p = fieldnames(parameters);
172 | for i = 1:size(t_p,1)
173 | if isfield(p,t_p{i})
174 | p.(t_p{i}) = parameters.(t_p{i});
175 | else
176 | fprintf(2,'%s: unknown parameter passed: %s\n',mfilename,t_p{i})
177 | end
178 | end
179 | end
180 | 
181 | % Validate Parameters
182 | p.MaxIter = round(p.MaxIter);
183 | assert(isfinite(p.MaxIter),'PARAMETERS.MaxIter must be finite')
184 | assert(p.MaxIter>0,'PARAMETERS.MaxIter must be positive')
185 | 
186 | 
187 | %% Initialization
188 | 
189 | x = x0;
190 | 
191 | % Are we using Rprop+ or IRprop+ ?
192 | plus = sum(strcmp(p.method,{'Rprop+','IRprop+'}));
193 | 
194 | % Shall we use GPU ?
195 | if p.useGPU
196 | GPUenable = GPU.GPUsupport();
197 | else
198 | GPUenable = false;
199 | end
200 | 
201 | % Shall we pass to funcgrad a gpuArray ?
202 | if GPUenable && ~p.funcgradgpu
203 | GPUfuncnotGPU = true;
204 | else
205 | GPUfuncnotGPU = false;
206 | end
207 | 
208 | % Do we need to convert x from gpuArray to double?
209 | if ~p.outputgpu && GPUenable 210 | xGPU = true; 211 | else 212 | xGPU = false; 213 | end 214 | 215 | % Initialize some variables 216 | exitflag = 0; % Reached maximum amount of iterations 217 | stats.error = zeros([p.MaxIter,1]); 218 | stats.time = zeros([p.MaxIter,1]); 219 | if p.full_hist 220 | stats.x = cell([p.MaxIter,1]); 221 | end 222 | 223 | % Initialize more variables 224 | if iscell(x0) 225 | % x0 is made out of cells 226 | 227 | ncell = numel(x0); 228 | tb = size(x0); 229 | 230 | delta = cell(tb); 231 | grad = cell(tb); 232 | old_grad = cell(tb); 233 | deltaW = cell(tb); 234 | if plus 235 | old_deltaW = cell(tb); 236 | if GPUenable 237 | old_E = parallel.gpu.GPUArray.inf; 238 | else 239 | old_E = inf; 240 | end 241 | end 242 | 243 | for i = 1:ncell 244 | t2 = size(x0{i}); 245 | 246 | if GPUenable 247 | delta{i} = p.delta0.*parallel.gpu.GPUArray.ones(t2); 248 | grad{i} = parallel.gpu.GPUArray.zeros(t2); 249 | old_grad{i} = parallel.gpu.GPUArray.zeros(t2); 250 | deltaW{i} = parallel.gpu.GPUArray.zeros(t2); 251 | if plus 252 | old_deltaW{i} = parallel.gpu.GPUArray.zeros(t2); 253 | end 254 | else 255 | delta{i} = repmat(p.delta0,t2); 256 | grad{i} = zeros(t2); 257 | old_grad{i} = zeros(t2); 258 | deltaW{i} = zeros(t2); 259 | if plus 260 | old_deltaW{i} = zeros(t2); 261 | end 262 | end 263 | end 264 | 265 | else 266 | % x0 is not a cell 267 | 268 | ncell = 1; 269 | tb = size(x0); 270 | 271 | if GPUenable 272 | x = gpuArray(x); 273 | delta{1} = p.delta0.*parallel.gpu.GPUArray.ones(tb); 274 | grad{1} = parallel.gpu.GPUArray.zeros(tb); 275 | old_grad{1} = parallel.gpu.GPUArray.zeros(tb); 276 | deltaW{1} = parallel.gpu.GPUArray.zeros(tb); 277 | if plus 278 | old_deltaW{1} = parallel.gpu.GPUArray.zeros(tb); 279 | old_E = parallel.gpu.GPUArray.inf; 280 | end 281 | else 282 | delta{1} = repmat(p.delta0,tb); 283 | grad{1} = zeros(tb); 284 | old_grad{1} = zeros(tb); 285 | deltaW{1} = zeros(tb); 286 | if plus 287 | old_deltaW{1} = zeros(tb); 288 | old_E = inf; 289 | end 290 | end 291 | end 292 | 293 | 294 | %% Optimization 295 | 296 | % Print method used for optimization 297 | if p.verbosity>0 298 | Utils.indent(p.indent+0) 299 | fprintf('Optimizing using %s\n',p.method); 300 | end 301 | 302 | if p.verbosity>1 303 | if GPUenable 304 | Utils.indent(p.indent+0) 305 | fprintf('GPU acceleration enabled\n') 306 | end 307 | end 308 | 309 | % Compute initial value function and gradient 310 | if GPUfuncnotGPU 311 | [E grad_t] = funcgrad(gather(x),varargin{:}); 312 | else 313 | [E grad_t] = funcgrad(x,varargin{:}); 314 | end 315 | 316 | % Print initial value 317 | if p.verbosity>2 318 | Utils.indent(p.indent+1) 319 | fprintf('Initial Value: %e\r',E); 320 | end 321 | 322 | % Check stop criterions 323 | 324 | % Stop criterion: TolX 325 | if ncell==1 326 | TolX = max(abs(grad_t)); 327 | else 328 | TolX = max(abs(grad_t{1})); 329 | for i=2:ncell 330 | TolX = max(max(abs(grad_t{i})),TolX); 331 | end 332 | end 333 | if TolX < p.TolX 334 | if p.verbosity>1 335 | Utils.indent(p.indent+1) 336 | fprintf(2,'Stopping criterion reached (TolX < desired TolX)\n') 337 | end 338 | exitflag = 2; 339 | return 340 | end 341 | 342 | % Stop criterion: Error 343 | if E < p.d_Obj 344 | if p.verbosity>1 345 | Utils.indent(p.indent+1) 346 | fprintf(2,'Stopping criterion reached (Error < desired Error)\n') 347 | end 348 | exitflag = 3; 349 | return 350 | end 351 | 352 | % Stop criterion: Time 353 | t1 = toc(rpropclock); 354 | if t1 > p.d_time 355 | if p.verbosity>1 356 | Utils.indent(p.indent+1) 357 | fprintf(2,'Stopping 
criterion reached (Time > desired Time)\n') 358 | end 359 | exitflag = 4; 360 | return 361 | end 362 | clear t1 363 | 364 | % Init figure 365 | if p.display>0 366 | stats.fig_h = figure(); 367 | end 368 | 369 | % Begin the optimization 370 | for Iter = 1:p.MaxIter 371 | 372 | % Validate input 373 | %assert(isequal(size(grad_t),size(x)),... 374 | % 'The dimension of the gradient do not match the parameters') 375 | %assert(Utils.msum(isfinite(grad))==numel(x)) 376 | %assert(isfinite(E)) 377 | 378 | if ncell==1 379 | grad{1} = grad_t; 380 | else 381 | grad = grad_t; 382 | end 383 | 384 | % Optimization ! 385 | for i = 1:ncell 386 | 387 | gg = grad{i}.*old_grad{i}; 388 | delta{i} = min(delta{i}*p.mu_pos,p.delta_max).*(gg>0) +... 389 | max(delta{i}*p.mu_neg,p.delta_min).*(gg<0) + delta{i}.*(gg==0); 390 | 391 | switch p.method 392 | case 'Rprop-' 393 | deltaW{i} = -sign(grad{i}).*delta{i}; 394 | 395 | case 'Rprop+' 396 | deltaW{i} = -sign(grad{i}).*delta{i}.*(gg>=0) -... 397 | old_deltaW{i}.*(gg<0); 398 | grad{i} = grad{i}.*(gg>=0); 399 | old_deltaW{i} = deltaW{i}; 400 | 401 | case 'IRprop-' 402 | grad{i} = grad{i}.*(gg>=0); 403 | deltaW{i} = -sign(grad{i}).*delta{i}; 404 | 405 | case 'IRprop+' 406 | deltaW{i} = -sign(grad{i}).*delta{i}.*(gg>=0) -... 407 | old_deltaW{i}.*(gg<0)*(E>old_E); 408 | grad{i} = grad{i}.*(gg>=0); 409 | old_deltaW{i} = deltaW{i}; 410 | old_E = E; 411 | 412 | otherwise 413 | error('Unknown method') 414 | 415 | end 416 | 417 | old_grad{i} = grad{i}; 418 | 419 | % Update parameters 420 | if ncell==1 421 | x = x + deltaW{i}; 422 | else 423 | x{i} = x{i} + deltaW{i}; 424 | end 425 | 426 | end 427 | 428 | 429 | % Compute value function and gradient 430 | if GPUfuncnotGPU 431 | if ncell==1 432 | [E grad_t] = funcgrad(gather(x),varargin{:}); 433 | else 434 | x_t = cell(size(x)); 435 | for i=1:numel(x_t) 436 | x_t{i} = gather(x{i}); 437 | end 438 | [E grad_t] = funcgrad(x_t,varargin{:}); 439 | end 440 | else 441 | [E grad_t] = funcgrad(x,varargin{:}); 442 | end 443 | 444 | % Print info about this iteration 445 | if mod(Iter,10)==0 446 | if p.verbosity>1 447 | Utils.indent(p.indent+1) 448 | fprintf('Iter %d (of %d)',Iter,p.MaxIter); 449 | if p.verbosity>2 450 | fprintf(' - value: %e\r',E); 451 | else 452 | fprintf('\r'); 453 | end 454 | end 455 | end 456 | 457 | % Collect statistics 458 | if p.full_hist 459 | stats.x{Iter} = x; 460 | end 461 | stats.time(Iter) = toc(rpropclock); 462 | if p.funcgradgpu 463 | stats.error(Iter) = gather(E); 464 | else 465 | stats.error(Iter) = E; 466 | end 467 | 468 | % Plot optimization process 469 | if p.display>0 470 | set(0,'CurrentFigure',stats.fig_h); 471 | plot(stats.error(1:Iter)); 472 | title('Objective value during optimization') 473 | ylabel('Objective value') 474 | xlabel('Number of Iterations') 475 | drawnow 476 | end 477 | 478 | % Check other stop criterions 479 | 480 | % Stop criterion: TolFun 481 | if Iter>1 482 | if isfinite(p.Tolfun) 483 | deltaobj = stats.error(Iter-1) - stats.error(Iter); 484 | if (deltaobj < p.Tolfun) && (deltaobj > 0) 485 | if p.verbosity>1 486 | Utils.indent(p.indent+1) 487 | fprintf(2,'Stopping criterion reached (Delta < desired Delta)\n') 488 | end 489 | exitflag = 1; 490 | break 491 | end 492 | end 493 | end 494 | 495 | % Stop criterion: TolX 496 | if ncell==1 497 | TolX = max(abs(grad_t)); 498 | else 499 | TolX = max(abs(grad_t{1})); 500 | for i=2:ncell 501 | TolX = max(max(abs(grad_t{i})),TolX); 502 | end 503 | end 504 | if TolX < p.TolX 505 | if p.verbosity>1 506 | Utils.indent(p.indent+1) 507 | 
fprintf(2,'Stopping criterion reached (TolX < desired TolX)\n')
508 | end
509 | exitflag = 2;
510 | break
511 | end
512 | 
513 | % Stop criterion: Error
514 | if E < p.d_Obj
515 | if p.verbosity>1
516 | Utils.indent(p.indent+1)
517 | fprintf(2,'Stopping criterion reached (Error < desired Error)\n')
518 | end
519 | exitflag = 3;
520 | break
521 | end
522 | 
523 | % Stop criterion: Time
524 | if stats.time(Iter) > p.d_time
525 | if p.verbosity>1
526 | Utils.indent(p.indent+1)
527 | fprintf(2,'Stopping criterion reached (Time > desired Time)\n')
528 | end
529 | exitflag = 4;
530 | break
531 | end
532 | 
533 | end
534 | 
535 | 
536 | %% Output Validation
537 | 
538 | % Cut outputs in case of early-stop
539 | stats.error = stats.error(1:Iter);
540 | stats.time = stats.time(1:Iter);
541 | stats.GPUenabled = GPUenable;
542 | stats.FunEvals = Iter+1;
543 | if p.full_hist
544 | stats.x = stats.x(1:Iter);
545 | end
546 | 
547 | % In case the GPU has been used collect the parameters
548 | if xGPU
549 | if ncell==1
550 | x = gather(x);
551 | else
552 | x_t = cell(size(x));
553 | for i=1:numel(x)
554 | x_t{i} = gather(x{i});
555 | end
556 | x = x_t;
557 | end
558 | end
559 | 
560 | 
561 | end
562 | 
563 | 
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/minFunc.m: --------------------------------------------------------------------------------
1 | function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
2 | % [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
3 | %
4 | % Unconstrained optimizer using a line search strategy
5 | %
6 | % Uses an interface very similar to fminunc
7 | % (it doesn't support all of the optimization toolbox options,
8 | % but supports many other options).
9 | %
10 | % It computes descent directions using one of ('Method'):
11 | % - 'sd': Steepest Descent
12 | % (no previous information used, not recommended)
13 | % - 'csd': Cyclic Steepest Descent
14 | % (uses previous step length for a fixed length cycle)
15 | % - 'bb': Barzilai and Borwein Gradient
16 | % (uses only previous step)
17 | % - 'cg': Non-Linear Conjugate Gradient
18 | % (uses only previous step and a vector beta)
19 | % - 'scg': Scaled Non-Linear Conjugate Gradient
20 | % (uses previous step and a vector beta,
21 | % and Hessian-vector products to initialize line search)
22 | % - 'pcg': Preconditioned Non-Linear Conjugate Gradient
23 | % (uses only previous step and a vector beta, preconditioned version)
24 | % - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
25 | % (default: uses a predetermined number of previous steps to form a
26 | % low-rank Hessian approximation)
27 | % - 'newton0': Hessian-Free Newton
28 | % (numerically computes Hessian-Vector products)
29 | % - 'pnewton0': Preconditioned Hessian-Free Newton
30 | % (numerically computes Hessian-Vector products, preconditioned
31 | % version)
32 | % - 'qnewton': Quasi-Newton Hessian approximation
33 | % (uses dense Hessian approximation)
34 | % - 'mnewton': Newton's method with Hessian calculation after every
35 | % user-specified number of iterations
36 | % (needs user-supplied Hessian matrix)
37 | % - 'newton': Newton's method with Hessian calculation every iteration
38 | % (needs user-supplied Hessian matrix)
39 | % - 'tensor': Tensor
40 | % (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
41 | %
42 | % Several line search strategies are available for finding a step length satisfying
43 | % the termination criteria ('LS_type')
44 | % - 0 : 
A backtracking line-search based on the Armijo condition (default for 'bb')
45 | % - 1 : A bracketing line-search based on the strong Wolfe conditions (default for all other methods)
46 | % - 2 : The line-search from the Matlab Optimization Toolbox (requires Matlab's linesearch.m to be added to the path)
47 | %
48 | % For the Armijo line-search, several interpolation strategies are available ('LS_interp'):
49 | % - 0 : Step size halving
50 | % - 1 : Polynomial interpolation using new function values
51 | % - 2 : Polynomial interpolation using new function and gradient values (default)
52 | %
53 | % When (LS_interp = 1), the default setting of (LS_multi = 0) uses quadratic interpolation,
54 | % while if (LS_multi = 1) it uses cubic interpolation if more than one point is available.
55 | %
56 | % When (LS_interp = 2), the default setting of (LS_multi = 0) uses cubic interpolation,
57 | % while if (LS_multi = 1) it uses quartic or quintic interpolation if more than one point is available
58 | %
59 | % To use the non-monotonic Armijo condition, set the 'Fref' value to the number of previous function values to store
60 | %
61 | % For the Wolfe line-search, these interpolation strategies are available ('LS_interp'):
62 | % - 0 : Step Size Doubling and Bisection
63 | % - 1 : Cubic interpolation/extrapolation using new function and gradient values (default)
64 | % - 2 : Mixed quadratic/cubic interpolation/extrapolation
65 | %
66 | % Several strategies for choosing the initial step size are available ('LS_init'):
67 | % - 0: Always try an initial step length of 1 (default for all except 'sd' and 'cg')
68 | % (t = 1)
69 | % - 1: Use a step similar to the previous step
70 | % (t = t_old*min(2,g'd/g_old'd_old))
71 | % - 2: Quadratic Initialization using previous function value and new
72 | % function value/gradient (use this if steps tend to be very long, default for 'sd' and 'cg')
73 | % (t = min(1,2*(f-f_old)/g))
74 | % - 3: The minimum between 1 and twice the previous step length
75 | % (t = min(1,2*t))
76 | % - 4: The scaled conjugate gradient step length (may accelerate
77 | % conjugate gradient methods, but requires a Hessian-vector product, default for 'scg')
78 | % (t = g'd/d'Hd)
79 | %
80 | % Inputs:
81 | % funObj - is a function handle
82 | % x0 - is a starting vector;
83 | % options - is a struct containing parameters (defaults are used for non-existent or blank fields)
84 | % varargin{:} - all other arguments are passed as additional arguments to funObj
85 | %
86 | % Outputs:
87 | % x is the location of the minimum found
88 | % f is the function value at the minimum found
89 | % exitflag returns an exit condition
90 | % output returns a structure with other information
91 | %
92 | % Supported Input Options
93 | % Display - Level of display [ off | final | (iter) | full | excessive ]
94 | % MaxFunEvals - Maximum number of function evaluations allowed (1000)
95 | % MaxIter - Maximum number of iterations allowed (500)
96 | % optTol - Termination tolerance on the first-order optimality (1e-5)
97 | % progTol - Termination tolerance on progress in terms of function/parameter changes (1e-9)
98 | % Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 |
99 | % qnewton | mnewton | newton | tensor ]
100 | % c1 - Sufficient Decrease for Armijo condition (1e-4)
101 | % c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise)
102 | % LS_init - Line Search Initialization - see above (2 for cg/sd, 4 for scg, 0 otherwise)
103 | % LS_type - Line Search type - see above (0 for bb, 
1 otherwise)
104 | % Fref - Setting this to a positive integer greater than 1
105 | % will use non-monotone Armijo objective in the line search.
106 | % (20 for bb, 10 for csd, 1 for all others)
107 | % numDiff - [ 0 | 1 | 2] compute derivatives using user-supplied function (0),
108 | % numerically using forward-differencing (1), or numerically using central-differencing (2)
109 | % (default: 0)
110 | % (this option has a different effect for 'newton', see below)
111 | % useComplex - if 1, use complex differentials if computing numerical derivatives
112 | % to get very accurate values (default: 0)
113 | % DerivativeCheck - if 'on', computes derivatives numerically at initial
114 | % point and compares to user-supplied derivative (default: 'off')
115 | % outputFcn - function to run after each iteration (default: []). It
116 | % should have the following interface:
117 | % outputFcn(x,iterationType,i,funEvals,f,t,gtd,g,d,optCond,varargin{:});
118 | % useMex - where applicable, use mex files to speed things up (default: 1)
119 | %
120 | % Method-specific input options:
121 | % newton:
122 | % HessianModify - type of Hessian modification for direct solvers to
123 | % use if the Hessian is not positive definite (default: 0)
124 | % 0: Minimum Euclidean norm s.t. eigenvalues sufficiently large
125 | % (requires eigenvalues on iterations where matrix is not pd)
126 | % 1: Start with (1/2)*||A||_F and increment until Cholesky succeeds
127 | % (an approximation to method 0, does not require eigenvalues)
128 | % 2: Modified LDL factorization
129 | % (only 1 generalized Cholesky factorization done and no eigenvalues required)
130 | % 3: Modified Spectral Decomposition
131 | % (requires eigenvalues)
132 | % 4: Modified Symmetric Indefinite Factorization
133 | % 5: Uses the eigenvector of the smallest eigenvalue as negative
134 | % curvature direction
135 | % cgSolve - use conjugate gradient instead of direct solver (default: 0)
136 | % 0: Direct Solver
137 | % 1: Conjugate Gradient
138 | % 2: Conjugate Gradient with Diagonal Preconditioner
139 | % 3: Conjugate Gradient with LBFGS Preconditioner
140 | % x: Conjugate Gradient with Symmetric Successive Over Relaxation
141 | % Preconditioner with parameter x
142 | % (where x is a real number in the range [0,2])
143 | % x: Conjugate Gradient with Incomplete Cholesky Preconditioner
144 | % with drop tolerance -x
145 | % (where x is a real negative number)
146 | % numDiff - compute Hessian numerically
147 | % (default: 0, done with complex differentials if useComplex = 1)
148 | % LS_saveHessianComp - when on, only computes the Hessian at the
149 | % first and last iteration of the line search (default: 1)
150 | % mnewton:
151 | % HessianIter - number of iterations to use same Hessian (default: 5)
152 | % qnewton:
153 | % initialHessType - scale initial Hessian approximation (default: 1)
154 | % qnUpdate - type of quasi-Newton update (default: 3):
155 | % 0: BFGS
156 | % 1: SR1 (when it is positive-definite, otherwise BFGS)
157 | % 2: Hoshino
158 | % 3: Self-Scaling BFGS
159 | % 4: Oren's Self-Scaling Variable Metric method
160 | % 5: McCormick-Huang asymmetric update
161 | % Damped - use damped BFGS update (default: 1)
162 | % newton0/pnewton0:
163 | % HvFunc - user-supplied function that returns Hessian-vector products
164 | % (by default, these are computed numerically using autoHv)
165 | % HvFunc should have the following interface: HvFunc(v,x,varargin{:})
166 | % useComplex - use a complex perturbation to get high accuracy
167 | % Hessian-vector products 
(default: 0)
168 | % (the increased accuracy can make the method much more efficient,
169 | % but gradient code must properly support complex inputs)
170 | % useNegCurv - a negative curvature direction is used as the descent
171 | % direction if one is encountered during the cg iterations
172 | % (default: 1)
173 | % precFunc (for pnewton0 only) - user-supplied preconditioner
174 | % (by default, an L-BFGS preconditioner is used)
175 | % precFunc should have the following interface:
176 | % precFunc(v,x,varargin{:})
177 | % lbfgs:
178 | % Corr - number of corrections to store in memory (default: 100)
179 | % (higher numbers converge faster but use more memory)
180 | % Damped - use damped update (default: 0)
181 | % cg/scg/pcg:
182 | % cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1)
183 | % 0: Fletcher Reeves
184 | % 1: Polak-Ribiere
185 | % 2: Hestenes-Stiefel (not supported for pcg)
186 | % 3: Gilbert-Nocedal
187 | % HvFunc (for scg only)- user-supplied function that returns Hessian-vector
188 | % products
189 | % (by default, these are computed numerically using autoHv)
190 | % HvFunc should have the following interface:
191 | % HvFunc(v,x,varargin{:})
192 | % precFunc (for pcg only) - user-supplied preconditioner
193 | % (by default, an L-BFGS preconditioner is used)
194 | % precFunc should have the following interface:
195 | % precFunc(v,x,varargin{:})
196 | % bb:
197 | % bbType - type of bb step (default: 0)
198 | % 0: min_alpha ||delta_x - alpha delta_g||_2
199 | % 1: min_alpha ||alpha delta_x - delta_g||_2
200 | % 2: Conic BB
201 | % 3: Gradient method with retards
202 | % csd:
203 | % cycle - length of cycle (default: 3)
204 | %
205 | % Supported Output Options
206 | % iterations - number of iterations taken
207 | % funcCount - number of function evaluations
208 | % algorithm - algorithm used
209 | % firstorderopt - first-order optimality
210 | % message - exit message
211 | % trace.funccount - function evaluations after each iteration
212 | % trace.fval - function value after each iteration
213 | %
214 | % Author: Mark Schmidt (2005)
215 | % Web: http://www.di.ens.fr/~mschmidt/Software/minFunc.html
216 | %
217 | % Sources (in order of how much the source material contributes):
218 | % J. Nocedal and S.J. Wright. 1999. "Numerical Optimization". Springer Verlag.
219 | % R. Fletcher. 1987. "Practical Methods of Optimization". Wiley.
220 | % J. Demmel. 1997. "Applied Numerical Linear Algebra". SIAM.
221 | % R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R.
222 | % Pozo, C. Romine, and H. Van der Vorst. 1994. "Templates for the Solution of
223 | % Linear Systems: Building Blocks for Iterative Methods". SIAM.
224 | % J. More and D. Thuente. "Line search algorithms with guaranteed
225 | % sufficient decrease". ACM Trans. Math. Softw. vol 20, 286-307, 1994.
226 | % M. Raydan. "The Barzilai and Borwein gradient method for the large
227 | % scale unconstrained minimization problem". SIAM J. Optim., 7, 26-33,
228 | % (1997).
229 | % "Mathematical Optimization". The Computational Science Education
230 | % Project. 1995.
231 | % C. Kelley. 1999. "Iterative Methods for Optimization". Frontiers in
232 | % Applied Mathematics. SIAM.
233 | 
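% Example (an illustrative sketch of a typical call; LogisticLoss.m and
% example_minFunc_LR.m in the logisticExample folder show the full version.
% nInst, nVars, X and y below are assumed synthetic data):
%   nInst = 500; nVars = 10;
%   X = randn(nInst,nVars);            % design matrix
%   y = sign(randn(nInst,1));          % +1/-1 labels
%   options.Method = 'lbfgs';
%   options.Display = 'iter';
%   w = minFunc(@LogisticLoss,zeros(nVars,1),options,X,y);
%
234 | if nargin < 3
235 | options = [];
236 | end
237 | 
238 | tic
239 | %time=[];
240 | 
241 | % Get Parameters
242 | [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,optTol,progTol,method,...
243 | corrections,c1,c2,LS_init,cgSolve,qnUpdate,cgUpdate,initialHessType,...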
244 | HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 245 | Damped,HvFunc,bbType,cycle,... 246 | HessianIter,outputFcn,useMex,useNegCurv,precFunc,... 247 | LS_type,LS_interp,LS_multi,checkGrad] = ... 248 | minFunc_processInputOptions(options); 249 | 250 | % Constants 251 | SD = 0; 252 | CSD = 1; 253 | BB = 2; 254 | CG = 3; 255 | PCG = 4; 256 | LBFGS = 5; 257 | QNEWTON = 6; 258 | NEWTON0 = 7; 259 | NEWTON = 8; 260 | TENSOR = 9; 261 | 262 | % Initialize 263 | p = length(x0); 264 | d = zeros(p,1); 265 | x = x0; 266 | t = 1; 267 | 268 | % If necessary, form numerical differentiation functions 269 | funEvalMultiplier = 1; 270 | if useComplex 271 | numDiffType = 3; 272 | else 273 | numDiffType = numDiff; 274 | end 275 | if numDiff && method ~= TENSOR 276 | varargin(3:end+2) = varargin(1:end); 277 | varargin{1} = numDiffType; 278 | varargin{2} = funObj; 279 | if method ~= NEWTON 280 | if debug 281 | if useComplex 282 | fprintf('Using complex differentials for gradient computation\n'); 283 | else 284 | fprintf('Using finite differences for gradient computation\n'); 285 | end 286 | end 287 | funObj = @autoGrad; 288 | else 289 | if debug 290 | if useComplex 291 | fprintf('Using complex differentials for Hessian computation\n'); 292 | else 293 | fprintf('Using finite differences for Hessian computation\n'); 294 | end 295 | end 296 | funObj = @autoHess; 297 | end 298 | 299 | if method == NEWTON0 && useComplex == 1 300 | if debug 301 | fprintf('Turning off the use of complex differentials for Hessian-vector products\n'); 302 | end 303 | useComplex = 0; 304 | end 305 | 306 | if useComplex 307 | funEvalMultiplier = p; 308 | elseif numDiff == 2 309 | funEvalMultiplier = 2*p; 310 | else 311 | funEvalMultiplier = p+1; 312 | end 313 | end 314 | 315 | % Evaluate Initial Point 316 | if method < NEWTON 317 | [f,g] = funObj(x,varargin{:}); 318 | computeHessian = 0; 319 | else 320 | [f,g,H] = funObj(x,varargin{:}); 321 | computeHessian = 1; 322 | end 323 | funEvals = 1; 324 | 325 | % Derivative Check 326 | if checkGrad 327 | if numDiff 328 | fprintf('Can not do derivative checking when numDiff is 1\n'); 329 | pause 330 | end 331 | derivativeCheck(funObj,x,1,numDiffType,varargin{:}); % Checks gradient 332 | if computeHessian 333 | derivativeCheck(funObj,x,2,numDiffType,varargin{:}); 334 | end 335 | end 336 | 337 | % Output Log 338 | 339 | 340 | if verboseI 341 | fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 342 | end 343 | 344 | 345 | % Compute optimality of initial point 346 | optCond = max(abs(g)); 347 | 348 | if nargout > 3 349 | % Initialize Trace 350 | trace.time = toc; 351 | trace.fval = f; 352 | trace.funcCount = funEvals; 353 | trace.optCond = optCond; 354 | end 355 | 356 | % Exit if initial point is optimal 357 | if optCond <= optTol 358 | exitflag=1; 359 | msg = 'Optimality Condition below optTol'; 360 | if verbose 361 | fprintf('%s\n',msg); 362 | end 363 | if nargout > 3 364 | output = struct('iterations',0,'funcCount',1,... 365 | 'algorithm',method,'firstorderopt',max(abs(g)),'message',msg,'trace',trace); 366 | end 367 | return; 368 | end 369 | 370 | % Output Function 371 | if ~isempty(outputFcn) 372 | stop = outputFcn(x,'init',0,funEvals,f,[],[],g,[],max(abs(g)),varargin{:}); 373 | if stop 374 | exitflag=-1; 375 | msg = 'Stopped by output function'; 376 | if verbose 377 | fprintf('%s\n',msg); 378 | end 379 | if nargout > 3 380 | output = struct('iterations',0,'funcCount',1,... 
381 | 'algorithm',method,'firstorderopt',max(abs(g)),'message',msg,'trace',trace); 382 | end 383 | return; 384 | end 385 | end 386 | 387 | % Perform up to a maximum of 'maxIter' descent steps: 388 | 389 | for i = 1:maxIter 390 | 391 | % ****************** COMPUTE DESCENT DIRECTION ***************** 392 | 393 | switch method 394 | case SD % Steepest Descent 395 | d = -g; 396 | 397 | case CSD % Cyclic Steepest Descent 398 | 399 | if mod(i,cycle) == 1 % Use Steepest Descent 400 | alpha = 1; 401 | LS_init = 2; 402 | LS_type = 1; % Wolfe line search 403 | elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step 404 | alpha = t; 405 | LS_init = 0; 406 | LS_type = 0; % Armijo line search 407 | end 408 | d = -alpha*g; 409 | 410 | case BB % Steepest Descent with Barzilai and Borwein Step Length 411 | 412 | if i == 1 413 | d = -g; 414 | else 415 | y = g-g_old; 416 | s = t*d; 417 | if bbType == 0 418 | yy = y'*y; 419 | alpha = (s'*y)/(yy); 420 | if alpha <= 1e-10 || alpha > 1e10 421 | alpha = 1; 422 | end 423 | elseif bbType == 1 424 | sy = s'*y; 425 | alpha = (s'*s)/sy; 426 | if alpha <= 1e-10 || alpha > 1e10 427 | alpha = 1; 428 | end 429 | elseif bbType == 2 % Conic Interpolation ('Modified BB') 430 | sy = s'*y; 431 | ss = s'*s; 432 | alpha = ss/sy; 433 | if alpha <= 1e-10 || alpha > 1e10 434 | alpha = 1; 435 | end 436 | alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s); 437 | if alphaConic > .001*alpha && alphaConic < 1000*alpha 438 | alpha = alphaConic; 439 | end 440 | elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step) 441 | sy = s'*y; 442 | alpha = (s'*s)/sy; 443 | if alpha <= 1e-10 || alpha > 1e10 444 | alpha = 1; 445 | end 446 | v(1+mod(i-2,5)) = alpha; 447 | alpha = v(ceil(rand*length(v))); 448 | end 449 | d = -alpha*g; 450 | end 451 | g_old = g; 452 | myF_old = f; 453 | 454 | 455 | case CG % Non-Linear Conjugate Gradient 456 | 457 | if i == 1 458 | d = -g; % Initially use steepest descent direction 459 | else 460 | gotgo = g_old'*g_old; 461 | 462 | if cgUpdate == 0 463 | % Fletcher-Reeves 464 | beta = (g'*g)/(gotgo); 465 | elseif cgUpdate == 1 466 | % Polak-Ribiere 467 | beta = (g'*(g-g_old)) /(gotgo); 468 | elseif cgUpdate == 2 469 | % Hestenes-Stiefel 470 | beta = (g'*(g-g_old))/((g-g_old)'*d); 471 | else 472 | % Gilbert-Nocedal 473 | beta_FR = (g'*(g-g_old)) /(gotgo); 474 | beta_PR = (g'*g-g'*g_old)/(gotgo); 475 | beta = max(-beta_FR,min(beta_PR,beta_FR)); 476 | end 477 | 478 | d = -g + beta*d; 479 | 480 | % Restart if not a direction of sufficient descent 481 | if g'*d > -progTol 482 | if debug 483 | fprintf('Restarting CG\n'); 484 | end 485 | beta = 0; 486 | d = -g; 487 | end 488 | 489 | % Old restart rule: 490 | %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0 491 | 492 | end 493 | g_old = g; 494 | 495 | case PCG % Preconditioned Non-Linear Conjugate Gradient 496 | 497 | % Apply preconditioner to negative gradient 498 | if isempty(precFunc) 499 | % Use L-BFGS Preconditioner 500 | if i == 1 501 | S = zeros(p,corrections); 502 | Y = zeros(p,corrections); 503 | YS = zeros(corrections,1); 504 | lbfgs_start = 1; 505 | lbfgs_end = 0; 506 | Hdiag = 1; 507 | s = -g; 508 | else 509 | [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(g-g_old,t*d,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex); 510 | if debug && skipped 511 | fprintf('Skipped L-BFGS updated\n'); 512 | end 513 | if useMex 514 | s = lbfgsProdC(g,S,Y,YS,int32(lbfgs_start),int32(lbfgs_end),Hdiag); 515 | else 516 | s = lbfgsProd(g,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag); 517 | 
                    end
                end
            else % User-supplied preconditioner
                s = precFunc(-g,x,varargin{:});
            end

            if i == 1
                d = s;
            else
                if cgUpdate == 0
                    % Preconditioned Fletcher-Reeves
                    beta = (g'*s)/(g_old'*s_old);
                elseif cgUpdate < 3
                    % Preconditioned Polak-Ribiere
                    beta = (g'*(s-s_old))/(g_old'*s_old);
                else
                    % Preconditioned Gilbert-Nocedal
                    beta_FR = (g'*s)/(g_old'*s_old);
                    beta_PR = (g'*(s-s_old))/(g_old'*s_old);
                    beta = max(-beta_FR,min(beta_PR,beta_FR));
                end
                d = s + beta*d;

                if g'*d > -progTol
                    if debug
                        fprintf('Restarting CG\n');
                    end
                    beta = 0;
                    d = s;
                end
            end
            g_old = g;
            s_old = s;

        case LBFGS % L-BFGS
            % Update the direction and step sizes
            if Damped
                if i == 1
                    d = -g; % Initially use steepest descent direction
                    old_dirs = zeros(length(g),0);
                    old_stps = zeros(length(d),0);
                    Hdiag = 1;
                else
                    [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                    if useMex
                        d = lbfgsC(-g,old_dirs,old_stps,Hdiag);
                    else
                        d = lbfgs(-g,old_dirs,old_stps,Hdiag);
                    end
                end
            else
                if i == 1
                    d = -g; % Initially use steepest descent direction
                    S = zeros(p,corrections);
                    Y = zeros(p,corrections);
                    YS = zeros(corrections,1);
                    lbfgs_start = 1;
                    lbfgs_end = 0;
                    Hdiag = 1;
                else
                    [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(g-g_old,t*d,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex);
                    if debug && skipped
                        fprintf('Skipped L-BFGS update\n');
                    end
                    if useMex
                        d = lbfgsProdC(g,S,Y,YS,int32(lbfgs_start),int32(lbfgs_end),Hdiag);
                    else
                        d = lbfgsProd(g,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag);
                    end
                end
            end
            g_old = g;

        case QNEWTON % Use quasi-Newton Hessian approximation
            if i == 1
                d = -g;
            else
                % Compute difference vectors
                y = g-g_old;
                s = t*d;

                if i == 2
                    % Make initial Hessian approximation
                    if initialHessType == 0
                        % Identity
                        if qnUpdate <= 1
                            R = eye(length(g));
                        else
                            H = eye(length(g));
                        end
                    else
                        % Scaled Identity
                        if debug
                            fprintf('Scaling Initial Hessian Approximation\n');
                        end
                        if qnUpdate <= 1
                            % Use Cholesky of Hessian approximation
                            R = sqrt((y'*y)/(y'*s))*eye(length(g));
                        else
                            % Use Inverse of Hessian approximation
                            H = eye(length(g))*(y'*s)/(y'*y);
                        end
                    end
                end

                if qnUpdate == 0 % Use BFGS updates
                    Bs = R'*(R*s);
                    if Damped
                        eta = .02;
                        if y'*s < eta*s'*Bs
                            if debug
                                fprintf('Damped Update\n');
                            end
                            theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                            y = theta*y + (1-theta)*Bs;
                        end
                        R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                    else
                        if y'*s > 1e-10
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if debug
                                fprintf('Skipping Update\n');
                            end
                        end
                    end
                elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness
                    Bs = R'*(R*s);
                    ymBs = y-Bs;
                    if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10
                        R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-');
                    else
                        if debug
                            fprintf('SR1 not positive-definite, doing BFGS Update\n');
                        end
                        if Damped
                            eta = .02;
                            if y'*s < eta*s'*Bs
                                if debug
                                    fprintf('Damped Update\n');
                                end
                                theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                                y = theta*y + (1-theta)*Bs;
                            end
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if y'*s > 1e-10
                                R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                            else
                                if debug
                                    fprintf('Skipping Update\n');
                                end
                            end
                        end
                    end
                elseif qnUpdate == 2 % Use Hoshino update
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    phi = 1/(1 + (y'*H*y)/(s'*y));
                    H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v';
                elseif qnUpdate == 3 % Self-Scaling BFGS update
                    ys = y'*s;
                    Hy = H*y;
                    yHy = y'*Hy;
                    gamma = ys/yHy;
                    v = sqrt(yHy)*(s/ys - Hy/yHy);
                    H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys;
                elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update
                    % Oren's method
                    if (s'*y)/(y'*H*y) > 1
                        phi = 1; % BFGS
                        omega = 0;
                    elseif (s'*(H\s))/(s'*y) < 1
                        phi = 0; % DFP
                        omega = 1;
                    else
                        phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2);
                        omega = phi;
                    end

                    gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y);
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y);
                elseif qnUpdate == 5 % McCormick-Huang asymmetric update
                    theta = 1;
                    phi = 0;
                    psi = 1;
                    omega = 0;
                    t1 = s*(theta*s + phi*H'*y)';
                    t2 = (theta*s + phi*H'*y)'*y;
                    t3 = H*y*(psi*s + omega*H'*y)';
                    t4 = (psi*s + omega*H'*y)'*y;
                    H = H + t1/t2 - t3/t4;
                end

                if qnUpdate <= 1
                    d = -R\(R'\g);
                else
                    d = -H*g;
                end
            end
            g_old = g;

        case NEWTON0 % Hessian-Free Newton
            cgMaxIter = min(p,maxFunEvals-funEvals);
            cgForce = min(0.5,sqrt(norm(g)))*norm(g);

            % Set-up preconditioner
            precondFunc = [];
            precondArgs = [];
            if cgSolve == 1
                if isempty(precFunc) % Apply L-BFGS preconditioner
                    if i == 1
                        S = zeros(p,corrections);
                        Y = zeros(p,corrections);
                        YS = zeros(corrections,1);
                        lbfgs_start = 1;
                        lbfgs_end = 0;
                        Hdiag = 1;
                    else
                        [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(g-g_old,t*d,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex);
                        if debug && skipped
                            fprintf('Skipped L-BFGS update\n');
                        end
                        if useMex
                            precondFunc = @lbfgsProdC;
                        else
                            precondFunc = @lbfgsProd;
                        end
                        precondArgs = {S,Y,YS,int32(lbfgs_start),int32(lbfgs_end),Hdiag};
                    end
                    g_old = g;
                else
                    % Apply user-defined preconditioner
                    precondFunc = precFunc;
                    precondArgs = {x,varargin{:}};
                end
            end

            % Solve Newton system using cg and hessian-vector products
            if isempty(HvFunc)
                % No user-supplied Hessian-vector function,
                % use automatic differentiation
                HvFun = @autoHv;
                HvArgs = {x,g,useComplex,funObj,varargin{:}};
            else
                % Use user-supplied Hessian-vector function
                HvFun = HvFunc;
                HvArgs = {x,varargin{:}};
            end

            if useNegCurv
                [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            else
                [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            end

            funEvals = funEvals+cgIter;
            if debug
                fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes);
            end

            if useNegCurv
                if ~isempty(negCurv)
                    %if debug
                    fprintf('Using negative curvature direction\n');
                    %end
                    d = negCurv/norm(negCurv);
                    d = d/sum(abs(g));
                end
            end

        case NEWTON % Newton search direction
            if cgSolve == 0
                if HessianModify == 0
                    % Attempt to perform a Cholesky factorization of the Hessian
                    [R,posDef] = chol(H);

                    % If the Cholesky factorization was successful, then the Hessian is
                    % positive definite, solve the system
                    if posDef == 0
                        d = -R\(R'\g);
                    else
                        % otherwise, shift the Hessian to be positive definite based on
                        % its minimum eigenvalue, and solve the shifted system
                        % (expensive, we don't want to do this very much)
                        if debug
                            fprintf('Adjusting Hessian\n');
                        end
                        H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H))));
                        d = -H\g;
                    end
                elseif HessianModify == 1
                    % Modified Incomplete Cholesky
                    R = mcholinc(H,debug);
                    d = -R\(R'\g);
                elseif HessianModify == 2
                    % Modified Generalized Cholesky
                    if useMex
                        [L,D,perm] = mcholC(H);
                    else
                        [L,D,perm] = mchol(H);
                    end
                    d(perm) = -L' \ ((D.^-1).*(L \ g(perm)));
                elseif HessianModify == 3
                    % Modified Spectral Decomposition
                    [V,D] = eig((H+H')/2);
                    D = diag(D);
                    D = max(abs(D),max(max(abs(D)),1)*1e-12);
                    d = -V*((V'*g)./D);
                elseif HessianModify == 4
                    % Modified Symmetric Indefinite Factorization
                    [L,D,perm] = ldl(H,'vector');
                    [blockPos,junk] = find(triu(D,1));
                    for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1)
                        if D(diagInd,diagInd) < 1e-12
                            D(diagInd,diagInd) = 1e-12;
                        end
                    end
                    for blockInd = blockPos'
                        block = D(blockInd:blockInd+1,blockInd:blockInd+1);
                        block_a = block(1);
                        block_b = block(2);
                        block_d = block(4);
                        lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2;
                        D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(lambda+1e-12);
                    end
                    d(perm) = -L' \ (D \ (L \ g(perm)));
                else
                    % Take Newton step if Hessian is pd,
                    % otherwise take a step with negative curvature
                    [R,posDef] = chol(H);
                    if posDef == 0
                        d = -R\(R'\g);
                    else
                        if debug
                            fprintf('Taking Direction of Negative Curvature\n');
                        end
                        [V,D] = eig(H);
                        u = V(:,1);
                        d = -sign(u'*g)*u;
                    end
                end
            else
                % Solve with Conjugate Gradient
                cgMaxIter = p;
                cgForce = min(0.5,sqrt(norm(g)))*norm(g);

                % Select Preconditioner
                if cgSolve == 1
                    % No preconditioner
                    precondFunc = [];
                    precondArgs = [];
                elseif cgSolve == 2
                    % Diagonal preconditioner
                    precDiag = diag(H);
                    precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag);
                    precondFunc = @precondDiag;
                    precondArgs = {precDiag.^-1};
                elseif cgSolve == 3
                    % L-BFGS preconditioner
                    if i == 1
                        old_dirs = zeros(length(g),0);
                        old_stps = zeros(length(g),0);
                        Hdiag = 1;
                    else
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                    end
                    g_old = g;
                    if useMex
                        precondFunc = @lbfgsC;
                    else
                        precondFunc = @lbfgs;
                    end
                    precondArgs = {old_dirs,old_stps,Hdiag};
                elseif cgSolve > 0
                    % Symmetric Successive Overrelaxation Preconditioner
                    omega = cgSolve;
                    D = diag(H);
                    D(D < 1e-12) = 1e-12 - min(D);
                    precDiag = (omega/(2-omega))*D.^-1;
                    precTriu = diag(D/omega) + triu(H,1);
                    precondFunc = @precondTriuDiag;
                    precondArgs = {precTriu,precDiag.^-1};
                else
                    % Incomplete Cholesky Preconditioner
                    opts.droptol = -cgSolve;
                    opts.rdiag = 1;
                    R = cholinc(sparse(H),opts);
                    if min(diag(R)) < 1e-12
                        R = cholinc(sparse(H + eye(p)*(1e-12 - min(diag(R)))),opts); % shift the diagonal, not every entry
                    end
                    precondFunc = @precondTriu;
                    precondArgs = {R};
                end

                % Run cg with the appropriate preconditioner
                if isempty(HvFunc)
                    % No user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs);
                else
                    % Use user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}});
                end
                if debug
                    fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes);
                    %funEvals = funEvals + cgIter;
                end
            end

        case TENSOR % Tensor Method
            if numDiff
                % Compute 3rd-order Tensor Numerically
                [junk1,junk2,junk3,T] = autoTensor(x,numDiffType,funObj,varargin{:});
            else
                % Use user-supplied 3rd-derivative Tensor
                [junk1,junk2,junk3,T] = funObj(x,varargin{:});
            end
            options_sub.Method = 'newton';
            options_sub.Display = 'none';
            options_sub.progTol = progTol;
            options_sub.optTol = optTol;
            d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T);

            if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -progTol
                if debug
                    fprintf('Using 2nd-Order Step\n');
                end
                [V,D] = eig((H+H')/2);
                D = diag(D);
                D = max(abs(D),max(max(abs(D)),1)*1e-12);
                d = -V*((V'*g)./D);
            else
                if debug
                    fprintf('Using 3rd-Order Step\n');
                end
            end
    end

    if ~isLegal(d)
        fprintf('Step direction is illegal!\n');
        pause;
        return
    end

    % ****************** COMPUTE STEP LENGTH ************************

    % Directional Derivative
    gtd = g'*d;

    % Check that progress can be made along direction
    if gtd > -progTol
        exitflag = 2;
        msg = 'Directional Derivative below progTol';
        break;
    end

    % Select Initial Guess
    if i == 1
        if method < NEWTON0
            t = min(1,1/sum(abs(g)));
        else
            t = 1;
        end
    else
        if LS_init == 0
            % Newton step
            t = 1;
        elseif LS_init == 1
            % Close to previous step length
            t = t*min(2,(gtd_old)/(gtd));
        elseif LS_init == 2
            % Quadratic Initialization based on {f,g} and previous f
            t = min(1,2*(f-f_old)/(gtd));
        elseif LS_init == 3
            % Double previous step length
            t = min(1,t*2);
        elseif LS_init == 4
            % Scaled step length if possible
            if isempty(HvFunc)
                % No user-supplied Hessian-vector function,
                % use automatic differentiation
                dHd = d'*autoHv(d,x,g,0,funObj,varargin{:});
            else
                % Use user-supplied Hessian-vector function
                dHd = d'*HvFunc(d,x,varargin{:});
            end

            funEvals = funEvals + 1;
            if dHd > 0
                t = -gtd/(dHd);
            else
                t = min(1,2*(f-f_old)/(gtd));
            end
        end

        if t <= 0
            t = 1;
        end
    end
    f_old = f;
    gtd_old = gtd;

    % Compute reference fr if using non-monotone objective
    if Fref == 1
        fr = f;
    else
        if i == 1
            old_fvals = repmat(-inf,[Fref 1]);
        end

        if i <= Fref
            old_fvals(i) = f;
        else
            old_fvals = [old_fvals(2:end);f];
        end
        fr = max(old_fvals);
    end

    computeHessian = 0;
    if method >= NEWTON
        if HessianIter == 1
            computeHessian = 1;
        elseif i > 1 && mod(i-1,HessianIter) == 0
            computeHessian = 1;
        end
    end

    % Line Search
    f_old = f;
    if LS_type == 0 % Use Armijo Backtracking
        % Perform Backtracking line search
        if computeHessian
            [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;

    elseif LS_type == 1 % Find Point satisfying Wolfe conditions
        if computeHessian
            [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS_interp,LS_multi,25,progTol,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS_interp,LS_multi,25,progTol,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;
        x = x + t*d;

    else
        % Use Matlab optim toolbox line search
        [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter] = ...
            lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,...
            progTol,[],[],[],varargin{:});
        funEvals = funEvals + LSiter;
        if isempty(t)
            exitflag = -2;
            msg = 'Matlab LineSearch failed';
            break;
        end

        if method >= NEWTON
            [f_new,g_new,H] = funObj(x + t*d,varargin{:});
            funEvals = funEvals + 1;
        end
        x = x + t*d;
        f = f_new;
        g = g_new;
    end

    % Compute Optimality Condition
    optCond = max(abs(g));

    % Output iteration information
    if verboseI
        if mod(i,10) == 0
            fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,optCond);
        end
    end

    if nargout > 3
        % Update Trace
        trace.time(end+1,1) = toc;
        trace.fval(end+1,1) = f;
        trace.funcCount(end+1,1) = funEvals;
        trace.optCond(end+1,1) = optCond;
    end

    % Output Function
    if ~isempty(outputFcn)
        stop = outputFcn(x,'iter',i,funEvals,f,t,gtd,g,d,optCond,varargin{:});
        if stop
            exitflag = -1;
            msg = 'Stopped by output function';
            break;
        end
    end

    % Check Optimality Condition
    if optCond <= optTol
        exitflag = 1;
        msg = 'Optimality Condition below optTol';
        break;
    end

    % ******************* Check for lack of progress *******************

    if max(abs(t*d)) <= progTol
        exitflag = 2;
        msg = 'Step Size below progTol';
        break;
    end

    if abs(f-f_old) < progTol
        exitflag = 2;
        msg = 'Function Value changing by less than progTol';
        break;
    end

    % ******** Check for going over iteration/evaluation limit *******************

    if funEvals*funEvalMultiplier >= maxFunEvals
        exitflag = 0;
        msg = 'Reached Maximum Number of Function Evaluations';
        break;
    end

    if i == maxIter
        exitflag = 0;
        msg = 'Reached Maximum Number of Iterations';
        break;
    end

end

if verbose
    fprintf('%s\n',msg);
end
if nargout > 3
    output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,...
        'algorithm',method,'firstorderopt',max(abs(g)),'message',msg,'trace',trace);
end

% Output Function
if ~isempty(outputFcn)
    outputFcn(x,'done',i,funEvals,f,t,gtd,g,d,max(abs(g)),varargin{:});
end

end

--------------------------------------------------------------------------------
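
Worked notes on minFunc.m

minFunc charges numerical differentiation at funEvalMultiplier true objective calls per evaluation: p per gradient for complex-step differentials, 2*p for central differences (numDiff == 2), and p+1 for forward differences. A minimal sketch of the forward-difference case; the toy objective f and the perturbation size mu below are illustrative, not part of the toolbox:

    f = @(x) 0.5*sum(x.^2);        % assumed toy objective
    x = [1; -2; 3];
    p = length(x);
    mu = 1e-6;                     % assumed perturbation size
    fx = f(x);                     % one base evaluation ...
    g = zeros(p,1);
    for j = 1:p                    % ... plus p perturbed ones: p+1 total
        e = zeros(p,1); e(j) = mu;
        g(j) = (f(x+e) - fx)/mu;   % forward difference, O(mu) error
    end
    disp([g x])                    % exact gradient of this quadratic is x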
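
The BB case picks a steepest-descent step length from the most recent pair s = t*d, y = g - g_old: alpha = (s'*y)/(y'*y) for bbType == 0 and alpha = (s'*s)/(s'*y) for bbType == 1, safeguarded to [1e-10, 1e10]. A sketch of the bbType == 1 iteration on an assumed 2-by-2 quadratic (A, the starting point, and the iteration count are made up for illustration):

    A = [3 1; 1 2];                   % SPD, so f(x) = 0.5*x'*A*x is convex
    x = [4; -3]; g = A*x;
    x_old = x; g_old = g;
    x = x - g;                        % first step: plain steepest descent
    for k = 1:25
        g = A*x;
        s = x - x_old;                % plays the role of t*d above
        y = g - g_old;
        alpha = (s'*s)/(s'*y);        % bbType == 1 step length
        if alpha <= 1e-10 || alpha > 1e10
            alpha = 1;                % same safeguard as in the code above
        end
        x_old = x; g_old = g;
        x = x - alpha*g;
    end
    fprintf('||x|| after 25 BB steps: %.2e\n', norm(x));

The objective may rise on individual BB steps, which is one reason the line-search code measures sufficient decrease against the non-monotone reference fr rather than the current f.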
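
The CG case selects beta by the Fletcher-Reeves, Polak-Ribiere, Hestenes-Stiefel, or Gilbert-Nocedal formula and resets d to -g whenever g'*d > -progTol. On a strictly convex quadratic with exact line searches, the Fletcher-Reeves recursion reduces to linear CG and terminates in at most p steps; a sketch on an assumed 2-by-2 problem:

    A = [4 1; 1 3]; b = [1; 2];
    x = zeros(2,1);
    g = A*x - b;                      % gradient of 0.5*x'*A*x - b'*x
    d = -g;
    for k = 1:2
        t = -(g'*d)/(d'*(A*d));       % exact line search on a quadratic
        x = x + t*d;
        g_old = g;
        g = A*x - b;
        beta = (g'*g)/(g_old'*g_old); % Fletcher-Reeves (cgUpdate == 0)
        d = -g + beta*d;
    end
    fprintf('||A*x - b|| after 2 CG steps: %.2e\n', norm(A*x - b));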
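
In the LBFGS case, lbfgsAdd keeps the correction pairs in fixed-size circular buffers S and Y (tracked by lbfgs_start/lbfgs_end), and lbfgsProd/lbfgsProdC turn them into a search direction. The toolbox's own implementations are in lbfgs.m, lbfgsAdd.m, and lbfgsProd.m; the sketch below is a generic two-loop recursion with flat storage and an illustrative function name, not the repo's code:

    function d = lbfgs_two_loop(g, S, Y, Hdiag)
    % d = -H*g for the inverse-Hessian approximation defined by the pairs
    % S(:,i) = x_{i+1}-x_i and Y(:,i) = g_{i+1}-g_i, stored oldest first.
    m = size(S,2);
    rho = zeros(m,1); alpha = zeros(m,1);
    for i = 1:m
        rho(i) = 1/(Y(:,i)'*S(:,i));
    end
    q = -g;                           % start from the negative gradient
    for i = m:-1:1                    % first loop: newest to oldest
        alpha(i) = rho(i)*(S(:,i)'*q);
        q = q - alpha(i)*Y(:,i);
    end
    d = Hdiag*q;                      % initial inverse-Hessian scaling
    for i = 1:m                       % second loop: oldest to newest
        beta = rho(i)*(Y(:,i)'*d);
        d = d + S(:,i)*(alpha(i) - beta);
    end
    end

Hdiag is typically (s'*y)/(y'*y) for the newest pair, which is the scaling lbfgsAdd maintains.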
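
The NEWTON0 case never forms H: conjGrad only needs Hessian-vector products, and autoHv approximates them from one extra gradient call, H*v ~ (g(x + mu*v) - g(x))/mu. A sketch of the idea on an assumed quadratic; gradf and the step-size rule here are illustrative:

    gradf = @(x) [4 1; 1 3]*x;        % assumed gradient of a toy quadratic
    x = [1; 2]; v = [0; 1];
    mu = 2*sqrt(1e-12)*(1 + norm(x))/norm(v);  % small step, scaled to x and v
    Hv = (gradf(x + mu*v) - gradf(x))/mu;
    disp(Hv)                          % equals H*v = [1; 3] here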
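
For HessianModify == 3 (and for the TENSOR fallback), an indefinite Hessian is repaired by a modified spectral decomposition: each eigenvalue is replaced by its absolute value, floored at max(max(abs(D)),1)*1e-12, which guarantees the resulting Newton direction is a descent direction. A self-contained example on an assumed indefinite Hessian:

    H = [2 0; 0 -1];                  % assumed indefinite Hessian
    g = [1; 1];
    [V,D] = eig((H+H')/2);
    D = diag(D);
    D = max(abs(D),max(max(abs(D)),1)*1e-12);
    d = -V*((V'*g)./D);               % modified Newton direction
    fprintf('g''*d = %.3f (negative, so d is a descent direction)\n', g'*d);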
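
With Fref > 1 the Armijo test is non-monotone: sufficient decrease is measured against fr, the maximum objective value over the last Fref iterations, so individual steps may increase f. A sketch of the reference-value bookkeeping, with fvals an invented objective sequence:

    Fref = 3;                          % assumed history length
    old_fvals = repmat(-inf,[Fref 1]);
    fvals = [10 9.5 9.8 9.2 9.6];      % illustrative objective values
    for i = 1:numel(fvals)
        f = fvals(i);
        if i <= Fref
            old_fvals(i) = f;
        else
            old_fvals = [old_fvals(2:end); f];  % sliding window, as above
        end
        fr = max(old_fvals);
        fprintf('iter %d: fr = %.1f\n', i, fr);
    end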
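
ArmijoBacktrack shrinks the trial step until f(x + t*d) <= fr + c1*t*(g'*d) holds, choosing each new trial by polynomial interpolation controlled by LS_interp and LS_multi. The halving loop below is only a minimal stand-in for that search, and every name in it is illustrative:

    f = @(x) 0.5*sum(x.^2);            % assumed toy objective
    x = [3; -4];
    g = x;                             % its gradient at x
    d = -2.5*g;                        % overlong direction, forces backtracking
    c1 = 1e-4; fr = f(x); gtd = g'*d;
    t = 1;
    while f(x + t*d) > fr + c1*t*gtd   % sufficient-decrease test, as above
        t = t/2;                       % the real code interpolates instead
    end
    fprintf('accepted step length t = %g\n', t);

For LS_type == 1, WolfeLineSearch additionally enforces a curvature condition with parameter c2 before accepting t, so the step is neither too long nor too short.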