├── sigm.m
├── tanh_opt.m
├── .gitattributes
├── rproptoolbox
├── minFunc_2012
│   ├── minFunc
│   │   ├── precondDiag.m
│   │   ├── precondTriu.m
│   │   ├── precondTriuDiag.m
│   │   ├── isLegal.m
│   │   ├── compiled
│   │   │   ├── lbfgsC.mexa64
│   │   │   ├── lbfgsC.mexglx
│   │   │   ├── lbfgsC.mexmac
│   │   │   ├── lbfgsC.mexmaci
│   │   │   ├── lbfgsC.mexw32
│   │   │   ├── lbfgsC.mexw64
│   │   │   ├── mcholC.mexa64
│   │   │   ├── mcholC.mexglx
│   │   │   ├── mcholC.mexmac
│   │   │   ├── mcholC.mexw32
│   │   │   ├── mcholC.mexw64
│   │   │   ├── lbfgsAddC.mexa64
│   │   │   ├── lbfgsAddC.mexw64
│   │   │   ├── lbfgsC.mexmaci64
│   │   │   ├── lbfgsProdC.mexa64
│   │   │   ├── lbfgsProdC.mexw64
│   │   │   ├── mcholC.mexmaci64
│   │   │   ├── lbfgsAddC.mexmaci64
│   │   │   └── lbfgsProdC.mexmaci64
│   │   ├── mcholinc.m
│   │   ├── lbfgsUpdate.m
│   │   ├── lbfgsAdd.m
│   │   ├── lbfgsProd.m
│   │   ├── taylorModel.m
│   │   ├── mex
│   │   │   ├── lbfgsAddC.c
│   │   │   ├── lbfgsProdC.c
│   │   │   ├── lbfgsC.c
│   │   │   └── mcholC.c
│   │   ├── lbfgs.m
│   │   ├── dampedUpdate.m
│   │   ├── mchol.m
│   │   ├── conjGrad.m
│   │   ├── polyinterp.m
│   │   ├── minFunc_processInputOptions.m
│   │   ├── ArmijoBacktrack.m
│   │   ├── WolfeLineSearch.m
│   │   └── minFunc.m
│   ├── logisticExample
│   │   ├── LogisticHv.m
│   │   ├── mylogsumexp.m
│   │   ├── LogisticDiagPrecond.m
│   │   ├── LogisticLoss.m
│   │   └── example_minFunc_LR.m
│   ├── mexAll.m
│   ├── autoDif
│   │   ├── autoHv.m
│   │   ├── autoHess.m
│   │   ├── derivativeCheck.m
│   │   ├── autoGrad.m
│   │   ├── autoTensor.m
│   │   └── fastDerivativeCheck.m
│   ├── example_derivativeCheck.m
│   └── example_minFunc.m
├── +Utils
│   └── indent.m
├── Rprop
│   ├── onehump.m
│   ├── costfunction.m
│   ├── rosenbrock.m
│   ├── costfunction_gpu.m
│   ├── Demo_rprop_1.m
│   ├── Demo_rprop_2.m
│   ├── Demo_rprop_3.m
│   └── rprop.m
├── RPROP.txt
├── Contents.txt
└── +GPU
│   └── GPUsupport.m
├── README.md
├── softmax.m
├── DMF_example.m
└── MC_DMF.m

/sigm.m:
--------------------------------------------------------------------------------
1 | function X = sigm(P) 2 | X = 1./(1+exp(-P)); 3 | end
--------------------------------------------------------------------------------
/tanh_opt.m:
--------------------------------------------------------------------------------
1 | function f=tanh_opt(A) 2 | f=1.7159*tanh(2/3.*A); 3 | end
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/precondDiag.m:
--------------------------------------------------------------------------------
1 | function [y] = precondDiag(r,D) 2 | y = D.*r;
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/precondTriu.m:
--------------------------------------------------------------------------------
1 | function [y] = precondTriu(r,U) 2 | y = U \ (U' \ r);
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/precondTriuDiag.m:
--------------------------------------------------------------------------------
1 | function [y] = precondTriuDiag(r,U,D) 2 | y = U \ (D .* (U' \ r));
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/isLegal.m:
--------------------------------------------------------------------------------
1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0;
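% (Annotation, not part of the original file: isLegal is the guard used by
% minFunc's line searches to reject trial values that are complex, NaN, or
% Inf. A quick illustration:
%   isLegal([0.5; -2])   % true
%   isLegal([NaN; 1])    % false - contains NaN
%   isLegal(1/0)         % false - contains Inf
%   isLegal(sqrt(-1))    % false - complex value)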
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexglx -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmac -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw32 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexglx -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmac -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw32: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw32 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexa64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexw64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/mcholC.mexmaci64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsAddC.mexmaci64 -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexmaci64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jicongfan/Matrix-completion-by-deep-matrix-factorization/HEAD/rproptoolbox/minFunc_2012/minFunc/compiled/lbfgsProdC.mexmaci64 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Matrix-completion-by-deep-matrix-factorization 2 | The MATLAB code for the algorithm proposed in the following paper: 3 | J Fan, J Cheng. Matrix completion by deep matrix factorization. Neural Networks 98, 34-41 4 | -------------------------------------------------------------------------------- /softmax.m: -------------------------------------------------------------------------------- 1 | function mu = softmax(eta) 2 | % Softmax function 3 | % mu(i,c) = exp(eta(i,c))/sum_c' exp(eta(i,c')) 4 | 5 | c = 3; 6 | 7 | tmp = exp(c*eta); 8 | denom = sum(tmp, 2); 9 | mu = bsxfun(@rdivide, tmp, denom); 10 | 11 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/mylogsumexp.m: -------------------------------------------------------------------------------- 1 | function lse = mylogsumexp(b) 2 | % does logsumexp across columns 3 | B = max(b,[],2); 4 | lse = log(sum(exp(b-repmat(B,[1 size(b,2)])),2))+B; 5 | 6 | % Old version that used repmatC 7 | %lse = log(sum(exp(b-repmatC(B,[1 size(b,2)])),2))+B; 8 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/mexAll.m: -------------------------------------------------------------------------------- 1 | % minFunc 2 | fprintf('Compiling minFunc files...\n'); 3 | mex -outdir minFunc/compiled minFunc/mex/mcholC.c 4 | mex -outdir minFunc/compiled minFunc/mex/lbfgsC.c 5 | mex -outdir minFunc/compiled minFunc/mex/lbfgsAddC.c 6 | mex -outdir minFunc/compiled minFunc/mex/lbfgsProdC.c 7 | 8 | -------------------------------------------------------------------------------- /rproptoolbox/+Utils/indent.m: -------------------------------------------------------------------------------- 1 | % INDENT Indent text 2 | % This function is used to indent a text that will be printed to the 3 | % standard output, depending on the verbose level. 4 | % 5 | 6 | % Copyright (c) 2011 Roberto Calandra 7 | % $Revision: 0.11 $ 8 | 9 | 10 | function indent(verbose) 11 | 12 | for i=1:verbose 13 | fprintf(' ') 14 | end 15 | 16 | end -------------------------------------------------------------------------------- /rproptoolbox/Rprop/onehump.m: -------------------------------------------------------------------------------- 1 | function [f,gf] = onehump(x) 2 | % ONEHUMP Helper function for Tutorial for the Optimization Toolbox demo 3 | 4 | % Copyright 2008-2009 The MathWorks, Inc. 
5 | % $Revision: 1.1.6.2 $ $Date: 2009/05/07 18:25:30 $ 6 | 7 | r = x(1)^2 + x(2)^2; 8 | s = exp(-r); 9 | f = x(1)*s+r/20; 10 | 11 | if nargout > 1 12 | gf = [(1-2*x(1)^2)*s+x(1)/10; 13 | -2*x(1)*x(2)*s+x(2)/10]; 14 | end 15 |
--------------------------------------------------------------------------------
/rproptoolbox/RPROP.txt:
--------------------------------------------------------------------------------
1 | 2 | References: 3 | [1] Igel, C. and Hüsken, M., Improving the Rprop learning algorithm, 2000 4 | [2] Igel, C. and Hüsken, M., Empirical evaluation of the improved Rprop learning algorithms, 2003 5 | [3] Riedmiller, M., Rprop-description and implementation details, 1994 6 | [4] Riedmiller, M., Advanced supervised learning in multi-layer perceptrons-from backpropagation to adaptive learning algorithms, 1994 7 | 8 | 9 | 10 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/autoHv.m:
--------------------------------------------------------------------------------
1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 3 | % 4 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 5 | % based on gradient values 6 | 7 | if useComplex 8 | mu = 1e-150i; 9 | else 10 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 11 | end 12 | [f,finDif] = funObj(x + v*mu,varargin{:}); 13 | Hv = (finDif-g)/mu;
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/logisticExample/LogisticDiagPrecond.m:
--------------------------------------------------------------------------------
1 | function [m] = LogisticDiagPrecond(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/mcholinc.m:
--------------------------------------------------------------------------------
1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/lbfgsUpdate.m:
--------------------------------------------------------------------------------
1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s];
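% (Annotation, not in the original file: the two assignments at lines 11-12
% are the "limited memory" step - both buffers drop their oldest column and
% append the newest pair. For example, with corrections = 3:
%   old_dirs = [s1 s2 s3]  ->  old_dirs = [s2 s3 s_new]
%   old_stps = [y1 y2 y3]  ->  old_stps = [y2 y3 y_new])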
12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /rproptoolbox/Contents.txt: -------------------------------------------------------------------------------- 1 | File name Revision 2 | ---------------------------------------------------- 3 | +GPU/GPUsupport.m 0.10 4 | +Utils/indent.m 0.11 5 | RPROP.txt 6 | Rprop/costfunction.m 7 | Rprop/costfunction_gpu.m 8 | Rprop/onehump.m 9 | Rprop/rosenbrock.m 10 | Rprop/rprop.m 0.96 11 | Rprop/Demo_rprop_1.m 0.55 12 | Rprop/Demo_rprop_2.m 0.55 13 | Rprop/Demo_rprop_3.m 0.60 14 | Contents.txt 15 | 16 | 17 | Automatically generated by Rpackage 0.26 on 04-Jun-2012 18 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/lbfgsAdd.m: -------------------------------------------------------------------------------- 1 | function [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(y,s,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex) 2 | ys = y'*s; 3 | skipped = 0; 4 | corrections = size(S,2); 5 | if ys > 1e-10 6 | if lbfgs_end < corrections 7 | lbfgs_end = lbfgs_end+1; 8 | if lbfgs_start ~= 1 9 | if lbfgs_start == corrections 10 | lbfgs_start = 1; 11 | else 12 | lbfgs_start = lbfgs_start+1; 13 | end 14 | end 15 | else 16 | lbfgs_start = min(2,corrections); 17 | lbfgs_end = 1; 18 | end 19 | 20 | if useMex 21 | lbfgsAddC(y,s,Y,S,ys,int32(lbfgs_end)); 22 | else 23 | S(:,lbfgs_end) = s; 24 | Y(:,lbfgs_end) = y; 25 | end 26 | YS(lbfgs_end) = ys; 27 | 28 | % Update scale of initial Hessian approximation 29 | Hdiag = ys/(y'*y); 30 | else 31 | skipped = 1; 32 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/lbfgsProd.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgsProd(g,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the negative gradient 6 | 7 | % Set up indexing 8 | [nVars,maxCorrections] = size(S); 9 | if lbfgs_start == 1 10 | ind = 1:lbfgs_end; 11 | nCor = lbfgs_end-lbfgs_start+1; 12 | else 13 | ind = [lbfgs_start:maxCorrections 1:lbfgs_end]; 14 | nCor = maxCorrections; 15 | end 16 | al = zeros(nCor,1); 17 | be = zeros(nCor,1); 18 | 19 | d = -g; 20 | for j = 1:length(ind) 21 | i = ind(end-j+1); 22 | al(i) = (S(:,i)'*d)/YS(i); 23 | d = d-al(i)*Y(:,i); 24 | end 25 | 26 | % Multiply by Initial Hessian 27 | d = Hdiag*d; 28 | 29 | for i = ind 30 | be(i) = (Y(:,i)'*d)/YS(i); 31 | d = d + S(:,i)*(al(i)-be(i)); 32 | end 33 | -------------------------------------------------------------------------------- /rproptoolbox/Rprop/costfunction.m: -------------------------------------------------------------------------------- 1 | % Cost function 2 | % 3 | % Cost function present in the Matlab Help for fminunc function 4 | 5 | function [f,g] = costfunction(x) 6 | % BROWNFG Nonlinear minimization test problem 7 | % 8 | % Evaluate the function 9 | n = length(x); 10 | y = zeros(n,1); 11 | i = 1:(n-1); 12 | y(i)=(x(i).^2).^(x(i+1).^2+1) + (x(i+1).^2).^(x(i).^2+1); 13 | 14 | f=sum(y); 15 | 16 | % Evaluate the gradient if nargout > 1 17 | if nargout > 1 18 | i=1:(n-1); 19 | g = zeros(n,1); 20 | g(i) = 2*(x(i+1).^2+1).*x(i).* ... 
21 | ((x(i).^2).^(x(i+1).^2))+ ... 22 | 2*x(i).*((x(i+1).^2).^(x(i).^2+1)).* ... 23 | log(x(i+1).^2); 24 | g(i+1) = g(i+1) + ... 25 | 2*x(i+1).*((x(i).^2).^(x(i+1).^2+1)).* ... 26 | log(x(i).^2) + ... 27 | 2*(x(i).^2+1).*x(i+1).* ... 28 | ((x(i+1).^2).^(x(i).^2)); 29 | end 30 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum(mylogsumexp([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | %g = -X.'*(y./(1+exp(yXw))); 19 | g = -(X.'*(y./(1+exp(yXw)))); 20 | end 21 | end 22 | 23 | if nargout > 2 24 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 25 | end 26 | 27 | if nargout > 3 28 | T = zeros(p,p,p); 29 | for j1 = 1:p 30 | for j2 = 1:p 31 | for j3 = 1:p 32 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 33 | end 34 | end 35 | end 36 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /DMF_example.m: -------------------------------------------------------------------------------- 1 | % This is a toy example of DMF 2 | clc 3 | clear all 4 | % Generate synthetic data 5 | missrate=0.5;% missing rate 6 | m=20; 7 | n=100; 8 | r=2; 9 | x=unifrnd(-1,1,[r,n]); 10 | X=randn(m,r)*x+(randn(m,r)*x.^2+randn(m,r)*x.^3);% polynomial function 11 | % mask 12 | N=size(X,2); 13 | [nr,nc]=size(X); 14 | M=ones(nr,nc); 15 | for i=1:N 16 | temp=randperm(nr,ceil(nr*missrate));% 1 17 | M(temp,i)=0; 18 | end 19 | X0=X;% complete data (original) 20 | X=X.*M;% incomplete data masked by M (binary matrix) 21 | % DMF setup 22 | s=[r 10 m];% input size, hidden size 1, ..., output size 23 | options.Wp=0.01; 24 | options.Zp=0.01; 25 | options.maxiter=1000; 26 | options.activation_func={'tanh_opt','linear'}; 27 | [X_DMF,NN_MF]=MC_DMF(X',M',s,options); 28 | Xr=X_DMF'; 29 | % compute recovery error 30 | re_error=norm((X0-Xr).*(1-M),'fro')/norm(X0.*(1-M),'fro'); 31 | disp(['Relative recovery error is ' num2str(re_error)]) 32 | -------------------------------------------------------------------------------- /rproptoolbox/Rprop/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the 
function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end -------------------------------------------------------------------------------- /rproptoolbox/Rprop/costfunction_gpu.m: -------------------------------------------------------------------------------- 1 | % Cost function (using gpu) 2 | % 3 | % Cost function present in the Matlab Help for fminunc function 4 | 5 | function [f,g] = costfunction_gpu(x) 6 | % BROWNFG Nonlinear minimization test problem 7 | % 8 | % Evaluate the function 9 | n = length(x); 10 | y = parallel.gpu.GPUArray.zeros(n,1); 11 | i = 1:(n-1); 12 | y(i)=(x(i).^2).^(x(i+1).^2+1) + (x(i+1).^2).^(x(i).^2+1); 13 | 14 | f=sum(y); 15 | 16 | % Evaluate the gradient if nargout > 1 17 | if nargout > 1 18 | i=1:(n-1); 19 | g = parallel.gpu.GPUArray.zeros(n,1); 20 | g(i) = 2*(x(i+1).^2+1).*x(i).* ... 21 | ((x(i).^2).^(x(i+1).^2))+ ... 22 | 2*x(i).*((x(i+1).^2).^(x(i).^2+1)).* ... 23 | log(x(i+1).^2); 24 | g(i+1) = g(i+1) + ... 25 | 2*x(i+1).*((x(i).^2).^(x(i+1).^2+1)).* ... 26 | log(x(i).^2) + ... 27 | 2*(x(i).^2+1).*x(i+1).* ... 28 | ((x(i+1).^2).^(x(i).^2)); 29 | end 30 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/lbfgsAddC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgsAdd.m for details */ 5 | /* This function will not exit gracefully on bad input! 
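   (Annotation, not in the original source: this MEX routine is the in-place
   counterpart of the pure-MATLAB branch in lbfgsAdd.m - it copies s and y
   into column lbfgs_end of S and Y without allocating new arrays.)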
*/ 6 | 7 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 8 | { 9 | /* Variable Declarations */ 10 | 11 | double *s,*y,*S, *Y, ys; 12 | int i,j,nVars,lbfgs_end; 13 | 14 | /* Get Input Pointers */ 15 | 16 | y = mxGetPr(prhs[0]); 17 | s = mxGetPr(prhs[1]); 18 | Y = mxGetPr(prhs[2]); 19 | S = mxGetPr(prhs[3]); 20 | ys= mxGetScalar(prhs[4]); 21 | lbfgs_end = (int)mxGetScalar(prhs[5]); 22 | 23 | if (!mxIsClass(prhs[5],"int32")) 24 | mexErrMsgTxt("lbfgs_end must be int32"); 25 | 26 | /* Compute number of variables, maximum number of corrections */ 27 | 28 | nVars = mxGetDimensions(prhs[2])[0]; 29 | 30 | for(j=0;j<nVars;j++) { 31 | Y[(lbfgs_end-1)*nVars+j] = y[j]; 32 | S[(lbfgs_end-1)*nVars+j] = s[j]; 33 | } 34 | }
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/derivativeCheck.m:
--------------------------------------------------------------------------------
1 | function derivativeCheck(funObj,x,order,type,varargin) 2 | % derivativeCheck(funObj,x,order,type,varargin) 3 | % 4 | % Checks the user-supplied gradient (order = 1) or Hessian (order = 2) of 5 | % funObj against a numerical approximation of the given type 6 | 7 | if nargin < 4 8 | type = 2; % Use central-differencing by default 9 | if nargin < 3 10 | order = 1; % Only check gradient by default 11 | end 12 | end 13 | 14 | if order == 2 15 | [f,g,H] = funObj(x,varargin{:}); 16 | 17 | fprintf('Checking Hessian...\n'); 18 | [f2,g2,H2] = autoHess(x,type,funObj,varargin{:}); 19 | 20 | fprintf('Max difference between user and numerical Hessian: %e\n',max(abs(H(:)-H2(:)))); 21 | 22 | if max(abs(H(:)-H2(:))) > 1e-4 23 | H 24 | H2 25 | diff = abs(H-H2) 26 | pause; 27 | end 28 | else 29 | [f,g] = funObj(x,varargin{:}); 30 | 31 | fprintf('Checking Gradient...\n'); 32 | [f2,g2] = autoGrad(x,type,funObj,varargin{:}); 33 | 34 | fprintf('Max difference between user and numerical gradient: %e\n',max(abs(g-g2))); 35 | if max(abs(g-g2)) > 1e-4 36 | fprintf('User NumDif:\n'); 37 | [g g2] 38 | diff = abs(g-g2) 39 | pause 40 | end 41 | end 42 | 43 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/autoGrad.m:
--------------------------------------------------------------------------------
1 | function [f,g] = autoGrad(x,type,funObj,varargin) 2 | % [f,g] = autoGrad(x,type,funObj,varargin) 3 | % 4 | % Numerically compute gradient of objective function from function values 5 | % 6 | % type = 7 | % 1 - forward-differencing (p+1 evaluations) 8 | % 2 - central-differencing (more accurate, but requires 2p evaluations) 9 | % 3 - complex-step derivative (most accurate and only requires p evaluations, but only works for certain objectives) 10 | 11 | p = length(x); 12 | 13 | if type == 1 % Use Finite Differencing 14 | f = funObj(x,varargin{:}); 15 | mu = 2*sqrt(1e-12)*(1+norm(x)); 16 | diff = zeros(p,1); 17 | for j = 1:p 18 | e_j = zeros(p,1); 19 | e_j(j) = 1; 20 | diff(j,1) = funObj(x + mu*e_j,varargin{:}); 21 | end 22 | g = (diff-f)/mu; 23 | elseif type == 3 % Use Complex Differentials 24 | mu = 1e-150; 25 | diff = zeros(p,1); 26 | for j = 1:p 27 | e_j = zeros(p,1); 28 | e_j(j) = 1; 29 | diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); 30 | end 31 | f = mean(real(diff)); 32 | g = imag(diff)/mu; 33 | else % Use Central Differencing 34 | mu = 2*sqrt(1e-12)*(1+norm(x)); 35 | diff1 = zeros(p,1); 36 | diff2 = zeros(p,1); 37 | for j = 1:p 38 | e_j = zeros(p,1); 39 | e_j(j) = 1; 40 | diff1(j,1) = funObj(x + mu*e_j,varargin{:}); 41 | diff2(j,1) = funObj(x - mu*e_j,varargin{:}); 42 | end 43 | f = mean([diff1;diff2]); 44 | g = (diff1 - diff2)/(2*mu); 45 | end 46 | 47 | if 0 % DEBUG CODE 48 | [fReal gReal] = funObj(x,varargin{:}); 49 | [fReal f] 50 | [gReal g] 51 | diff 52 | pause; 53 | end
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/autoDif/autoTensor.m:
--------------------------------------------------------------------------------
1 | function [f,g,H,T] = autoTensor(x,type,funObj,varargin) 2 | % [f,g,H,T] = autoTensor(x,type,funObj,varargin) 3 | % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values 4 | 5 | p = length(x); 6 | 7 | if type == 2 % Use Central Differencing 8 | mu = 2*sqrt(1e-12)*(1+norm(x)); 9 | f1 = zeros(p,1); 10 | f2 = zeros(p,1); 11 | g1 = zeros(p); 12 | g2 = zeros(p); 13 | diff = zeros(p,p,p); 14 | for j = 1:p 15 | e_j = zeros(p,1); 16 | e_j(j) = 1; 17 | [f1(j) g1(:,j) diff1(:,:,j)] = funObj(x + mu*e_j,varargin{:}); 18 | [f2(j) g2(:,j) diff2(:,:,j)] = funObj(x - mu*e_j,varargin{:}); 19 | end 20 | f = mean([f1;f2]); 21 | g = mean([g1 g2],2); 22 | H = mean(cat(3,diff1,diff2),3); 23 | T = (diff1-diff2)/(2*mu); 24 | elseif type == 3 % Use Complex Differentials 25 | mu = 1e-150; 26 | f = zeros(p,1); 27 | g = zeros(p); 28 | diff = zeros(p,p,p); 29 | for j = 1:p 30 | e_j = zeros(p,1); 31 | e_j(j) = 1; 32 | [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); 33 | end 34 | f = mean(real(f)); 35 | g = mean(real(g),2); 36 | H = mean(real(diff),3); 37 | T = imag(diff)/mu; 38 | else % Use finite differencing 39 | mu = 2*sqrt(1e-12)*(1+norm(x)); 40 | [f,g,H] = funObj(x,varargin{:}); 41 | diff = zeros(p,p,p); 42 | for j = 1:p 43 | e_j = zeros(p,1); 44 | e_j(j) = 1; 45 | [~,~,diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); 46 | end 47 | T = (diff-repmat(H,[1 1 p]))/mu; 48 | end
--------------------------------------------------------------------------------
/rproptoolbox/Rprop/Demo_rprop_1.m:
--------------------------------------------------------------------------------
1 | % Compare Rprop to fminunc 2 | % 3 | % Copyright (c) 2012 Roberto Calandra 4 | % $Revision: 0.55 $ 5 | 6 | 7 | %% Init 8 | 9 | funcgrad = @onehump; 10 | 11 | minfunc = 10e-6; 12 | niter = 10; 13 | 14 | p.verbosity = 0; % Increase verbosity to print something 15 | p.MaxIter = 10000; % Maximum number of iterations 16 | p.d_Obj = minfunc; 17 | p.method = 'IRprop-'; % Use IRprop- algorithm 18 | p.display = 0; 19 | 20 | p2.length = 20; 21 | 22 | options = optimset('GradObj','on','TolFun', minfunc,'Display','off'); 23 | 24 | 25 | %% Compute 26 | 27 | for iter = 1:niter 28 | 29 | 30 | a.max = 3; 31 | a.min = 0; 32 | x0 = Utils.rrand([2,1],a); % Randomize initial point 33 | 34 | tic 35 | [x1,~,~,stats1] = rprop(funcgrad,x0,p); 36 | t1(iter)=toc; 37 | 38 | tic 39 | [x2,~,~,stats2] = fminunc(funcgrad,x0,options); 40 | t2(iter)=toc; 41 | 42 | %tic 43 | %[X, stats3, i] = minimize(x0, funcgrad,p2); 44 | %t3(iter)=toc; 45 | 46 | end 47 | 48 | 49 | %% Plot results 50 | 51 | fprintf('Average Running time to reach an Obj. value of %2.0e:\n',minfunc) 52 | fprintf('Rprop: %f\n',mean(t1)); 53 | fprintf('Fminunc: %f\n',mean(t2)); 54 | 55 | figure() 56 | Utils.rplot(@plot,{t1,t2}) 57 | legend(p.method,'fminunc') 58 | xlabel('Experiment number') 59 | ylabel('Time (sec)') 60 | 61 |
--------------------------------------------------------------------------------
/rproptoolbox/minFunc_2012/minFunc/mchol.m:
--------------------------------------------------------------------------------
1 | function [l,d,perm] = mchol(A,mu) 2 | % [l,d,perm] = mchol(A,mu) 3 | % Compute the Gill-Murray modified LDL factorization of A 4 | 5 | if nargin < 2 6 | mu = 1e-12; 7 | end 8 | 9 | n = size(A,1); 10 | l = eye(n); 11 | d = zeros(n,1); 12 | perm = 1:n; 13 | 14 | for i = 1:n 15 | c(i,i) = A(i,i); 16 | end 17 | 18 | % Compute modification parameters 19 | gamma = max(abs(diag(A))); 20 | xi = max(max(abs(A - diag(diag(A))))); 21 | delta = mu*max(gamma+xi,1); 22 | if n > 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end
--------------------------------------------------------------------------------
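A minimal usage sketch for mchol, added by the editor (not a file from this repository; the matrix H and gradient g below are made-up values): even when the Hessian is indefinite, solving with the factors of the Gill-Murray modification still yields a descent direction.

    H = [1 2; 2 1];               % indefinite Hessian: eigenvalues are 3 and -1
    g = [1; -2];                  % gradient at the current iterate
    [l,d,perm] = mchol(H);        % factors of a positive-definite modification of H(perm,perm)
    rhs = -g(perm);               % permute the right-hand side
    z = l' \ ((l \ rhs) ./ d);    % solve L*diag(d)*L' * z = rhs by substitution
    dk = zeros(2,1);
    dk(perm) = z;                 % undo the symmetric permutation
    assert(g'*dk < 0)             % descent direction despite the indefinite H

This mirrors how a modified-Newton method would use the factorization: the diagonal modification guarantees positive definiteness, so the resulting Newton-like direction cannot point uphill.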
/rproptoolbox/Rprop/Demo_rprop_2.m: -------------------------------------------------------------------------------- 1 | % DEMO_RPROP_2 Compare the four Rprop methods 2 | % 3 | % 4 | 5 | % Copyright (c) 2011 Roberto Calandra 6 | % $Revision: 0.55 $ 7 | 8 | 9 | %% Init 10 | 11 | funcgrad = @costfunction; % Function to optimize 12 | 13 | a.max = 3; 14 | a.min = 0; 15 | x0 = Utils.rrand([5000,1],a); 16 | 17 | p.verbosity = 1; % Increase verbosity to print something 18 | p.MaxIter = 100; % Maximum number of iterations 19 | p.display = 0; 20 | 21 | 22 | %% Compute 23 | 24 | p.method = 'Rprop-'; % Define algorithm to use 25 | [x1,~,~,stats1] = rprop(funcgrad,x0,p); 26 | 27 | p.method = 'Rprop+'; % Define algorithm to use 28 | [x2,~,~,stats2] = rprop(funcgrad,x0,p); 29 | 30 | p.method = 'IRprop-'; % Define algorithm to use 31 | [x3,~,~,stats3] = rprop(funcgrad,x0,p); 32 | 33 | p.method = 'IRprop+'; % Define algorithm to use 34 | [x4,~,~,stats4] = rprop(funcgrad,x0,p); 35 | 36 | 37 | %% Plot results 38 | 39 | figure() 40 | Utils.rplot(@semilogy,{stats1.error, stats2.error, stats3.error, stats4.error}) 41 | legend('Rprop-','Rprop+','IRprop-','IRprop+','Location','SouthWest') 42 | xlabel('Number of iterations') 43 | ylabel('Obj. Value') 44 | 45 | figure() 46 | Utils.rplot(@semilogy,{stats1.time, stats2.time, stats3.time, stats4.time},... 47 | {stats1.error, stats2.error, stats3.error, stats4.error}) 48 | legend('Rprop-','Rprop+','IRprop-','IRprop+','Location','SouthWest') 49 | xlabel('Time (s)') 50 | ylabel('Obj. Value') 51 | 52 | drawnow 53 | 54 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/example_derivativeCheck.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 250; 4 | nVars = 10; 5 | X = randn(nInst,nVars); 6 | w = randn(nVars,1); 7 | y = sign(X*w + randn(nInst,1)); 8 | 9 | wTest = randn(nVars,1); 10 | 11 | fprintf('Testing gradient using forward-differencing...\n'); 12 | order = 1; 13 | derivativeCheck(@LogisticLoss,wTest,order,1,X,y); 14 | 15 | fprintf('Testing gradient using central-differencing...\n'); 16 | derivativeCheck(@LogisticLoss,wTest,order,2,X,y); 17 | 18 | fprintf('Testing gradient using complex-step derivative...\n'); 19 | derivativeCheck(@LogisticLoss,wTest,order,3,X,y); 20 | 21 | fprintf('\n\n\n'); 22 | pause 23 | 24 | fprintf('Testing Hessian using forward-differencing\n'); 25 | order = 2; 26 | derivativeCheck(@LogisticLoss,wTest,order,1,X,y); 27 | 28 | fprintf('Testing Hessian using central-differencing\n'); 29 | order = 2; 30 | derivativeCheck(@LogisticLoss,wTest,order,2,X,y); 31 | 32 | fprintf('Testing Hessian using complex-step derivative\n'); 33 | order = 2; 34 | derivativeCheck(@LogisticLoss,wTest,order,3,X,y); 35 | 36 | fprintf('\n\n\n'); 37 | pause 38 | 39 | fprintf('Testing gradient using fastDerivativeCheck...\n'); 40 | order = 1; 41 | fastDerivativeCheck(@LogisticLoss,wTest,order,1,X,y); 42 | fastDerivativeCheck(@LogisticLoss,wTest,order,2,X,y); 43 | fastDerivativeCheck(@LogisticLoss,wTest,order,3,X,y); 44 | 45 | fprintf('\n\n\n'); 46 | pause 47 | 48 | fprintf('Testing Hessian using fastDerivativeCheck...\n'); 49 | order = 2; 50 | fastDerivativeCheck(@LogisticLoss,wTest,order,1,X,y); 51 | fastDerivativeCheck(@LogisticLoss,wTest,order,2,X,y); 52 | fastDerivativeCheck(@LogisticLoss,wTest,order,3,X,y); 53 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/autoDif/fastDerivativeCheck.m: 
--------------------------------------------------------------------------------
1 | function diff = fastDerivativeCheck(funObj,x,order,type,varargin) 2 | % diff = fastDerivativeCheck(funObj,x,order,type,varargin) 3 | 4 | if nargin < 3 5 | order = 1; % Only check gradient by default 6 | end 7 | if nargin < 4 8 | type = 2; % Use central-differencing by default 9 | end 10 | 11 | p = length(x); 12 | d = sign(randn(p,1)); 13 | 14 | if order == 2 15 | fprintf('Checking Hessian-vector product along random direction:\n'); 16 | [f,g,H] = funObj(x,varargin{:}); 17 | Hv = H*d; 18 | if type == 1 % Use Finite Differencing 19 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 20 | [diff,diffa] = funObj(x+d*mu,varargin{:}); 21 | Hv2 = (diffa-g)/mu; 22 | elseif type == 3 % Use Complex Differentials 23 | mu = 1e-150; 24 | [diff,diffa] = funObj(x+d*mu*i,varargin{:}); 25 | Hv2 = imag(diffa-g)/mu; 26 | else % Use Central Differencing 27 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 28 | [diff1,diffa] = funObj(x+d*mu,varargin{:}); 29 | [diff2,diffb] = funObj(x-d*mu,varargin{:}); 30 | Hv2 = (diffa-diffb)/(2*mu); 31 | end 32 | fprintf('Max difference between user and numerical Hessian-vector product: %e\n',max(abs(Hv-Hv2))); 33 | else 34 | fprintf('Checking Gradient along random direction:\n'); 35 | [f,g] = funObj(x,varargin{:}); 36 | gtd = g'*d; 37 | if type == 1 % Use Finite Differencing 38 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 39 | diff = funObj(x+d*mu,varargin{:}); 40 | gtd2 = (diff-f)/mu; 41 | elseif type == 3 % Use Complex Differentials 42 | mu = 1e-150; 43 | [diff,diffa] = funObj(x+d*mu*i,varargin{:}); 44 | gtd2 = imag(diff)/mu; 45 | else % Use Central Differencing 46 | mu = 2*sqrt(1e-12)*(1+norm(x))/(1+norm(x)); 47 | diff1 = funObj(x+d*mu,varargin{:}); 48 | diff2 = funObj(x-d*mu,varargin{:}); 49 | gtd2 = (diff1-diff2)/(2*mu); 50 | end 51 | fprintf('Max difference between user and numerical directional-derivative: %e\n',max(abs(gtd-gtd2))); 52 | end
--------------------------------------------------------------------------------
/rproptoolbox/Rprop/Demo_rprop_3.m:
--------------------------------------------------------------------------------
1 | % DEMO_RPROP_3 Rprop with GPU acceleration 2 | % Show the use of GPU acceleration for the Rprop function and compare its 3 | % performance with the normal CPU-computed version 4 | % 5 | 6 | % Copyright (c) 2011 Roberto Calandra 7 | % $Revision: 0.60 $ 8 | 9 | 10 | %% Init 11 | 12 | numdim = [1000000 500000 100000 50000 10000 5000 1000]; 13 | 14 | p.verbosity = 1; % Increase verbosity to print something 15 | p.MaxIter = 300; % Maximum number of iterations 16 | p.d_Obj = 10e-12; % Desired objective value 17 | 18 | 19 | %% Compute 20 | 21 | t = 1; 22 | 23 | for i = numdim 24 | 25 | a.max = 3; 26 | a.min = 0; 27 | x0 = Utils.rrand([i,1],a); 28 | 29 | % with GPU 30 | funcgrad = @costfunction_gpu; % Function to optimize 31 | p.useGPU = true; % use GPU acceleration if possible? 32 | p.funcgradgpu = true; % does the cost function accept and 33 | % return variables as gpuArray? 34 | [x1,~,~,stats1] = rprop(funcgrad,x0,p); 35 | 36 | 37 | % with CPU 38 | funcgrad = @costfunction; % Function to optimize 39 | p.useGPU = false; % use GPU acceleration if possible? 40 | [x2,~,~,stats2] = rprop(funcgrad,x0,p); 41 | 42 | 43 | res.time1(t) = stats1.time(end); 44 | res.time2(t) = stats2.time(end); 45 | 46 | t = t+1; 47 | 48 | end 49 | 50 | 51 | %% Plot results 52 | 53 | figure() 54 | Utils.rplot(@loglog,{numdim,numdim},...
55 | {res.time1, res.time2}) 56 | legend('GPU','CPU','Location','SouthEast') 57 | ylabel('Time (s)') 58 | xlabel('Number of parameters') 59 | 60 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/conjGrad.m: -------------------------------------------------------------------------------- 1 | function [x,k,res,negCurv] = cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVectArgs) 2 | % [x,k,res,negCurv] = 3 | % cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVect 4 | % Args) 5 | % Linear Conjugate Gradient, where optionally we use 6 | % - preconditioner on vector v with precFunc(v,precArgs{:}) 7 | % - matrix multipled by vector with matrixVectFunc(v,matrixVectArgs{:}) 8 | 9 | if nargin <= 4 10 | verbose = 0; 11 | end 12 | 13 | x = zeros(size(b)); 14 | r = -b; 15 | 16 | % Apply preconditioner (if supplied) 17 | if nargin >= 7 && ~isempty(precFunc) 18 | y = precFunc(r,precArgs{:}); 19 | else 20 | y = r; 21 | end 22 | 23 | ry = r'*y; 24 | p = -y; 25 | k = 0; 26 | 27 | res = norm(r); 28 | done = 0; 29 | negCurv = []; 30 | while res > optTol & k < maxIter & ~done 31 | % Compute Matrix-vector product 32 | if nargin >= 9 33 | Ap = matrixVectFunc(p,matrixVectArgs{:}); 34 | else 35 | Ap = A*p; 36 | end 37 | pAp = p'*Ap; 38 | 39 | % Check for negative Curvature 40 | if pAp <= 1e-16 41 | if verbose 42 | fprintf('Negative Curvature Detected!\n'); 43 | end 44 | 45 | if nargout == 4 46 | if pAp < 0 47 | negCurv = p; 48 | return 49 | end 50 | end 51 | 52 | if k == 0 53 | if verbose 54 | fprintf('First-Iter, Proceeding...\n'); 55 | end 56 | done = 1; 57 | else 58 | if verbose 59 | fprintf('Stopping\n'); 60 | end 61 | break; 62 | end 63 | end 64 | 65 | % Conjugate Gradient 66 | alpha = ry/(pAp); 67 | x = x + alpha*p; 68 | r = r + alpha*Ap; 69 | 70 | % If supplied, apply preconditioner 71 | if nargin >= 7 && ~isempty(precFunc) 72 | y = precFunc(r,precArgs{:}); 73 | else 74 | y = r; 75 | end 76 | 77 | ry_new = r'*y; 78 | beta = ry_new/ry; 79 | p = -y + beta*p; 80 | k = k + 1; 81 | 82 | % Update variables 83 | ry = ry_new; 84 | res = norm(r); 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/lbfgsProdC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgsProd.m for details */ 5 | /* This function will not exit gracefully on bad input! 
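   (Annotation, not in the original source: this is the C counterpart of the
   two-loop recursion in lbfgsProd.m, applied to d = -g over the stored
   corrections -
     backward pass:  alpha(i) = (s_i'*d)/(y_i'*s_i);  d = d - alpha(i)*y_i
     scaling:        d = Hdiag*d
     forward pass:   beta(i)  = (y_i'*d)/(y_i'*s_i);  d = d + (alpha(i)-beta(i))*s_i)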
*/ 6 | 7 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 8 | { 9 | /* Variable Declarations */ 10 | 11 | double *S, *Y, *YS, *g, Hdiag, *d, *alpha, *beta; 12 | int i,j,nVars,nCor,maxCor,lbfgs_start,lbfgs_end; 13 | 14 | /* Get Input Pointers */ 15 | 16 | g = mxGetPr(prhs[0]); 17 | S = mxGetPr(prhs[1]); 18 | Y = mxGetPr(prhs[2]); 19 | YS= mxGetPr(prhs[3]); 20 | lbfgs_start = (int)mxGetScalar(prhs[4]); 21 | lbfgs_end = (int)mxGetScalar(prhs[5]); 22 | Hdiag = mxGetScalar(prhs[6]); 23 | 24 | if (!mxIsClass(prhs[4],"int32")||!mxIsClass(prhs[5],"int32")) 25 | mexErrMsgTxt("lbfgs_start and lbfgs_end must be int32"); 26 | 27 | /* Compute number of variables, maximum number of corrections */ 28 | 29 | nVars = mxGetDimensions(prhs[1])[0]; 30 | maxCor = mxGetDimensions(prhs[1])[1]; 31 | 32 | /* Compute number of corrections available */ 33 | if (lbfgs_start == 1) 34 | nCor = lbfgs_end-lbfgs_start+1; 35 | else 36 | nCor = maxCor; 37 | 38 | /* Allocate Memory for Local Variables */ 39 | alpha = mxCalloc(nCor,sizeof(double)); 40 | beta = mxCalloc(nCor,sizeof(double)); 41 | 42 | /* Set-up Output Vector */ 43 | plhs[0] = mxCreateDoubleMatrix(nVars,1,mxREAL); 44 | d = mxGetPr(plhs[0]); 45 | 46 | for(j=0;j= 0;i--) { 50 | alpha[i] = 0; 51 | for(j=0;j= lbfgs_start-1;i--) { 59 | alpha[i] = 0; 60 | for(j=0;j2 30 | fprintf ('NVIDIA driver version: %s\n',driverver) 31 | end 32 | 33 | catch 34 | 35 | % No GPU? 36 | if verbose 37 | warning('Impossible to Identify GPU(s)') 38 | end 39 | support = false; 40 | return 41 | 42 | end 43 | 44 | 45 | %% Analyze GPU(s) 46 | 47 | gpucapable = zeros([ngpu 1]); 48 | for ii = 1:ngpu 49 | try 50 | m(ii) = gpuDevice(ii); 51 | 52 | gpucapable(ii)=m(ii).DeviceSupported; 53 | 54 | if verbose>1 55 | if gpucapable(ii) 56 | 57 | fprintf('GPU %d: %s with CUDA support (v.%s)\n',... 58 | ii,m(ii).Name,m(ii).ComputeCapability) 59 | 60 | else 61 | 62 | fprintf('GPU %d: %s does NOT have CUDA support >1.3 (v.%s)\n',... 63 | ii,m(ii).Name,m(ii).ComputeCapability) 64 | 65 | end 66 | end 67 | 68 | catch 69 | warning(['GPU ' num2str(ii) ' doesn"t respond']) 70 | 71 | end 72 | end 73 | 74 | 75 | %% Is there a GPU supported? 
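% (Annotation, not in the original file: the equivalent manual check in a
% MATLAB session is
%   gpu = gpuDevice; gpu.DeviceSupported
% which queries the same per-device flag that is accumulated below.)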
76 | 77 | ngpusupported = sum(gpucapable); 78 | 79 | if ngpusupported 80 | support = true; 81 | if verbose 82 | fprintf ('Supported GPU found\n',ngpu) 83 | end 84 | else 85 | support = false; 86 | 87 | if verbose 88 | warning('No supported GPU found') 89 | end 90 | end 91 | 92 | 93 | %% Select best GPU for computations 94 | 95 | if ngpusupported>1 96 | % based either on Gflops or Memory (and support) 97 | if verbose>1 98 | %fprintf('Selected GPU %i') 99 | end 100 | 101 | end 102 | 103 | 104 | end 105 | 106 | 107 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/logisticExample/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 200; 5 | X = randn(nInst,nVars); 6 | w = randn(nVars,1); 7 | y = sign(X*w + randn(nInst,1)); 8 | 9 | w_init = zeros(nVars,1); 10 | funObj = @(w)LogisticLoss(w,X,y); 11 | 12 | fprintf('\nRunning Steepest Descent\n'); 13 | options.Method = 'sd'; 14 | minFunc(@LogisticLoss,w_init,options,X,y); 15 | pause; 16 | 17 | fprintf('\nRunning Cyclic Steepest Descent\n'); 18 | options.Method = 'csd'; 19 | minFunc(@LogisticLoss,w_init,options,X,y); 20 | pause; 21 | 22 | fprintf('\nRunning Conjugate Gradient\n'); 23 | options.Method = 'cg'; 24 | minFunc(@LogisticLoss,w_init,options,X,y); 25 | pause; 26 | 27 | fprintf('\nRunning Scaled Conjugate Gradient\n'); 28 | options.Method = 'scg'; 29 | minFunc(@LogisticLoss,w_init,options,X,y); 30 | pause; 31 | 32 | fprintf('\nRunning Preconditioned Conjugate Gradient (Diagonal preconditioner)\n'); 33 | options.Method = 'pcg'; 34 | options.precFunc = @LogisticDiagPrecond; 35 | minFunc(@LogisticLoss,w_init,options,X,y); 36 | pause; 37 | 38 | fprintf('\nRunning Preconditioned Conjugate Gradient (L-BFGS preconditioner)\n'); 39 | options.Method = 'pcg'; 40 | options.precFunc = []; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('\nRunning Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 45 | options.Method = 'newton0'; 46 | minFunc(@LogisticLoss,w_init,options,X,y); 47 | pause; 48 | 49 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 50 | options.Method = 'pnewton0'; 51 | options.precFunc = @LogisticDiagPrecond; 52 | minFunc(@LogisticLoss,w_init,options,X,y); 53 | pause; 54 | 55 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 56 | options.Method = 'pnewton0'; 57 | options.precFunc = []; 58 | minFunc(@LogisticLoss,w_init,options,X,y); 59 | pause; 60 | 61 | fprintf('\nRunning Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 62 | options.Method = 'newton0'; 63 | options.HvFunc = @LogisticHv; 64 | minFunc(@LogisticLoss,w_init,options,X,y); 65 | pause; 66 | 67 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 68 | options.Method = 'pnewton0'; 69 | options.HvFunc = @LogisticHv; 70 | options.precFunc = @LogisticDiagPrecond; 71 | minFunc(@LogisticLoss,w_init,options,X,y); 72 | pause; 73 | 74 | fprintf('\nRunning Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 75 | options.Method = 'pnewton0'; 76 | options.precFunc = []; 77 | options.HvFunc = @LogisticHv; 78 | minFunc(@LogisticLoss,w_init,options,X,y); 79 | pause; 
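% (Annotation, not in the original script: all runs above minimize the same
% objective, so the printed final function values are directly comparable.
% To suppress the per-iteration trace for any of them, set
%   options.Display = 'none';
% before the corresponding minFunc call.)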
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/example_minFunc.m: -------------------------------------------------------------------------------- 1 | % Runs various limited-memory solvers on 2D rosenbrock function for 25 2 | % function evaluations 3 | maxFunEvals = 25; 4 | 5 | fprintf('Result after %d evaluations of limited-memory solvers on 2D rosenbrock:\n',maxFunEvals); 6 | 7 | fprintf('---------------------------------------\n'); 8 | fprintf('x1 = %.4f, x2 = %.4f (starting point)\n',0,0); 9 | fprintf('x1 = %.4f, x2 = %.4f (optimal solution)\n',1,1); 10 | fprintf('---------------------------------------\n'); 11 | 12 | if exist('minimize') == 2 13 | % Minimize.m - conjugate gradient method 14 | x = minimize([0 0]', 'rosenbrock', -maxFunEvals); 15 | fprintf('x1 = %.4f, x2 = %.4f (minimize.m by C. Rasmussen)\n',x(1),x(2)); 16 | end 17 | 18 | options = []; 19 | options.display = 'none'; 20 | options.maxFunEvals = maxFunEvals; 21 | 22 | % Steepest Descent 23 | options.Method = 'sd'; 24 | x = minFunc(@rosenbrock,[0 0]',options); 25 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with steepest descent)\n',x(1),x(2)); 26 | 27 | % Cyclic Steepest Descent 28 | options.Method = 'csd'; 29 | x = minFunc(@rosenbrock,[0 0]',options); 30 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with cyclic steepest descent)\n',x(1),x(2)); 31 | 32 | % Barzilai & Borwein 33 | options.Method = 'bb'; 34 | options.bbType = 1; 35 | x = minFunc(@rosenbrock,[0 0]',options); 36 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with spectral gradient descent)\n',x(1),x(2)); 37 | 38 | % Hessian-Free Newton 39 | options.Method = 'newton0'; 40 | x = minFunc(@rosenbrock,[0 0]',options); 41 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with Hessian-free Newton)\n',x(1),x(2)); 42 | 43 | % Hessian-Free Newton w/ L-BFGS preconditioner 44 | options.Method = 'pnewton0'; 45 | x = minFunc(@rosenbrock,[0 0]',options); 46 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned Hessian-free Newton)\n',x(1),x(2)); 47 | 48 | % Conjugate Gradient 49 | options.Method = 'cg'; 50 | x = minFunc(@rosenbrock,[0 0]',options); 51 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with conjugate gradient)\n',x(1),x(2)); 52 | 53 | % Scaled conjugate Gradient 54 | options.Method = 'scg'; 55 | x = minFunc(@rosenbrock,[0 0]',options); 56 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with scaled conjugate gradient)\n',x(1),x(2)); 57 | 58 | % Preconditioned Conjugate Gradient 59 | options.Method = 'pcg'; 60 | x = minFunc(@rosenbrock,[0 0]',options); 61 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned conjugate gradient)\n',x(1),x(2)); 62 | 63 | % Default: L-BFGS (default) 64 | options.Method = 'lbfgs'; 65 | x = minFunc(@rosenbrock,[0 0]',options); 66 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with limited-memory BFGS - default)\n',x(1),x(2)); 67 | 68 | p.MaxIter = maxFunEvals; 69 | x = rprop(@rosenbrock,[0 0]',p); 70 | fprintf('x1 = %.4f, x2 = %.4f (Rprop - default)\n',x(1),x(2)); 71 | 72 | fprintf('---------------------------------------\n'); 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/lbfgsC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgs.m for details! */ 5 | /* This function may not exit gracefully on bad input! 
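   (Annotation, not in the original source: this MEX file is optional - with
   options.useMex set to 0, minFunc uses the pure-MATLAB implementation
   instead; see lbfgs.m, which computes the same two-loop search direction.)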
*/ 6 | 7 | 8 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 9 | { 10 | /* Variable Declarations */ 11 | 12 | double *s, *y, *g, *H, *d, *ro, *alpha, *beta, *q, *r; 13 | int nVars,nSteps,lhs_dims[2]; 14 | double temp; 15 | int i,j; 16 | 17 | /* Get Input Pointers */ 18 | 19 | g = mxGetPr(prhs[0]); 20 | s = mxGetPr(prhs[1]); 21 | y = mxGetPr(prhs[2]); 22 | H = mxGetPr(prhs[3]); 23 | 24 | /* Compute number of variables (p), rank of update (d) */ 25 | 26 | nVars = mxGetDimensions(prhs[1])[0]; 27 | nSteps = mxGetDimensions(prhs[1])[1]; 28 | 29 | /* Allocated Memory for Function Variables */ 30 | ro = mxCalloc(nSteps,sizeof(double)); 31 | alpha = mxCalloc(nSteps,sizeof(double)); 32 | beta = mxCalloc(nSteps,sizeof(double)); 33 | q = mxCalloc(nVars*(nSteps+1),sizeof(double)); 34 | r = mxCalloc(nVars*(nSteps+1),sizeof(double)); 35 | 36 | /* Set-up Output Vector */ 37 | 38 | lhs_dims[0] = nVars; 39 | lhs_dims[1] = 1; 40 | 41 | plhs[0] = mxCreateNumericArray(2,lhs_dims,mxDOUBLE_CLASS,mxREAL); 42 | d = mxGetPr(plhs[0]); 43 | 44 | /* ro = 1/(y(:,i)'*s(:,i)) */ 45 | for(i=0;i=0;i--) 62 | { 63 | /* alpha(i) = ro(i)*s(:,i)'*q(:,i+1) */ 64 | alpha[i] = 0; 65 | for(j=0;j= xminBound && xCP <= xmaxBound 106 | fCP = polyval(params,xCP); 107 | if imag(fCP)==0 && fCP < fmin 108 | minPos = real(xCP); 109 | fmin = real(fCP); 110 | end 111 | end 112 | end 113 | 114 | % Plot Situation 115 | if doPlot 116 | clf; hold on; 117 | 118 | % Plot Points 119 | plot(points(:,1),points(:,2),'b*'); 120 | 121 | % Plot Derivatives 122 | for i = 1:nPoints 123 | if isreal(points(i,3)) 124 | m = points(i,3); 125 | b = points(i,2) - m*points(i,1); 126 | plot([points(i,1)-.05 points(i,1)+.05],... 127 | [(points(i,1)-.05)*m+b (points(i,1)+.05)*m+b],'c.-'); 128 | end 129 | end 130 | 131 | % Plot Function 132 | x = min(xmin,xminBound)-.1:(max(xmax,xmaxBound)+.1-min(xmin,xminBound)+.1)/100:max(xmax,xmaxBound)+.1; 133 | for i = 1:length(x) 134 | f(i) = polyval(params,x(i)); 135 | end 136 | plot(x,f,'y'); 137 | axis([x(1)-.1 x(end)+.1 min(f)-.1 max(f)+.1]); 138 | 139 | % Plot Minimum 140 | plot(minPos,fmin,'g+'); 141 | if doPlot == 1 142 | pause(1); 143 | end 144 | end -------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/minFunc_processInputOptions.m: -------------------------------------------------------------------------------- 1 | 2 | function [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,optTol,progTol,method,... 3 | corrections,c1,c2,LS_init,cgSolve,qnUpdate,cgUpdate,initialHessType,... 4 | HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 5 | Damped,HvFunc,bbType,cycle,... 6 | HessianIter,outputFcn,useMex,useNegCurv,precFunc,... 7 | LS_type,LS_interp,LS_multi,DerivativeCheck] = ... 
8 | minFunc_processInputOptions(o) 9 | 10 | % Constants 11 | SD = 0; 12 | CSD = 1; 13 | BB = 2; 14 | CG = 3; 15 | PCG = 4; 16 | LBFGS = 5; 17 | QNEWTON = 6; 18 | NEWTON0 = 7; 19 | NEWTON = 8; 20 | TENSOR = 9; 21 | 22 | verbose = 1; 23 | verboseI= 1; 24 | debug = 0; 25 | doPlot = 0; 26 | method = LBFGS; 27 | cgSolve = 0; 28 | 29 | o = toUpper(o); 30 | 31 | if isfield(o,'DISPLAY') 32 | switch(upper(o.DISPLAY)) 33 | case 0 34 | verbose = 0; 35 | verboseI = 0; 36 | case 'FINAL' 37 | verboseI = 0; 38 | case 'OFF' 39 | verbose = 0; 40 | verboseI = 0; 41 | case 'NONE' 42 | verbose = 0; 43 | verboseI = 0; 44 | case 'FULL' 45 | debug = 1; 46 | case 'EXCESSIVE' 47 | debug = 1; 48 | doPlot = 1; 49 | end 50 | end 51 | 52 | DerivativeCheck = 0; 53 | if isfield(o,'DERIVATIVECHECK') 54 | switch(upper(o.DERIVATIVECHECK)) 55 | case 1 56 | DerivativeCheck = 1; 57 | case 'ON' 58 | DerivativeCheck = 1; 59 | end 60 | end 61 | 62 | LS_init = 0; 63 | LS_type = 1; 64 | LS_interp = 2; 65 | LS_multi = 0; 66 | Fref = 1; 67 | Damped = 0; 68 | HessianIter = 1; 69 | c2 = 0.9; 70 | if isfield(o,'METHOD') 71 | m = upper(o.METHOD); 72 | switch(m) 73 | case 'TENSOR' 74 | method = TENSOR; 75 | case 'NEWTON' 76 | method = NEWTON; 77 | case 'MNEWTON' 78 | method = NEWTON; 79 | HessianIter = 5; 80 | case 'PNEWTON0' 81 | method = NEWTON0; 82 | cgSolve = 1; 83 | case 'NEWTON0' 84 | method = NEWTON0; 85 | case 'QNEWTON' 86 | method = QNEWTON; 87 | Damped = 1; 88 | case 'LBFGS' 89 | method = LBFGS; 90 | case 'BB' 91 | method = BB; 92 | LS_type = 0; 93 | Fref = 20; 94 | case 'PCG' 95 | method = PCG; 96 | c2 = 0.2; 97 | LS_init = 2; 98 | case 'SCG' 99 | method = CG; 100 | c2 = 0.2; 101 | LS_init = 4; 102 | case 'CG' 103 | method = CG; 104 | c2 = 0.2; 105 | LS_init = 2; 106 | case 'CSD' 107 | method = CSD; 108 | c2 = 0.2; 109 | Fref = 10; 110 | LS_init = 2; 111 | case 'SD' 112 | method = SD; 113 | LS_init = 2; 114 | end 115 | end 116 | 117 | maxFunEvals = getOpt(o,'MAXFUNEVALS',1000); 118 | maxIter = getOpt(o,'MAXITER',500); 119 | optTol = getOpt(o,'OPTTOL',1e-5); 120 | progTol = getOpt(o,'PROGTOL',1e-9); 121 | corrections = getOpt(o,'CORRECTIONS',100); 122 | corrections = getOpt(o,'CORR',corrections); 123 | c1 = getOpt(o,'C1',1e-4); 124 | c2 = getOpt(o,'C2',c2); 125 | LS_init = getOpt(o,'LS_INIT',LS_init); 126 | cgSolve = getOpt(o,'CGSOLVE',cgSolve); 127 | qnUpdate = getOpt(o,'QNUPDATE',3); 128 | cgUpdate = getOpt(o,'CGUPDATE',2); 129 | initialHessType = getOpt(o,'INITIALHESSTYPE',1); 130 | HessianModify = getOpt(o,'HESSIANMODIFY',0); 131 | Fref = getOpt(o,'FREF',Fref); 132 | useComplex = getOpt(o,'USECOMPLEX',0); 133 | numDiff = getOpt(o,'NUMDIFF',0); 134 | LS_saveHessianComp = getOpt(o,'LS_SAVEHESSIANCOMP',1); 135 | Damped = getOpt(o,'DAMPED',Damped); 136 | HvFunc = getOpt(o,'HVFUNC',[]); 137 | bbType = getOpt(o,'BBTYPE',0); 138 | cycle = getOpt(o,'CYCLE',3); 139 | HessianIter = getOpt(o,'HESSIANITER',HessianIter); 140 | outputFcn = getOpt(o,'OUTPUTFCN',[]); 141 | useMex = getOpt(o,'USEMEX',1); 142 | useNegCurv = getOpt(o,'USENEGCURV',1); 143 | precFunc = getOpt(o,'PRECFUNC',[]); 144 | LS_type = getOpt(o,'LS_type',LS_type); 145 | LS_interp = getOpt(o,'LS_interp',LS_interp); 146 | LS_multi = getOpt(o,'LS_multi',LS_multi); 147 | end 148 | 149 | function [v] = getOpt(options,opt,default) 150 | if isfield(options,opt) 151 | if ~isempty(getfield(options,opt)) 152 | v = getfield(options,opt); 153 | else 154 | v = default; 155 | end 156 | else 157 | v = default; 158 | end 159 | end 160 | 161 | function [o] = toUpper(o) 162 | if 
~isempty(o)
163 | fn = fieldnames(o);
164 | for i = 1:length(fn)
165 | o = setfield(o,upper(fn{i}),getfield(o,fn{i}));
166 | end
167 | end
168 | end
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/mex/mcholC.c: --------------------------------------------------------------------------------
1 | #include <math.h>
2 | #include "mex.h"
3 | 
4 | double mymax(double x, double y)
5 | {
6 | if (x > y)
7 | return x;
8 | else
9 | return y;
10 | }
11 | 
12 | double absolute(double x)
13 | {
14 | if (x >= -x)
15 | return x;
16 | else
17 | return -x;
18 | }
19 | 
20 | void permuteInt(int *x, int p, int q)
21 | {
22 | int temp;
23 | temp = x[p];
24 | x[p] = x[q];
25 | x[q] = temp;
26 | }
27 | 
28 | void permute(double *x, int p, int q)
29 | {
30 | double temp;
31 | temp = x[p];
32 | x[p] = x[q];
33 | x[q] = temp;
34 | }
35 | 
36 | void permuteRows(double *x, int p, int q,int n)
37 | {
38 | int i;
39 | double temp;
40 | for(i = 0; i < n; i++)
41 | {
42 | temp = x[p+i*n];
43 | x[p+i*n] = x[q+i*n];
44 | x[q+i*n] = temp;
45 | }
46 | }
47 | 
48 | void permuteCols(double *x, int p, int q,int n)
49 | {
50 | int i;
51 | double temp;
52 | for(i = 0; i < n; i++)
53 | {
54 | temp = x[i+p*n];
55 | x[i+p*n] = x[i+q*n];
56 | x[i+q*n] = temp;
57 | }
58 | }
59 | 
60 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
61 | {
62 | int n,sizL[2],sizD[2],i,j,q,s,
63 | *P;
64 | 
65 | double mu,gamma,xi,delta,beta,maxVal,theta,
66 | *c, *H, *L, *D, *A;
67 | 
68 | /* Input */
69 | H = mxGetPr(prhs[0]);
70 | if (nrhs == 1)
71 | {
72 | mu = 1e-12;
73 | }
74 | else
75 | {
76 | mu = mxGetScalar(prhs[1]);
77 | }
78 | 
79 | /* Compute Sizes */
80 | n = mxGetDimensions(prhs[0])[0];
81 | 
82 | /* Form Output */
83 | sizL[0] = n;
84 | sizL[1] = n;
85 | plhs[0] = mxCreateNumericArray(2,sizL,mxDOUBLE_CLASS,mxREAL);
86 | L = mxGetPr(plhs[0]);
87 | sizD[0] = n;
88 | sizD[1] = 1;
89 | plhs[1] = mxCreateNumericArray(2,sizD,mxDOUBLE_CLASS,mxREAL);
90 | D = mxGetPr(plhs[1]);
91 | plhs[2] = mxCreateNumericArray(2,sizD,mxINT32_CLASS,mxREAL);
92 | P = (int*)mxGetData(plhs[2]);
93 | 
94 | /* Initialize */
95 | c = mxCalloc(n*n,sizeof(double));
96 | A = mxCalloc(n*n,sizeof(double));
97 | 
98 | for (i = 0; i < n; i++)
99 | {
100 | P[i] = i;
101 | for (j = 0;j < n; j++)
102 | {
103 | A[i+n*j] = H[i+n*j];
104 | }
105 | }
106 | 
107 | gamma = 0;
108 | for (i = 0; i < n; i++)
109 | {
110 | L[i+n*i] = 1;
111 | c[i+n*i] = A[i+n*i];
112 | }
113 | 
114 | /* Compute modification parameters */
115 | gamma = -1;
116 | xi = -1;
117 | for (i = 0; i < n; i++)
118 | {
119 | gamma = mymax(gamma,absolute(A[i+n*i]));
120 | for (j = 0;j < n; j++)
121 | {
122 | /*printf("A(%d,%d) = %f, %f\n",i,j,A[i+n*j],absolute(A[i+n*j]));*/
123 | if (i != j)
124 | xi = mymax(xi,absolute(A[i+n*j]));
125 | }
126 | }
127 | delta = mu*mymax(gamma+xi,1);
128 | 
129 | if (n > 1)
130 | {
131 | beta = sqrt(mymax(gamma,mymax(mu,xi/sqrt(n*n-1))));
132 | }
133 | else
134 | {
135 | beta = sqrt(mymax(gamma,mu));
136 | }
137 | 
138 | for (j = 0; j < n; j++)
139 | {
140 | 
141 | /* Find q that results in Best Permutation with j */
142 | maxVal = -1;
143 | q = 0;
144 | for(i = j; i < n; i++)
145 | {
146 | if (absolute(c[i+n*i]) > maxVal)
147 | {
148 | maxVal = mymax(maxVal,absolute(c[i+n*i]));
149 | q = i;
150 | }
151 | }
152 | 
153 | /* Permute D,c,L,A,P */
154 | permute(D,j,q);
155 | permuteInt(P,j,q);
156 | permuteRows(c,j,q,n);
157 | permuteCols(c,j,q,n);
158 | permuteRows(L,j,q,n);
159 | permuteCols(L,j,q,n);
160 | 
permuteRows(A,j,q,n);
161 | permuteCols(A,j,q,n);
162 | 
163 | for(s = 0; s <= j-1; s++)
164 | L[j+n*s] = c[j+n*s]/D[s];
165 | 
166 | for(i = j+1; i < n; i++)
167 | {
168 | c[i+j*n] = A[i+j*n];
169 | for(s = 0; s <= j-1; s++)
170 | {
171 | c[i+j*n] -= L[j+n*s]*c[i+n*s];
172 | }
173 | }
174 | 
175 | theta = 0;
176 | if (j < n-1)
177 | {
178 | for(i = j+1;i < n; i++)
179 | theta = mymax(theta,absolute(c[i+n*j]));
180 | }
181 | 
182 | D[j] = mymax(absolute(c[j+n*j]),mymax(delta,theta*theta/(beta*beta)));
183 | 
184 | if (j < n-1)
185 | {
186 | for(i = j+1; i < n; i++)
187 | {
188 | c[i+n*i] = c[i+n*i] - c[i+n*j]*c[i+n*j]/D[j];
189 | }
190 | }
191 | 
192 | }
193 | 
194 | for(i = 0; i < n; i++)
195 | P[i]++;
196 | 
197 | mxFree(c);
198 | mxFree(A);
199 | }
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/ArmijoBacktrack.m: --------------------------------------------------------------------------------
1 | function [t,x_new,f_new,g_new,funEvals,H] = ArmijoBacktrack(...
2 | x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,funObj,varargin)
3 | % [t,x_new,f_new,g_new,funEvals,H] = ArmijoBacktrack(...
4 | % x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,funObj,varargin)
5 | %
6 | % Backtracking linesearch to satisfy Armijo condition
7 | %
8 | % Inputs:
9 | % x: starting location
10 | % t: initial step size
11 | % d: descent direction
12 | % f: function value at starting location
13 | % fr: reference function value (usually funObj(x))
14 | % gtd: directional derivative at starting location
15 | % c1: sufficient decrease parameter
16 | % debug: display debugging information
17 | % LS_interp: type of interpolation
18 | % progTol: minimum allowable step length
19 | % doPlot: do a graphical display of interpolation
20 | % funObj: objective function
21 | % varargin: parameters of objective function
22 | %
23 | % Outputs:
24 | % t: step length
25 | % f_new: function value at x+t*d
26 | % g_new: gradient value at x+t*d
27 | % funEvals: number of function evaluations performed by line search
28 | % H: Hessian at initial guess (only computed if requested)
29 | %
30 | % recent change: LS changed to LS_interp and LS_multi
31 | 
32 | % Evaluate the Objective and Gradient at the Initial Step
33 | if nargout == 6
34 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
35 | else
36 | [f_new,g_new] = funObj(x+t*d,varargin{:});
37 | end
38 | funEvals = 1;
39 | 
40 | while f_new > fr + c1*t*gtd || ~isLegal(f_new)
41 | temp = t;
42 | 
43 | if LS_interp == 0 || ~isLegal(f_new)
44 | % Ignore value of new point
45 | if debug
46 | fprintf('Fixed BT\n');
47 | end
48 | t = 0.5*t;
49 | elseif LS_interp == 1 || ~isLegal(g_new)
50 | % Use function value at new point, but not its derivative
51 | if funEvals < 2 || LS_multi == 0 || ~isLegal(f_prev)
52 | % Backtracking w/ quadratic interpolation based on two points
53 | if debug
54 | fprintf('Quad BT\n');
55 | end
56 | t = polyinterp([0 f gtd; t f_new sqrt(-1)],doPlot,0,t);
57 | else
58 | % Backtracking w/ cubic interpolation based on three points
59 | if debug
60 | fprintf('Cubic BT\n');
61 | end
62 | t = polyinterp([0 f gtd; t f_new sqrt(-1); t_prev f_prev sqrt(-1)],doPlot,0,t);
63 | end
64 | else
65 | % Use function value and derivative at new point
66 | 
67 | if funEvals < 2 || LS_multi == 0 || ~isLegal(f_prev)
68 | % Backtracking w/ cubic interpolation w/ derivative
69 | if debug
70 | fprintf('Grad-Cubic BT\n');
71 | end
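% (Note on the convention used in the polyinterp calls below and throughout
% minFunc: each row passed to polyinterp is [x f(x) f'(x)], and an entry of
% sqrt(-1) marks that value as unknown, so the polynomial fit uses only the
% values that are actually available.)
72 | t = polyinterp([0 f gtd; t f_new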
g_new'*d],doPlot,0,t);
73 | elseif ~isLegal(g_prev)
74 | % Backtracking w/ quartic interpolation of 3 points and derivative
75 | % of two
76 | if debug
77 | fprintf('Grad-Quartic BT\n');
78 | end
79 | t = polyinterp([0 f gtd; t f_new g_new'*d; t_prev f_prev sqrt(-1)],doPlot,0,t);
80 | else
81 | % Backtracking w/ quintic interpolation of 3 points and derivative
82 | % of two
83 | if debug
84 | fprintf('Grad-Quintic BT\n');
85 | end
86 | t = polyinterp([0 f gtd; t f_new g_new'*d; t_prev f_prev g_prev'*d],doPlot,0,t);
87 | end
88 | end
89 | 
90 | % Adjust if change in t is too small/large
91 | if t < temp*1e-3
92 | if debug
93 | fprintf('Interpolated Value Too Small, Adjusting\n');
94 | end
95 | t = temp*1e-3;
96 | elseif t > temp*0.6
97 | if debug
98 | fprintf('Interpolated Value Too Large, Adjusting\n');
99 | end
100 | t = temp*0.6;
101 | end
102 | 
103 | % Store old point if doing three-point interpolation
104 | if LS_multi
105 | f_prev = f_new;
106 | t_prev = temp;
107 | if LS_interp == 2
108 | g_prev = g_new;
109 | end
110 | end
111 | 
112 | if ~saveHessianComp && nargout == 6
113 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
114 | else
115 | [f_new,g_new] = funObj(x + t*d,varargin{:});
116 | end
117 | funEvals = funEvals+1;
118 | 
119 | % Check whether step size has become too small
120 | if max(abs(t*d)) <= progTol
121 | if debug
122 | fprintf('Backtracking Line Search Failed\n');
123 | end
124 | t = 0;
125 | f_new = f;
126 | g_new = g;
127 | break;
128 | end
129 | end
130 | 
131 | % Evaluate Hessian at new point
132 | if nargout == 6 && funEvals > 1 && saveHessianComp
133 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
134 | funEvals = funEvals+1;
135 | end
136 | 
137 | x_new = x + t*d;
138 | 
139 | end
140 | 
-------------------------------------------------------------------------------- /MC_DMF.m: --------------------------------------------------------------------------------
1 | function [Xr,NN_MF]=MC_DMF(X,M,s,options)%%% X:[n m]
2 | % Code for Matrix completion by deep matrix factorization
3 | % When using this code, please cite the following paper:
4 | % "Matrix completion by deep matrix factorization"
5 | % Jicong Fan, Jieyu Cheng. Neural Networks, 2018(98):34-41
6 | % inputs
7 | % -- X: an n by m matrix with missing entries (m variables, n samples)
8 | % -- M: binary mask matrix for X, 1 indicates observed, 0 indicates missing
9 | % -- s: network structure, a row vector with L elements, indicating one
10 | % input layer, L-2 hidden layers, and one output layer;
11 | % e.g. [r 5*r 10*r m], the last value must equal 'm'; r<size(NN_MF.W{1},1)
180 | NN_MF.dZ=d{2}(:,2:end)*NN_MF.W{1}(:,2:end);
181 | else
182 | NN_MF.dZ=d{2}(:,1:end)*NN_MF.W{1}(:,2:end);
183 | end
184 | NN_MF.dZ=NN_MF.dZ/NN_MF.n;
185 | end
186 | 
187 | 
188 | 
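A minimal usage sketch for MC_DMF on synthetic data (illustrative only: the fields of 'options' are documented in the portion of this file lost above, so the empty struct below is an assumption; DMF_example.m in this repository shows the intended call):

nTrue = randn(200,4)*randn(4,20);      % low-rank ground truth, n = 200, m = 20
M = double(rand(200,20) > 0.3);        % binary mask, 1 = observed
r = 4; s = [r 5*r 10*r 20];            % network structure, last entry = m
options = struct();                    % assumed: defaults used when empty
[Xr,NN_MF] = MC_DMF(nTrue.*M, M, s, options);

-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/WolfeLineSearch.m: --------------------------------------------------------------------------------
1 | function [t,f_new,g_new,funEvals,H] = WolfeLineSearch(...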
2 | x,t,d,f,g,gtd,c1,c2,LS_interp,LS_multi,maxLS,progTol,debug,doPlot,saveHessianComp,funObj,varargin)
3 | %
4 | % Bracketing Line Search to Satisfy Wolfe Conditions
5 | %
6 | % Inputs:
7 | % x: starting location
8 | % t: initial step size
9 | % d: descent direction
10 | % f: function value at starting location
11 | % g: gradient at starting location
12 | % gtd: directional derivative at starting location
13 | % c1: sufficient decrease parameter
14 | % c2: curvature parameter
15 | % debug: display debugging information
16 | % LS_interp: type of interpolation
17 | % maxLS: maximum number of iterations
18 | % progTol: minimum allowable step length
19 | % doPlot: do a graphical display of interpolation
20 | % funObj: objective function
21 | % varargin: parameters of objective function
22 | %
23 | % Outputs:
24 | % t: step length
25 | % f_new: function value at x+t*d
26 | % g_new: gradient value at x+t*d
27 | % funEvals: number of function evaluations performed by line search
28 | % H: Hessian at initial guess (only computed if requested)
29 | 
30 | % Evaluate the Objective and Gradient at the Initial Step
31 | if nargout == 5
32 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
33 | else
34 | [f_new,g_new] = funObj(x+t*d,varargin{:});
35 | end
36 | funEvals = 1;
37 | gtd_new = g_new'*d;
38 | 
39 | % Bracket an Interval containing a point satisfying the
40 | % Wolfe criteria
41 | 
42 | LSiter = 0;
43 | t_prev = 0;
44 | f_prev = f;
45 | g_prev = g;
46 | gtd_prev = gtd;
47 | nrmD = max(abs(d));
48 | done = 0;
49 | 
50 | while LSiter < maxLS
51 | 
52 | %% Bracketing Phase
53 | if ~isLegal(f_new) || ~isLegal(g_new)
54 | if debug
55 | fprintf('Extrapolated into illegal region, switching to Armijo line-search\n');
56 | end
57 | t = (t + t_prev)/2;
58 | % Do Armijo
59 | if nargout == 5
60 | [t,x_new,f_new,g_new,armijoFunEvals,H] = ArmijoBacktrack(...
61 | x,t,d,f,f,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,...
62 | funObj,varargin{:});
63 | else
64 | [t,x_new,f_new,g_new,armijoFunEvals] = ArmijoBacktrack(...
65 | x,t,d,f,f,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,saveHessianComp,...
66 | funObj,varargin{:});
67 | end
68 | funEvals = funEvals + armijoFunEvals;
69 | return;
70 | end
71 | 
72 | 
73 | if f_new > f + c1*t*gtd || (LSiter > 1 && f_new >= f_prev)
74 | bracket = [t_prev t];
75 | bracketFval = [f_prev f_new];
76 | bracketGval = [g_prev g_new];
77 | break;
78 | elseif abs(gtd_new) <= -c2*gtd
79 | bracket = t;
80 | bracketFval = f_new;
81 | bracketGval = g_new;
82 | done = 1;
83 | break;
84 | elseif gtd_new >= 0
85 | bracket = [t_prev t];
86 | bracketFval = [f_prev f_new];
87 | bracketGval = [g_prev g_new];
88 | break;
89 | end
90 | temp = t_prev;
91 | t_prev = t;
92 | minStep = t + 0.01*(t-temp);
93 | maxStep = t*10;
94 | if LS_interp <= 1
95 | if debug
96 | fprintf('Extending Bracket\n');
97 | end
98 | t = maxStep;
99 | elseif LS_interp == 2
100 | if debug
101 | fprintf('Cubic Extrapolation\n');
102 | end
103 | t = polyinterp([temp f_prev gtd_prev; t f_new gtd_new],doPlot,minStep,maxStep);
104 | elseif LS_interp == 3
105 | t = mixedExtrap(temp,f_prev,gtd_prev,t,f_new,gtd_new,minStep,maxStep,debug,doPlot);
106 | end
107 | 
108 | f_prev = f_new;
109 | g_prev = g_new;
110 | gtd_prev = gtd_new;
111 | if ~saveHessianComp && nargout == 5
112 | [f_new,g_new,H] = funObj(x + t*d,varargin{:});
113 | else
114 | [f_new,g_new] = funObj(x + t*d,varargin{:});
115 | end
116 | funEvals = funEvals + 1;
117 | gtd_new = g_new'*d;
118 | LSiter = LSiter+1;
119 | end
120 | 
121 | if LSiter == maxLS
122 | bracket = [0 t];
123 | bracketFval = [f f_new];
124 | bracketGval = [g g_new];
125 | end
126 | 
127 | %% Zoom Phase
128 | 
129 | % We now either have a point satisfying the criteria, or a bracket
130 | % surrounding a point satisfying the criteria
131 | % Refine the bracket until we find a point satisfying the criteria
132 | insufProgress = 0;
133 | Tpos = 2;
134 | LOposRemoved = 0;
135 | while ~done && LSiter < maxLS
136 | 
137 | % Find High and Low Points in bracket
138 | [f_LO LOpos] = min(bracketFval);
139 | HIpos = -LOpos + 3;
140 | 
141 | % Compute new trial value
142 | if LS_interp <= 1 || ~isLegal(bracketFval) || ~isLegal(bracketGval)
143 | if debug
144 | fprintf('Bisecting\n');
145 | end
146 | t = mean(bracket);
147 | elseif LS_interp == 2
148 | if debug
149 | fprintf('Grad-Cubic Interpolation\n');
150 | end
151 | t = polyinterp([bracket(1) bracketFval(1) bracketGval(:,1)'*d
152 | bracket(2) bracketFval(2) bracketGval(:,2)'*d],doPlot);
153 | else
154 | % Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
155 | nonTpos = -Tpos+3;
156 | if LOposRemoved == 0
157 | oldLOval = bracket(nonTpos);
158 | oldLOFval = bracketFval(nonTpos);
159 | oldLOGval = bracketGval(:,nonTpos);
160 | end
161 | t = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot);
162 | end
163 | 
164 | 
165 | % Test that we are making sufficient progress
166 | if min(max(bracket)-t,t-min(bracket))/(max(bracket)-min(bracket)) < 0.1
167 | if debug
168 | fprintf('Interpolation close to boundary');
169 | end
170 | if insufProgress || t>=max(bracket) || t <= min(bracket)
171 | if debug
172 | fprintf(', Evaluating at 0.1 away from boundary\n');
173 | end
174 | if abs(t-max(bracket)) < abs(t-min(bracket))
175 | t = max(bracket)-0.1*(max(bracket)-min(bracket));
176 | else
177 | t = min(bracket)+0.1*(max(bracket)-min(bracket));
178 | end
179 | insufProgress = 0;
180 | else
181 | if debug
182 | fprintf('\n');
183 | end
184 | insufProgress = 1;
185 | end
186 | else
187 | insufProgress = 0;
188 | end
189 | 
190 | % Evaluate new point
191 | if ~saveHessianComp && nargout == 5
192 | [f_new,g_new,H] 
= funObj(x + t*d,varargin{:}); 193 | else 194 | [f_new,g_new] = funObj(x + t*d,varargin{:}); 195 | end 196 | funEvals = funEvals + 1; 197 | gtd_new = g_new'*d; 198 | LSiter = LSiter+1; 199 | 200 | armijo = f_new < f + c1*t*gtd; 201 | if ~armijo || f_new >= f_LO 202 | % Armijo condition not satisfied or not lower than lowest 203 | % point 204 | bracket(HIpos) = t; 205 | bracketFval(HIpos) = f_new; 206 | bracketGval(:,HIpos) = g_new; 207 | Tpos = HIpos; 208 | else 209 | if abs(gtd_new) <= - c2*gtd 210 | % Wolfe conditions satisfied 211 | done = 1; 212 | elseif gtd_new*(bracket(HIpos)-bracket(LOpos)) >= 0 213 | % Old HI becomes new LO 214 | bracket(HIpos) = bracket(LOpos); 215 | bracketFval(HIpos) = bracketFval(LOpos); 216 | bracketGval(:,HIpos) = bracketGval(:,LOpos); 217 | if LS_interp == 3 218 | if debug 219 | fprintf('LO Pos is being removed!\n'); 220 | end 221 | LOposRemoved = 1; 222 | oldLOval = bracket(LOpos); 223 | oldLOFval = bracketFval(LOpos); 224 | oldLOGval = bracketGval(:,LOpos); 225 | end 226 | end 227 | % New point becomes new LO 228 | bracket(LOpos) = t; 229 | bracketFval(LOpos) = f_new; 230 | bracketGval(:,LOpos) = g_new; 231 | Tpos = LOpos; 232 | end 233 | 234 | if ~done && abs(bracket(1)-bracket(2))*nrmD < progTol 235 | if debug 236 | fprintf('Line-search bracket has been reduced below progTol\n'); 237 | end 238 | break; 239 | end 240 | 241 | end 242 | 243 | %% 244 | if LSiter == maxLS 245 | if debug 246 | fprintf('Line Search Exceeded Maximum Line Search Iterations\n'); 247 | end 248 | end 249 | 250 | [f_LO LOpos] = min(bracketFval); 251 | t = bracket(LOpos); 252 | f_new = bracketFval(LOpos); 253 | g_new = bracketGval(:,LOpos); 254 | 255 | 256 | 257 | % Evaluate Hessian at new point 258 | if nargout == 5 && funEvals > 1 && saveHessianComp 259 | [f_new,g_new,H] = funObj(x + t*d,varargin{:}); 260 | funEvals = funEvals + 1; 261 | end 262 | 263 | end 264 | 265 | 266 | %% 267 | function [t] = mixedExtrap(x0,f0,g0,x1,f1,g1,minStep,maxStep,debug,doPlot); 268 | alpha_c = polyinterp([x0 f0 g0; x1 f1 g1],doPlot,minStep,maxStep); 269 | alpha_s = polyinterp([x0 f0 g0; x1 sqrt(-1) g1],doPlot,minStep,maxStep); 270 | if alpha_c > minStep && abs(alpha_c - x1) < abs(alpha_s - x1) 271 | if debug 272 | fprintf('Cubic Extrapolation\n'); 273 | end 274 | t = alpha_c; 275 | else 276 | if debug 277 | fprintf('Secant Extrapolation\n'); 278 | end 279 | t = alpha_s; 280 | end 281 | end 282 | 283 | %% 284 | function [t] = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot); 285 | 286 | % Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 287 | nonTpos = -Tpos+3; 288 | 289 | gtdT = bracketGval(:,Tpos)'*d; 290 | gtdNonT = bracketGval(:,nonTpos)'*d; 291 | oldLOgtd = oldLOGval'*d; 292 | if bracketFval(Tpos) > oldLOFval 293 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd 294 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot); 295 | alpha_q = polyinterp([oldLOval oldLOFval oldLOgtd 296 | bracket(Tpos) bracketFval(Tpos) sqrt(-1)],doPlot); 297 | if abs(alpha_c - oldLOval) < abs(alpha_q - oldLOval) 298 | if debug 299 | fprintf('Cubic Interpolation\n'); 300 | end 301 | t = alpha_c; 302 | else 303 | if debug 304 | fprintf('Mixed Quad/Cubic Interpolation\n'); 305 | end 306 | t = (alpha_q + alpha_c)/2; 307 | end 308 | elseif gtdT'*oldLOgtd < 0 309 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd 310 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot); 311 | alpha_s = polyinterp([oldLOval oldLOFval oldLOgtd 312 | bracket(Tpos) sqrt(-1) gtdT],doPlot); 313 | if abs(alpha_c - 
bracket(Tpos)) >= abs(alpha_s - bracket(Tpos))
314 | if debug
315 | fprintf('Cubic Interpolation\n');
316 | end
317 | t = alpha_c;
318 | else
319 | if debug
320 | fprintf('Quad Interpolation\n');
321 | end
322 | t = alpha_s;
323 | end
324 | elseif abs(gtdT) <= abs(oldLOgtd)
325 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd
326 | bracket(Tpos) bracketFval(Tpos) gtdT],...
327 | doPlot,min(bracket),max(bracket));
328 | alpha_s = polyinterp([oldLOval sqrt(-1) oldLOgtd
329 | bracket(Tpos) bracketFval(Tpos) gtdT],...
330 | doPlot,min(bracket),max(bracket));
331 | if alpha_c > min(bracket) && alpha_c < max(bracket)
332 | if abs(alpha_c - bracket(Tpos)) < abs(alpha_s - bracket(Tpos))
333 | if debug
334 | fprintf('Bounded Cubic Extrapolation\n');
335 | end
336 | t = alpha_c;
337 | else
338 | if debug
339 | fprintf('Bounded Secant Extrapolation\n');
340 | end
341 | t = alpha_s;
342 | end
343 | else
344 | if debug
345 | fprintf('Bounded Secant Extrapolation\n');
346 | end
347 | t = alpha_s;
348 | end
349 | 
350 | if bracket(Tpos) > oldLOval
351 | t = min(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
352 | else
353 | t = max(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
354 | end
355 | else
356 | t = polyinterp([bracket(nonTpos) bracketFval(nonTpos) gtdNonT
357 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot);
358 | end
359 | end
-------------------------------------------------------------------------------- /rproptoolbox/Rprop/rprop.m: --------------------------------------------------------------------------------
1 | % RPROP Unconstrained local minimization using Rprop
2 | % [X,E,EXITFLAG,STATS] = RPROP(FUNCGRAD,X0,PARAMETERS,VARARGIN) Minimize a
3 | % function FUNCGRAD starting from the parameters X0. Optionally a
4 | % structure PARAMETERS can be used to override the default parameters.
5 | % Each additional parameter VARARGIN will be passed to function FUNCGRAD.
6 | % The function returns the optimized parameters X, the final objective
7 | % value E, a flag EXITFLAG that encodes the condition that triggered the
8 | % end of the optimization process, and lastly a structure STATS that
9 | % contains various information about the optimization process itself.
10 | %
11 | % FUNCGRAD: Function or handle to function. Must take the form
12 | % [F,G] = FUNCGRAD(X) where
13 | % X: Parameters
14 | % F: [1 x 1] Objective value
15 | % G: [size(X)] Gradient
16 | %
17 | % X0: Can be either a matrix or a cell of matrices
18 | %
19 | % PARAMETERS:
20 | % method: Rprop method used, accepts {'Rprop+','Rprop-',
21 | % 'IRprop+','IRprop-'}.
22 | % [Default = 'Rprop+']
23 | % MaxIter: Stop criterion 0: Maximum number of iterations, accepts
24 | % numeric values.
25 | % [Default = 100]
26 | % d_Obj: Stop criterion 3: Minimum Objective value, accepts
27 | % numeric values.
28 | % [Default = 10e-12]
29 | % d_time: Stop criterion 4: Maximum time, accepts numeric values
30 | % or {inf}.
31 | % [Default = inf]
32 | % Tolfun: Stop criterion 1: Minimum decrease of the Objective
33 | % value, accepts numeric values (p.Tolfun triggers only
34 | % while the objective still improves, i.e. if it gets
35 | % better really slowly, but not if it gets worse).
36 | % [Default = 10e-9]
37 | % TolX: Stop criterion 2: Minimum magnitude of the gradient,
38 | % accepts numeric values (triggers when the largest
39 | % absolute entry of the gradient of the parameters
40 | % falls below TolX).
41 | % [Default = 10e-9]
42 | % mu_neg: Rprop's decrease factor.
43 | % [Default = 0.5]
44 | % mu_pos: Rprop's increase factor.
45 | % [Default = 1.2]
46 | % delta0: Rprop's initial update-value.
47 | % [Default = 0.0123]
48 | % delta_min: Rprop's lower bound for step size.
49 | % [Default = 0]
50 | % delta_max: Rprop's upper bound for step size.
51 | % [Default = 50]
52 | % verbosity: Determines the amount of information to print during the
53 | % optimization process, accepts numeric values [0-3].
54 | % [Default = 0]
55 | % display: Plot the Objective value during the optimization
56 | % process. NOTE: SLOWS DOWN THE OPTIMIZATION CONSIDERABLY!
57 | % It's preferable to plot stats.error once optimized.
58 | % [Default = false]
59 | % indent: Base indentation level for printing.
60 | % [Default = 0]
61 | % useGPU: If true, enables GPU acceleration when available (GPU
62 | % support is checked via GPU.GPUsupport). NOTE: FOR SMALL
63 | % OPTIMIZATION PROBLEMS MIGHT SLOW DOWN THE COMPUTATION!
64 | % [Default = false]
65 | % funcgradgpu: If true the function FUNCGRAD will be fed with
66 | % GPUArray data (speeds up the computation when using GPU
67 | % acceleration, but requires a compatible FUNCGRAD).
68 | % [Default = false]
69 | % outputgpu: If true X will be returned as GPUArray (whenever it
70 | % has been computed as GPUArray).
71 | % [Default = false]
72 | % full_hist: If true STATS.full_hist will include all the parameters
73 | % throughout the optimization process.
74 | % NOTE: IT MIGHT GET REALLY BIG! ([p.MaxIter x size(X)])
75 | % [Default = false]
76 | %
77 | % VARARGIN: Will be passed as argument to FUNCGRAD.
78 | %
79 | % EXITFLAG:
80 | % 0 = Maximum number of iterations PARAMETERS.MaxIter reached.
81 | % 1 = Minimum variation of Obj value PARAMETERS.Tolfun reached.
82 | % 2 = Minimum variation of the gradient of parameters PARAMETERS.TolX
83 | % reached.
84 | % 3 = Minimum Objective value PARAMETERS.d_Obj reached.
85 | % 4 = Maximum computational time PARAMETERS.d_time reached.
86 | %
87 | %
88 | % STATS:
89 | % error: [N_ITER x 1] Objective values during the optimization
90 | % process.
91 | % time: [N_ITER x 1] Time spent since the beginning of the
92 | % optimization process.
93 | % full_hist: {N_ITER x 1} If PARAMETERS.full_hist is 'true', each
94 | % cell contains X for that particular iteration.
95 | % FunEvals: Number of function evaluations (always N_ITER+1).
96 | %
97 | %
98 | % References:
99 | % [1] C. Igel and M. Hüsken. Improving the Rprop Learning Algorithm.
100 | % Neural Computation, pp. 115-121, 2000.
101 | % [2] C. Igel and M. Hüsken. Empirical Evaluation of the Improved
102 | % Rprop Learning Algorithm. Neurocomputing 50, pp. 105-123, 2003.
103 | % [3] M. Riedmiller and H. Braun. A direct adaptive method for faster
104 | % backpropagation learning: the RPROP algorithm. International
105 | % Conference on Neural Networks, pp. 586-591, IEEE Press, 1993.
106 | % [4] M. Riedmiller. Advanced supervised learning in multilayer
107 | % perceptrons-from backpropagation to adaptive learning
108 | % techniques. International Journal of Computer Standards and
109 | % Interfaces 16(3), pp. 265-278, 1994.
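%
% Example (an illustrative sketch; it assumes rosenbrock.m, which ships in
% this folder, follows the [F,G] = FUNCGRAD(X) interface described above):
%   opts.method = 'IRprop-';
%   opts.MaxIter = 1000;
%   opts.verbosity = 1;
%   [x,E,exitflag,stats] = rprop(@rosenbrock,[-1.2;1],opts);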
110 | %
111 | %
112 | % Toolbox website:
113 | % http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox
114 | %
115 | %
116 | % If used for scientific publications please cite explicitly:
117 | % -----------------------------------------------------------------------
118 | % @MISC{rproptoolbox,
119 | % author = {Calandra, Roberto},
120 | % title = {Rprop Toolbox for {MATLAB}},
121 | % year = {2011},
122 | % howpublished = {\url{http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox}}
123 | % }
124 | % -----------------------------------------------------------------------
125 | %
126 | 
127 | % Copyright (c) 2011 Roberto Calandra
128 | % $Revision: 0.96 $
129 | 
130 | % TODO: what happens when x0 is a gpuArray
131 | % TODO: change d_Obj to 0(-inf), default
132 | 
133 | 
134 | function [x,E,exitflag,stats] = rprop(funcgrad,x0,parameters,varargin)
135 | %% Input validation
136 | 
137 | assert(isa(funcgrad,'function_handle'),'Invalid format of FUNCGRAD')
138 | 
139 | if exist('parameters','var')
140 | assert(isstruct(parameters),'PARAMETERS is not a structure')
141 | end
142 | 
143 | % Start the timer
144 | rpropclock = tic;
145 | 
146 | 
147 | %% Parameters
148 | 
149 | % Default Parameters
150 | p.method = 'Rprop+'; % Rprop method used
151 | p.MaxIter = 100; % Stop 0: Maximum number of iterations
152 | p.Tolfun = 10e-9; % Stop 1: Minimum Delta of value
153 | p.TolX = 10e-9; % Stop 2: Minimum Delta of parameters
154 | p.d_Obj = 10e-12; % Stop 3: Minimum value
155 | p.d_time = inf; % Stop 4: Maximum time
156 | p.mu_neg = 0.5; % Decrease factor
157 | p.mu_pos = 1.2; % Increase factor
158 | p.delta0 = 0.0123; % Initial update-value
159 | p.delta_min = 0; % Lower bound for step size
160 | p.delta_max = 50; % Upper bound for step size
161 | p.verbosity = 0; % [0-3] verbosity mode
162 | p.display = false; % Plot optimization process
163 | p.indent = 0; % Base for indentation
164 | p.useGPU = false; % Use GPU if possible
165 | p.funcgradgpu = false; % Enable if funcgrad accepts gpuArray
166 | p.outputgpu = false; % Enable if you want x to be a gpuArray
167 | p.full_hist = false; % Return the full history of parameters
168 | 
169 | % Override default parameters with any passed ones
170 | if exist('parameters','var')
171 | t_p = fieldnames(parameters);
172 | for i = 1:size(t_p,1)
173 | if isfield(p,t_p{i})
174 | p.(t_p{i}) = parameters.(t_p{i});
175 | else
176 | fprintf(2,'%s: unknown parameter passed: %s\n',mfilename,t_p{i})
177 | end
178 | end
179 | end
180 | 
181 | % Validate Parameters
182 | p.MaxIter = round(p.MaxIter);
183 | assert(isfinite(p.MaxIter),'PARAMETERS.MaxIter must be finite')
184 | assert(p.MaxIter>0,'PARAMETERS.MaxIter must be positive')
185 | 
186 | 
187 | %% Initialization
188 | 
189 | x = x0;
190 | 
191 | % Are we using Rprop+ or IRprop+ ?
192 | plus = sum(strcmp(p.method,{'Rprop+','IRprop+'}));
193 | 
194 | % Shall we use GPU ?
195 | if p.useGPU
196 | GPUenable = GPU.GPUsupport();
197 | else
198 | GPUenable = false;
199 | end
200 | 
201 | % Shall we pass to funcgrad a gpuArray ?
202 | if GPUenable && ~p.funcgradgpu
203 | GPUfuncnotGPU = true;
204 | else
205 | GPUfuncnotGPU = false;
206 | end
207 | 
208 | % Do we need to convert x from gpuArray to double?
209 | if ~p.outputgpu && GPUenable 210 | xGPU = true; 211 | else 212 | xGPU = false; 213 | end 214 | 215 | % Initialize some variables 216 | exitflag = 0; % Reached maximum amount of iterations 217 | stats.error = zeros([p.MaxIter,1]); 218 | stats.time = zeros([p.MaxIter,1]); 219 | if p.full_hist 220 | stats.x = cell([p.MaxIter,1]); 221 | end 222 | 223 | % Initialize more variables 224 | if iscell(x0) 225 | % x0 is made out of cells 226 | 227 | ncell = numel(x0); 228 | tb = size(x0); 229 | 230 | delta = cell(tb); 231 | grad = cell(tb); 232 | old_grad = cell(tb); 233 | deltaW = cell(tb); 234 | if plus 235 | old_deltaW = cell(tb); 236 | if GPUenable 237 | old_E = parallel.gpu.GPUArray.inf; 238 | else 239 | old_E = inf; 240 | end 241 | end 242 | 243 | for i = 1:ncell 244 | t2 = size(x0{i}); 245 | 246 | if GPUenable 247 | delta{i} = p.delta0.*parallel.gpu.GPUArray.ones(t2); 248 | grad{i} = parallel.gpu.GPUArray.zeros(t2); 249 | old_grad{i} = parallel.gpu.GPUArray.zeros(t2); 250 | deltaW{i} = parallel.gpu.GPUArray.zeros(t2); 251 | if plus 252 | old_deltaW{i} = parallel.gpu.GPUArray.zeros(t2); 253 | end 254 | else 255 | delta{i} = repmat(p.delta0,t2); 256 | grad{i} = zeros(t2); 257 | old_grad{i} = zeros(t2); 258 | deltaW{i} = zeros(t2); 259 | if plus 260 | old_deltaW{i} = zeros(t2); 261 | end 262 | end 263 | end 264 | 265 | else 266 | % x0 is not a cell 267 | 268 | ncell = 1; 269 | tb = size(x0); 270 | 271 | if GPUenable 272 | x = gpuArray(x); 273 | delta{1} = p.delta0.*parallel.gpu.GPUArray.ones(tb); 274 | grad{1} = parallel.gpu.GPUArray.zeros(tb); 275 | old_grad{1} = parallel.gpu.GPUArray.zeros(tb); 276 | deltaW{1} = parallel.gpu.GPUArray.zeros(tb); 277 | if plus 278 | old_deltaW{1} = parallel.gpu.GPUArray.zeros(tb); 279 | old_E = parallel.gpu.GPUArray.inf; 280 | end 281 | else 282 | delta{1} = repmat(p.delta0,tb); 283 | grad{1} = zeros(tb); 284 | old_grad{1} = zeros(tb); 285 | deltaW{1} = zeros(tb); 286 | if plus 287 | old_deltaW{1} = zeros(tb); 288 | old_E = inf; 289 | end 290 | end 291 | end 292 | 293 | 294 | %% Optimization 295 | 296 | % Print method used for optimization 297 | if p.verbosity>0 298 | Utils.indent(p.indent+0) 299 | fprintf('Optimizing using %s\n',p.method); 300 | end 301 | 302 | if p.verbosity>1 303 | if GPUenable 304 | Utils.indent(p.indent+0) 305 | fprintf('GPU acceleration enabled\n') 306 | end 307 | end 308 | 309 | % Compute initial value function and gradient 310 | if GPUfuncnotGPU 311 | [E grad_t] = funcgrad(gather(x),varargin{:}); 312 | else 313 | [E grad_t] = funcgrad(x,varargin{:}); 314 | end 315 | 316 | % Print initial value 317 | if p.verbosity>2 318 | Utils.indent(p.indent+1) 319 | fprintf('Initial Value: %e\r',E); 320 | end 321 | 322 | % Check stop criterions 323 | 324 | % Stop criterion: TolX 325 | if ncell==1 326 | TolX = max(abs(grad_t)); 327 | else 328 | TolX = max(abs(grad_t{1})); 329 | for i=2:ncell 330 | TolX = max(max(abs(grad_t{i})),TolX); 331 | end 332 | end 333 | if TolX < p.TolX 334 | if p.verbosity>1 335 | Utils.indent(p.indent+1) 336 | fprintf(2,'Stopping criterion reached (TolX < desired TolX)\n') 337 | end 338 | exitflag = 2; 339 | return 340 | end 341 | 342 | % Stop criterion: Error 343 | if E < p.d_Obj 344 | if p.verbosity>1 345 | Utils.indent(p.indent+1) 346 | fprintf(2,'Stopping criterion reached (Error < desired Error)\n') 347 | end 348 | exitflag = 3; 349 | return 350 | end 351 | 352 | % Stop criterion: Time 353 | t1 = toc(rpropclock); 354 | if t1 > p.d_time 355 | if p.verbosity>1 356 | Utils.indent(p.indent+1) 357 | fprintf(2,'Stopping 
criterion reached (Time > desired Time)\n') 358 | end 359 | exitflag = 4; 360 | return 361 | end 362 | clear t1 363 | 364 | % Init figure 365 | if p.display>0 366 | stats.fig_h = figure(); 367 | end 368 | 369 | % Begin the optimization 370 | for Iter = 1:p.MaxIter 371 | 372 | % Validate input 373 | %assert(isequal(size(grad_t),size(x)),... 374 | % 'The dimension of the gradient do not match the parameters') 375 | %assert(Utils.msum(isfinite(grad))==numel(x)) 376 | %assert(isfinite(E)) 377 | 378 | if ncell==1 379 | grad{1} = grad_t; 380 | else 381 | grad = grad_t; 382 | end 383 | 384 | % Optimization ! 385 | for i = 1:ncell 386 | 387 | gg = grad{i}.*old_grad{i}; 388 | delta{i} = min(delta{i}*p.mu_pos,p.delta_max).*(gg>0) +... 389 | max(delta{i}*p.mu_neg,p.delta_min).*(gg<0) + delta{i}.*(gg==0); 390 | 391 | switch p.method 392 | case 'Rprop-' 393 | deltaW{i} = -sign(grad{i}).*delta{i}; 394 | 395 | case 'Rprop+' 396 | deltaW{i} = -sign(grad{i}).*delta{i}.*(gg>=0) -... 397 | old_deltaW{i}.*(gg<0); 398 | grad{i} = grad{i}.*(gg>=0); 399 | old_deltaW{i} = deltaW{i}; 400 | 401 | case 'IRprop-' 402 | grad{i} = grad{i}.*(gg>=0); 403 | deltaW{i} = -sign(grad{i}).*delta{i}; 404 | 405 | case 'IRprop+' 406 | deltaW{i} = -sign(grad{i}).*delta{i}.*(gg>=0) -... 407 | old_deltaW{i}.*(gg<0)*(E>old_E); 408 | grad{i} = grad{i}.*(gg>=0); 409 | old_deltaW{i} = deltaW{i}; 410 | old_E = E; 411 | 412 | otherwise 413 | error('Unknown method') 414 | 415 | end 416 | 417 | old_grad{i} = grad{i}; 418 | 419 | % Update parameters 420 | if ncell==1 421 | x = x + deltaW{i}; 422 | else 423 | x{i} = x{i} + deltaW{i}; 424 | end 425 | 426 | end 427 | 428 | 429 | % Compute value function and gradient 430 | if GPUfuncnotGPU 431 | if ncell==1 432 | [E grad_t] = funcgrad(gather(x),varargin{:}); 433 | else 434 | x_t = cell(size(x)); 435 | for i=1:numel(x_t) 436 | x_t{i} = gather(x{i}); 437 | end 438 | [E grad_t] = funcgrad(x_t,varargin{:}); 439 | end 440 | else 441 | [E grad_t] = funcgrad(x,varargin{:}); 442 | end 443 | 444 | % Print info about this iteration 445 | if mod(Iter,10)==0 446 | if p.verbosity>1 447 | Utils.indent(p.indent+1) 448 | fprintf('Iter %d (of %d)',Iter,p.MaxIter); 449 | if p.verbosity>2 450 | fprintf(' - value: %e\r',E); 451 | else 452 | fprintf('\r'); 453 | end 454 | end 455 | end 456 | 457 | % Collect statistics 458 | if p.full_hist 459 | stats.x{Iter} = x; 460 | end 461 | stats.time(Iter) = toc(rpropclock); 462 | if p.funcgradgpu 463 | stats.error(Iter) = gather(E); 464 | else 465 | stats.error(Iter) = E; 466 | end 467 | 468 | % Plot optimization process 469 | if p.display>0 470 | set(0,'CurrentFigure',stats.fig_h); 471 | plot(stats.error(1:Iter)); 472 | title('Objective value during optimization') 473 | ylabel('Objective value') 474 | xlabel('Number of Iterations') 475 | drawnow 476 | end 477 | 478 | % Check other stop criterions 479 | 480 | % Stop criterion: TolFun 481 | if Iter>1 482 | if isfinite(p.Tolfun) 483 | deltaobj = stats.error(Iter-1) - stats.error(Iter); 484 | if (deltaobj < p.Tolfun) && (deltaobj > 0) 485 | if p.verbosity>1 486 | Utils.indent(p.indent+1) 487 | fprintf(2,'Stopping criterion reached (Delta < desired Delta)\n') 488 | end 489 | exitflag = 1; 490 | break 491 | end 492 | end 493 | end 494 | 495 | % Stop criterion: TolX 496 | if ncell==1 497 | TolX = max(abs(grad_t)); 498 | else 499 | TolX = max(abs(grad_t{1})); 500 | for i=2:ncell 501 | TolX = max(max(abs(grad_t{i})),TolX); 502 | end 503 | end 504 | if TolX < p.TolX 505 | if p.verbosity>1 506 | Utils.indent(p.indent+1) 507 | 
fprintf(2,'Stopping criterion reached (TolX < desired TolX)\n')
508 | end
509 | exitflag = 2;
510 | break
511 | end
512 | 
513 | % Stop criterion: Error
514 | if E < p.d_Obj
515 | if p.verbosity>1
516 | Utils.indent(p.indent+1)
517 | fprintf(2,'Stopping criterion reached (Error < desired Error)\n')
518 | end
519 | exitflag = 3;
520 | break
521 | end
522 | 
523 | % Stop criterion: Time
524 | if stats.time(Iter) > p.d_time
525 | if p.verbosity>1
526 | Utils.indent(p.indent+1)
527 | fprintf(2,'Stopping criterion reached (Time > desired Time)\n')
528 | end
529 | exitflag = 4;
530 | break
531 | end
532 | 
533 | end
534 | 
535 | 
536 | %% Output Validation
537 | 
538 | % Cut outputs in case of early-stop
539 | stats.error = stats.error(1:Iter);
540 | stats.time = stats.time(1:Iter);
541 | stats.GPUenabled = GPUenable;
542 | stats.FunEvals = Iter+1;
543 | if p.full_hist
544 | stats.x = stats.x(1:Iter);
545 | end
546 | 
547 | % In case the GPU has been used collect the parameters
548 | if xGPU
549 | if ncell==1
550 | x = gather(x);
551 | else
552 | x_t = cell(size(x));
553 | for i=1:numel(x)
554 | x_t{i} = gather(x{i});
555 | end
556 | x = x_t;
557 | end
558 | end
559 | 
560 | 
561 | end
562 | 
563 | 
-------------------------------------------------------------------------------- /rproptoolbox/minFunc_2012/minFunc/minFunc.m: --------------------------------------------------------------------------------
1 | function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
2 | % [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
3 | %
4 | % Unconstrained optimizer using a line search strategy
5 | %
6 | % Uses an interface very similar to fminunc
7 | % (it doesn't support all of the optimization toolbox options,
8 | % but supports many other options).
9 | %
10 | % It computes descent directions using one of ('Method'):
11 | % - 'sd': Steepest Descent
12 | % (no previous information used, not recommended)
13 | % - 'csd': Cyclic Steepest Descent
14 | % (uses previous step length for a fixed length cycle)
15 | % - 'bb': Barzilai and Borwein Gradient
16 | % (uses only previous step)
17 | % - 'cg': Non-Linear Conjugate Gradient
18 | % (uses only previous step and a vector beta)
19 | % - 'scg': Scaled Non-Linear Conjugate Gradient
20 | % (uses previous step and a vector beta,
21 | % and Hessian-vector products to initialize line search)
22 | % - 'pcg': Preconditioned Non-Linear Conjugate Gradient
23 | % (uses only previous step and a vector beta, preconditioned version)
24 | % - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
25 | % (default: uses a predetermined number of previous steps to form a
26 | % low-rank Hessian approximation)
27 | % - 'newton0': Hessian-Free Newton
28 | % (numerically computes Hessian-Vector products)
29 | % - 'pnewton0': Preconditioned Hessian-Free Newton
30 | % (numerically computes Hessian-Vector products, preconditioned
31 | % version)
32 | % - 'qnewton': Quasi-Newton Hessian approximation
33 | % (uses dense Hessian approximation)
34 | % - 'mnewton': Newton's method with Hessian calculation after every
35 | % user-specified number of iterations
36 | % (needs user-supplied Hessian matrix)
37 | % - 'newton': Newton's method with Hessian calculation every iteration
38 | % (needs user-supplied Hessian matrix)
39 | % - 'tensor': Tensor
40 | % (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
41 | %
42 | % Several line search strategies are available for finding a step length satisfying
43 | % the termination criteria ('LS_type')
44 | % - 0 : 
A backtracking line-search based on the Armijo condition (default for 'bb')
45 | % - 1 : A bracketing line-search based on the strong Wolfe conditions (default for all other methods)
46 | % - 2 : The line-search from the Matlab Optimization Toolbox (requires Matlab's linesearch.m to be added to the path)
47 | %
48 | % For the Armijo line-search, several interpolation strategies are available ('LS_interp'):
49 | % - 0 : Step size halving
50 | % - 1 : Polynomial interpolation using new function values
51 | % - 2 : Polynomial interpolation using new function and gradient values (default)
52 | %
53 | % When (LS_interp = 1), the default setting of (LS_multi = 0) uses quadratic interpolation,
54 | % while if (LS_multi = 1) it uses cubic interpolation if more than one point is available.
55 | %
56 | % When (LS_interp = 2), the default setting of (LS_multi = 0) uses cubic interpolation,
57 | % while if (LS_multi = 1) it uses quartic or quintic interpolation if more than one point is available
58 | %
59 | % To use the non-monotonic Armijo condition, set the 'Fref' value to the number of previous function values to store
60 | %
61 | % For the Wolfe line-search, these interpolation strategies are available ('LS_interp'):
62 | % - 0 : Step Size Doubling and Bisection
63 | % - 1 : Cubic interpolation/extrapolation using new function and gradient values (default)
64 | % - 2 : Mixed quadratic/cubic interpolation/extrapolation
65 | %
66 | % Several strategies for choosing the initial step size are available ('LS_init'):
67 | % - 0: Always try an initial step length of 1 (default for all except 'sd' and 'cg')
68 | % (t = 1)
69 | % - 1: Use a step similar to the previous step
70 | % (t = t_old*min(2,g'd/g_old'd_old))
71 | % - 2: Quadratic Initialization using previous function value and new
72 | % function value/gradient (use this if steps tend to be very long, default for 'sd' and 'cg')
73 | % (t = min(1,2*(f-f_old)/g))
74 | % - 3: The minimum between 1 and twice the previous step length
75 | % (t = min(1,2*t))
76 | % - 4: The scaled conjugate gradient step length (may accelerate
77 | % conjugate gradient methods, but requires a Hessian-vector product, default for 'scg')
78 | % (t = g'd/d'Hd)
79 | %
80 | % Inputs:
81 | % funObj - is a function handle
82 | % x0 - is a starting vector;
83 | % options - is a struct containing parameters (defaults are used for non-existent or blank fields)
84 | % varargin{:} - all other arguments are passed as additional arguments to funObj
85 | %
86 | % Outputs:
87 | % x is the location of the minimum found
88 | % f is the function value at the minimum found
89 | % exitflag returns an exit condition
90 | % output returns a structure with other information
91 | %
92 | % Supported Input Options
93 | % Display - Level of display [ off | final | (iter) | full | excessive ]
94 | % MaxFunEvals - Maximum number of function evaluations allowed (1000)
95 | % MaxIter - Maximum number of iterations allowed (500)
96 | % optTol - Termination tolerance on the first-order optimality (1e-5)
97 | % progTol - Termination tolerance on progress in terms of function/parameter changes (1e-9)
98 | % Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 |
99 | % qnewton | mnewton | newton | tensor ]
100 | % c1 - Sufficient Decrease for Armijo condition (1e-4)
101 | % c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise)
102 | % LS_init - Line Search Initialization - see above (2 for cg/sd, 4 for scg, 0 otherwise)
103 | % LS_type - Line Search type - see above (0 for bb, 
1 otherwise)
104 | % Fref - Setting this to a positive integer greater than 1
105 | % will use non-monotone Armijo objective in the line search.
106 | % (20 for bb, 10 for csd, 1 for all others)
107 | % numDiff - [ 0 | 1 | 2] compute derivatives using user-supplied function (0),
108 | % numerically using forward-differencing (1), or numerically using central-differencing (2)
109 | % (default: 0)
110 | % (this option has a different effect for 'newton', see below)
111 | % useComplex - if 1, use complex differentials if computing numerical derivatives
112 | % to get very accurate values (default: 0)
113 | % DerivativeCheck - if 'on', computes derivatives numerically at initial
114 | % point and compares to user-supplied derivative (default: 'off')
115 | % outputFcn - function to run after each iteration (default: []). It
116 | % should have the following interface:
117 | % outputFcn(x,iterationType,i,funEvals,f,t,gtd,g,d,optCond,varargin{:});
118 | % useMex - where applicable, use mex files to speed things up (default: 1)
119 | %
120 | % Method-specific input options:
121 | % newton:
122 | % HessianModify - type of Hessian modification for direct solvers to
123 | % use if the Hessian is not positive definite (default: 0)
124 | % 0: Minimum Euclidean norm s.t. eigenvalues sufficiently large
125 | % (requires eigenvalues on iterations where matrix is not pd)
126 | % 1: Start with (1/2)*||A||_F and increment until Cholesky succeeds
127 | % (an approximation to method 0, does not require eigenvalues)
128 | % 2: Modified LDL factorization
129 | % (only 1 generalized Cholesky factorization done and no eigenvalues required)
130 | % 3: Modified Spectral Decomposition
131 | % (requires eigenvalues)
132 | % 4: Modified Symmetric Indefinite Factorization
133 | % 5: Uses the eigenvector of the smallest eigenvalue as negative
134 | % curvature direction
135 | % cgSolve - use conjugate gradient instead of direct solver (default: 0)
136 | % 0: Direct Solver
137 | % 1: Conjugate Gradient
138 | % 2: Conjugate Gradient with Diagonal Preconditioner
139 | % 3: Conjugate Gradient with LBFGS Preconditioner
140 | % x: Conjugate Gradient with Symmetric Successive Over Relaxation
141 | % Preconditioner with parameter x
142 | % (where x is a real number in the range [0,2])
143 | % x: Conjugate Gradient with Incomplete Cholesky Preconditioner
144 | % with drop tolerance -x
145 | % (where x is a real negative number)
146 | % numDiff - compute Hessian numerically
147 | % (default: 0, done with complex differentials if useComplex = 1)
148 | % LS_saveHessianComp - when on, only computes the Hessian at the
149 | % first and last iteration of the line search (default: 1)
150 | % mnewton:
151 | % HessianIter - number of iterations to use same Hessian (default: 5)
152 | % qnewton:
153 | % initialHessType - scale initial Hessian approximation (default: 1)
154 | % qnUpdate - type of quasi-Newton update (default: 3):
155 | % 0: BFGS
156 | % 1: SR1 (when it is positive-definite, otherwise BFGS)
157 | % 2: Hoshino
158 | % 3: Self-Scaling BFGS
159 | % 4: Oren's Self-Scaling Variable Metric method
160 | % 5: McCormick-Huang asymmetric update
161 | % Damped - use damped BFGS update (default: 1)
162 | % newton0/pnewton0:
163 | % HvFunc - user-supplied function that returns Hessian-vector products
164 | % (by default, these are computed numerically using autoHv)
165 | % HvFunc should have the following interface: HvFunc(v,x,varargin{:})
166 | % useComplex - use a complex perturbation to get high accuracy
167 | % Hessian-vector products 
(default: 0)
168 | % (the increased accuracy can make the method much more efficient,
169 | % but gradient code must properly support complex inputs)
170 | % useNegCurv - a negative curvature direction is used as the descent
171 | % direction if one is encountered during the cg iterations
172 | % (default: 1)
173 | % precFunc (for pnewton0 only) - user-supplied preconditioner
174 | % (by default, an L-BFGS preconditioner is used)
175 | % precFunc should have the following interface:
176 | % precFunc(v,x,varargin{:})
177 | % lbfgs:
178 | % Corr - number of corrections to store in memory (default: 100)
179 | % (higher numbers converge faster but use more memory)
180 | % Damped - use damped update (default: 0)
181 | % cg/scg/pcg:
182 | % cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1)
183 | % 0: Fletcher Reeves
184 | % 1: Polak-Ribiere
185 | % 2: Hestenes-Stiefel (not supported for pcg)
186 | % 3: Gilbert-Nocedal
187 | % HvFunc (for scg only)- user-supplied function that returns Hessian-vector
188 | % products
189 | % (by default, these are computed numerically using autoHv)
190 | % HvFunc should have the following interface:
191 | % HvFunc(v,x,varargin{:})
192 | % precFunc (for pcg only) - user-supplied preconditioner
193 | % (by default, an L-BFGS preconditioner is used)
194 | % precFunc should have the following interface:
195 | % precFunc(v,x,varargin{:})
196 | % bb:
197 | % bbType - type of bb step (default: 0)
198 | % 0: min_alpha ||delta_x - alpha delta_g||_2
199 | % 1: min_alpha ||alpha delta_x - delta_g||_2
200 | % 2: Conic BB
201 | % 3: Gradient method with retards
202 | % csd:
203 | % cycle - length of cycle (default: 3)
204 | %
205 | % Supported Output Options
206 | % iterations - number of iterations taken
207 | % funcCount - number of function evaluations
208 | % algorithm - algorithm used
209 | % firstorderopt - first-order optimality
210 | % message - exit message
211 | % trace.funccount - function evaluations after each iteration
212 | % trace.fval - function value after each iteration
213 | %
214 | % Author: Mark Schmidt (2005)
215 | % Web: http://www.di.ens.fr/~mschmidt/Software/minFunc.html
216 | %
217 | % Sources (in order of how much the source material contributes):
218 | % J. Nocedal and S.J. Wright. 1999. "Numerical Optimization". Springer Verlag.
219 | % R. Fletcher. 1987. "Practical Methods of Optimization". Wiley.
220 | % J. Demmel. 1997. "Applied Numerical Linear Algebra". SIAM.
221 | % R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R.
222 | % Pozo, C. Romine, and H. Van der Vorst. 1994. "Templates for the Solution of
223 | % Linear Systems: Building Blocks for Iterative Methods". SIAM.
224 | % J. More and D. Thuente. "Line search algorithms with guaranteed
225 | % sufficient decrease". ACM Trans. Math. Softw. vol 20, 286-307, 1994.
226 | % M. Raydan. "The Barzilai and Borwein gradient method for the large
227 | % scale unconstrained minimization problem". SIAM J. Optim., 7, 26-33,
228 | % (1997).
229 | % "Mathematical Optimization". The Computational Science Education
230 | % Project. 1995.
231 | % C. Kelley. 1999. "Iterative Methods for Optimization". Frontiers in
232 | % Applied Mathematics. SIAM.
233 | 
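% Example (an illustrative sketch of a typical call; LogisticLoss.m and
% example_minFunc_LR.m in the logisticExample folder show the full version.
% nInst, nVars, X and y below are assumed synthetic data):
%   nInst = 500; nVars = 10;
%   X = randn(nInst,nVars);            % design matrix
%   y = sign(randn(nInst,1));          % +1/-1 labels
%   options.Method = 'lbfgs';
%   options.Display = 'iter';
%   w = minFunc(@LogisticLoss,zeros(nVars,1),options,X,y);
%
234 | if nargin < 3
235 | options = [];
236 | end
237 | 
238 | tic
239 | %time=[];
240 | 
241 | % Get Parameters
242 | [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,optTol,progTol,method,...
243 | corrections,c1,c2,LS_init,cgSolve,qnUpdate,cgUpdate,initialHessType,...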
244 | HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 245 | Damped,HvFunc,bbType,cycle,... 246 | HessianIter,outputFcn,useMex,useNegCurv,precFunc,... 247 | LS_type,LS_interp,LS_multi,checkGrad] = ... 248 | minFunc_processInputOptions(options); 249 | 250 | % Constants 251 | SD = 0; 252 | CSD = 1; 253 | BB = 2; 254 | CG = 3; 255 | PCG = 4; 256 | LBFGS = 5; 257 | QNEWTON = 6; 258 | NEWTON0 = 7; 259 | NEWTON = 8; 260 | TENSOR = 9; 261 | 262 | % Initialize 263 | p = length(x0); 264 | d = zeros(p,1); 265 | x = x0; 266 | t = 1; 267 | 268 | % If necessary, form numerical differentiation functions 269 | funEvalMultiplier = 1; 270 | if useComplex 271 | numDiffType = 3; 272 | else 273 | numDiffType = numDiff; 274 | end 275 | if numDiff && method ~= TENSOR 276 | varargin(3:end+2) = varargin(1:end); 277 | varargin{1} = numDiffType; 278 | varargin{2} = funObj; 279 | if method ~= NEWTON 280 | if debug 281 | if useComplex 282 | fprintf('Using complex differentials for gradient computation\n'); 283 | else 284 | fprintf('Using finite differences for gradient computation\n'); 285 | end 286 | end 287 | funObj = @autoGrad; 288 | else 289 | if debug 290 | if useComplex 291 | fprintf('Using complex differentials for Hessian computation\n'); 292 | else 293 | fprintf('Using finite differences for Hessian computation\n'); 294 | end 295 | end 296 | funObj = @autoHess; 297 | end 298 | 299 | if method == NEWTON0 && useComplex == 1 300 | if debug 301 | fprintf('Turning off the use of complex differentials for Hessian-vector products\n'); 302 | end 303 | useComplex = 0; 304 | end 305 | 306 | if useComplex 307 | funEvalMultiplier = p; 308 | elseif numDiff == 2 309 | funEvalMultiplier = 2*p; 310 | else 311 | funEvalMultiplier = p+1; 312 | end 313 | end 314 | 315 | % Evaluate Initial Point 316 | if method < NEWTON 317 | [f,g] = funObj(x,varargin{:}); 318 | computeHessian = 0; 319 | else 320 | [f,g,H] = funObj(x,varargin{:}); 321 | computeHessian = 1; 322 | end 323 | funEvals = 1; 324 | 325 | % Derivative Check 326 | if checkGrad 327 | if numDiff 328 | fprintf('Can not do derivative checking when numDiff is 1\n'); 329 | pause 330 | end 331 | derivativeCheck(funObj,x,1,numDiffType,varargin{:}); % Checks gradient 332 | if computeHessian 333 | derivativeCheck(funObj,x,2,numDiffType,varargin{:}); 334 | end 335 | end 336 | 337 | % Output Log 338 | 339 | 340 | if verboseI 341 | fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 342 | end 343 | 344 | 345 | % Compute optimality of initial point 346 | optCond = max(abs(g)); 347 | 348 | if nargout > 3 349 | % Initialize Trace 350 | trace.time = toc; 351 | trace.fval = f; 352 | trace.funcCount = funEvals; 353 | trace.optCond = optCond; 354 | end 355 | 356 | % Exit if initial point is optimal 357 | if optCond <= optTol 358 | exitflag=1; 359 | msg = 'Optimality Condition below optTol'; 360 | if verbose 361 | fprintf('%s\n',msg); 362 | end 363 | if nargout > 3 364 | output = struct('iterations',0,'funcCount',1,... 365 | 'algorithm',method,'firstorderopt',max(abs(g)),'message',msg,'trace',trace); 366 | end 367 | return; 368 | end 369 | 370 | % Output Function 371 | if ~isempty(outputFcn) 372 | stop = outputFcn(x,'init',0,funEvals,f,[],[],g,[],max(abs(g)),varargin{:}); 373 | if stop 374 | exitflag=-1; 375 | msg = 'Stopped by output function'; 376 | if verbose 377 | fprintf('%s\n',msg); 378 | end 379 | if nargout > 3 380 | output = struct('iterations',0,'funcCount',1,... 
381 | 'algorithm',method,'firstorderopt',max(abs(g)),'message',msg,'trace',trace); 382 | end 383 | return; 384 | end 385 | end 386 | 387 | % Perform up to a maximum of 'maxIter' descent steps: 388 | 389 | for i = 1:maxIter 390 | 391 | % ****************** COMPUTE DESCENT DIRECTION ***************** 392 | 393 | switch method 394 | case SD % Steepest Descent 395 | d = -g; 396 | 397 | case CSD % Cyclic Steepest Descent 398 | 399 | if mod(i,cycle) == 1 % Use Steepest Descent 400 | alpha = 1; 401 | LS_init = 2; 402 | LS_type = 1; % Wolfe line search 403 | elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step 404 | alpha = t; 405 | LS_init = 0; 406 | LS_type = 0; % Armijo line search 407 | end 408 | d = -alpha*g; 409 | 410 | case BB % Steepest Descent with Barzilai and Borwein Step Length 411 | 412 | if i == 1 413 | d = -g; 414 | else 415 | y = g-g_old; 416 | s = t*d; 417 | if bbType == 0 418 | yy = y'*y; 419 | alpha = (s'*y)/(yy); 420 | if alpha <= 1e-10 || alpha > 1e10 421 | alpha = 1; 422 | end 423 | elseif bbType == 1 424 | sy = s'*y; 425 | alpha = (s'*s)/sy; 426 | if alpha <= 1e-10 || alpha > 1e10 427 | alpha = 1; 428 | end 429 | elseif bbType == 2 % Conic Interpolation ('Modified BB') 430 | sy = s'*y; 431 | ss = s'*s; 432 | alpha = ss/sy; 433 | if alpha <= 1e-10 || alpha > 1e10 434 | alpha = 1; 435 | end 436 | alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s); 437 | if alphaConic > .001*alpha && alphaConic < 1000*alpha 438 | alpha = alphaConic; 439 | end 440 | elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step) 441 | sy = s'*y; 442 | alpha = (s'*s)/sy; 443 | if alpha <= 1e-10 || alpha > 1e10 444 | alpha = 1; 445 | end 446 | v(1+mod(i-2,5)) = alpha; 447 | alpha = v(ceil(rand*length(v))); 448 | end 449 | d = -alpha*g; 450 | end 451 | g_old = g; 452 | myF_old = f; 453 | 454 | 455 | case CG % Non-Linear Conjugate Gradient 456 | 457 | if i == 1 458 | d = -g; % Initially use steepest descent direction 459 | else 460 | gotgo = g_old'*g_old; 461 | 462 | if cgUpdate == 0 463 | % Fletcher-Reeves 464 | beta = (g'*g)/(gotgo); 465 | elseif cgUpdate == 1 466 | % Polak-Ribiere 467 | beta = (g'*(g-g_old)) /(gotgo); 468 | elseif cgUpdate == 2 469 | % Hestenes-Stiefel 470 | beta = (g'*(g-g_old))/((g-g_old)'*d); 471 | else 472 | % Gilbert-Nocedal 473 | beta_FR = (g'*(g-g_old)) /(gotgo); 474 | beta_PR = (g'*g-g'*g_old)/(gotgo); 475 | beta = max(-beta_FR,min(beta_PR,beta_FR)); 476 | end 477 | 478 | d = -g + beta*d; 479 | 480 | % Restart if not a direction of sufficient descent 481 | if g'*d > -progTol 482 | if debug 483 | fprintf('Restarting CG\n'); 484 | end 485 | beta = 0; 486 | d = -g; 487 | end 488 | 489 | % Old restart rule: 490 | %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0 491 | 492 | end 493 | g_old = g; 494 | 495 | case PCG % Preconditioned Non-Linear Conjugate Gradient 496 | 497 | % Apply preconditioner to negative gradient 498 | if isempty(precFunc) 499 | % Use L-BFGS Preconditioner 500 | if i == 1 501 | S = zeros(p,corrections); 502 | Y = zeros(p,corrections); 503 | YS = zeros(corrections,1); 504 | lbfgs_start = 1; 505 | lbfgs_end = 0; 506 | Hdiag = 1; 507 | s = -g; 508 | else 509 | [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(g-g_old,t*d,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex); 510 | if debug && skipped 511 | fprintf('Skipped L-BFGS updated\n'); 512 | end 513 | if useMex 514 | s = lbfgsProdC(g,S,Y,YS,int32(lbfgs_start),int32(lbfgs_end),Hdiag); 515 | else 516 | s = lbfgsProd(g,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag); 517 | 
                    end
                end
            else % User-supplied preconditioner
                s = precFunc(-g,x,varargin{:});
            end

            if i == 1
                d = s;
            else
                if cgUpdate == 0
                    % Preconditioned Fletcher-Reeves
                    beta = (g'*s)/(g_old'*s_old);
                elseif cgUpdate < 3
                    % Preconditioned Polak-Ribiere
                    beta = (g'*(s-s_old))/(g_old'*s_old);
                else
                    % Preconditioned Gilbert-Nocedal
                    beta_FR = (g'*s)/(g_old'*s_old);
                    beta_PR = (g'*(s-s_old))/(g_old'*s_old);
                    beta = max(-beta_FR,min(beta_PR,beta_FR));
                end
                d = s + beta*d;

                if g'*d > -progTol
                    if debug
                        fprintf('Restarting CG\n');
                    end
                    beta = 0;
                    d = s;
                end
            end
            g_old = g;
            s_old = s;

        case LBFGS % L-BFGS
            % Update the direction and step sizes
            if Damped
                if i == 1
                    d = -g; % Initially use steepest descent direction
                    old_dirs = zeros(length(g),0);
                    old_stps = zeros(length(d),0);
                    Hdiag = 1;
                else
                    [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                    if useMex
                        d = lbfgsC(-g,old_dirs,old_stps,Hdiag);
                    else
                        d = lbfgs(-g,old_dirs,old_stps,Hdiag);
                    end
                end
            else
                if i == 1
                    d = -g; % Initially use steepest descent direction
                    S = zeros(p,corrections);
                    Y = zeros(p,corrections);
                    YS = zeros(corrections,1);
                    lbfgs_start = 1;
                    lbfgs_end = 0;
                    Hdiag = 1;
                else
                    [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(g-g_old,t*d,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex);
                    if debug && skipped
                        fprintf('Skipped L-BFGS update\n');
                    end
                    if useMex
                        d = lbfgsProdC(g,S,Y,YS,int32(lbfgs_start),int32(lbfgs_end),Hdiag);
                    else
                        d = lbfgsProd(g,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag);
                    end
                end
            end
            g_old = g;

        case QNEWTON % Use quasi-Newton Hessian approximation
            if i == 1
                d = -g;
            else
                % Compute difference vectors
                y = g-g_old;
                s = t*d;

                if i == 2
                    % Make initial Hessian approximation
                    if initialHessType == 0
                        % Identity
                        if qnUpdate <= 1
                            R = eye(length(g));
                        else
                            H = eye(length(g));
                        end
                    else
                        % Scaled Identity
                        if debug
                            fprintf('Scaling Initial Hessian Approximation\n');
                        end
                        if qnUpdate <= 1
                            % Use Cholesky of Hessian approximation
                            R = sqrt((y'*y)/(y'*s))*eye(length(g));
                        else
                            % Use Inverse of Hessian approximation
                            H = eye(length(g))*(y'*s)/(y'*y);
                        end
                    end
                end

                if qnUpdate == 0 % Use BFGS updates
                    Bs = R'*(R*s);
                    if Damped
                        eta = .02;
                        if y'*s < eta*s'*Bs
                            if debug
                                fprintf('Damped Update\n');
                            end
                            theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                            y = theta*y + (1-theta)*Bs;
                        end
                        R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                    else
                        if y'*s > 1e-10
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if debug
                                fprintf('Skipping Update\n');
                            end
                        end
                    end
                elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness
                    Bs = R'*(R*s);
                    ymBs = y-Bs;
                    if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10
                        R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-');
                    else
                        if debug
                            fprintf('SR1 not positive-definite, doing BFGS Update\n');
                        end
                        if Damped
                            eta = .02;
                            if y'*s < eta*s'*Bs
                                if debug
                                    fprintf('Damped Update\n');
                                end
                                theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                                y = theta*y + (1-theta)*Bs;
                            end
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if y'*s > 1e-10
                                R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                            else
                                if debug
                                    fprintf('Skipping Update\n');
                                end
                            end
                        end
                    end
                elseif qnUpdate == 2 % Use Hoshino update
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    phi = 1/(1 + (y'*H*y)/(s'*y));
                    H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v';
                elseif qnUpdate == 3 % Self-Scaling BFGS update
                    ys = y'*s;
                    Hy = H*y;
                    yHy = y'*Hy;
                    gamma = ys/yHy;
                    v = sqrt(yHy)*(s/ys - Hy/yHy);
                    H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys;
                elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update
                    % Oren's method
                    if (s'*y)/(y'*H*y) > 1
                        phi = 1; % BFGS
                        omega = 0;
                    elseif (s'*(H\s))/(s'*y) < 1
                        phi = 0; % DFP
                        omega = 1;
                    else
                        phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2);
                        omega = phi;
                    end

                    gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y);
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y);
                elseif qnUpdate == 5 % McCormick-Huang asymmetric update
                    theta = 1;
                    phi = 0;
                    psi = 1;
                    omega = 0;
                    t1 = s*(theta*s + phi*H'*y)';
                    t2 = (theta*s + phi*H'*y)'*y;
                    t3 = H*y*(psi*s + omega*H'*y)';
                    t4 = (psi*s + omega*H'*y)'*y;
                    H = H + t1/t2 - t3/t4;
                end

                if qnUpdate <= 1
                    d = -R\(R'\g);
                else
                    d = -H*g;
                end
            end
            g_old = g;

        case NEWTON0 % Hessian-Free Newton
            cgMaxIter = min(p,maxFunEvals-funEvals);
            cgForce = min(0.5,sqrt(norm(g)))*norm(g);

            % Set-up preconditioner
            precondFunc = [];
            precondArgs = [];
            if cgSolve == 1
                if isempty(precFunc) % Apply L-BFGS preconditioner
                    if i == 1
                        S = zeros(p,corrections);
                        Y = zeros(p,corrections);
                        YS = zeros(corrections,1);
                        lbfgs_start = 1;
                        lbfgs_end = 0;
                        Hdiag = 1;
                    else
                        [S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,skipped] = lbfgsAdd(g-g_old,t*d,S,Y,YS,lbfgs_start,lbfgs_end,Hdiag,useMex);
                        if debug && skipped
                            fprintf('Skipped L-BFGS update\n');
                        end
                        if useMex
                            precondFunc = @lbfgsProdC;
                        else
                            precondFunc = @lbfgsProd;
                        end
                        precondArgs = {S,Y,YS,int32(lbfgs_start),int32(lbfgs_end),Hdiag};
                    end
                    g_old = g;
                else
                    % Apply user-defined preconditioner
                    precondFunc = precFunc;
                    precondArgs = {x,varargin{:}};
                end
            end

            % Solve Newton system using cg and hessian-vector products
            if isempty(HvFunc)
                % No user-supplied Hessian-vector function,
                % use automatic differentiation
                HvFun = @autoHv;
                HvArgs = {x,g,useComplex,funObj,varargin{:}};
            else
                % Use user-supplied Hessian-vector function
                HvFun = HvFunc;
                HvArgs = {x,varargin{:}};
            end

            if useNegCurv
                [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            else
                [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            end

            funEvals = funEvals+cgIter;
            if debug
                fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes);
            end

            if useNegCurv
                if ~isempty(negCurv)
                    %if debug
                    fprintf('Using negative curvature direction\n');
                    %end
                    d = negCurv/norm(negCurv);
                    d = d/sum(abs(g));
                end
            end

        case NEWTON % Newton search direction
            if cgSolve == 0
                if HessianModify == 0
                    % Attempt to perform a Cholesky factorization of the Hessian
                    [R,posDef] = chol(H);

                    % If the Cholesky factorization was successful, then the Hessian is
                    % positive definite, solve the system
                    if posDef == 0
                        d = -R\(R'\g);
                    else
                        % otherwise, shift the Hessian to be positive definite based on
                        % its minimum eigenvalue, and solve the shifted system
                        % (expensive, we don't want to do this very much)
                        if debug
                            fprintf('Adjusting Hessian\n');
                        end
                        H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H))));
                        d = -H\g;
                    end
                elseif HessianModify == 1
                    % Modified Incomplete Cholesky
                    R = mcholinc(H,debug);
                    d = -R\(R'\g);
                elseif HessianModify == 2
                    % Modified Generalized Cholesky
                    if useMex
                        [L,D,perm] = mcholC(H);
                    else
                        [L,D,perm] = mchol(H);
                    end
                    d(perm) = -L' \ ((D.^-1).*(L \ g(perm)));
                elseif HessianModify == 3
                    % Modified Spectral Decomposition
                    [V,D] = eig((H+H')/2);
                    D = diag(D);
                    D = max(abs(D),max(max(abs(D)),1)*1e-12);
                    d = -V*((V'*g)./D);
                elseif HessianModify == 4
                    % Modified Symmetric Indefinite Factorization
                    [L,D,perm] = ldl(H,'vector');
                    [blockPos,junk] = find(triu(D,1));
                    for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1)
                        if D(diagInd,diagInd) < 1e-12
                            D(diagInd,diagInd) = 1e-12;
                        end
                    end
                    for blockInd = blockPos'
                        block = D(blockInd:blockInd+1,blockInd:blockInd+1);
                        block_a = block(1);
                        block_b = block(2);
                        block_d = block(4);
                        lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2;
                        D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(lambda+1e-12);
                    end
                    d(perm) = -L' \ (D \ (L \ g(perm)));
                else
                    % Take Newton step if Hessian is pd,
                    % otherwise take a step with negative curvature
                    [R,posDef] = chol(H);
                    if posDef == 0
                        d = -R\(R'\g);
                    else
                        if debug
                            fprintf('Taking Direction of Negative Curvature\n');
                        end
                        [V,D] = eig(H);
                        u = V(:,1);
                        d = -sign(u'*g)*u;
                    end
                end
            else
                % Solve with Conjugate Gradient
                cgMaxIter = p;
                cgForce = min(0.5,sqrt(norm(g)))*norm(g);

                % Select Preconditioner
                if cgSolve == 1
                    % No preconditioner
                    precondFunc = [];
                    precondArgs = [];
                elseif cgSolve == 2
                    % Diagonal preconditioner
                    precDiag = diag(H);
                    precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag);
                    precondFunc = @precondDiag;
                    precondArgs = {precDiag.^-1};
                elseif cgSolve == 3
                    % L-BFGS preconditioner
                    if i == 1
                        old_dirs = zeros(length(g),0);
                        old_stps = zeros(length(g),0);
                        Hdiag = 1;
                    else
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                    end
                    g_old = g;
                    if useMex
                        precondFunc = @lbfgsC;
                    else
                        precondFunc = @lbfgs;
                    end
                    precondArgs = {old_dirs,old_stps,Hdiag};
                elseif cgSolve > 0
                    % Symmetric Successive Overrelaxation Preconditioner
                    omega = cgSolve;
                    D = diag(H);
                    D(D < 1e-12) = 1e-12 - min(D);
                    precDiag = (omega/(2-omega))*D.^-1;
                    precTriu = diag(D/omega) + triu(H,1);
                    precondFunc = @precondTriuDiag;
                    precondArgs = {precTriu,precDiag.^-1};
                else
                    % Incomplete Cholesky Preconditioner
                    opts.droptol = -cgSolve;
                    opts.rdiag = 1;
                    R = cholinc(sparse(H),opts);
                    if min(diag(R)) < 1e-12
                        R = cholinc(sparse(H + eye(p)*(1e-12 - min(diag(R)))),opts); % shift the diagonal, not every entry
                    end
                    precondFunc = @precondTriu;
                    precondArgs = {R};
                end

                % Run cg with the appropriate preconditioner
                if isempty(HvFunc)
                    % No user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs);
                else
                    % Use user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}});
                end
                if debug
                    fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes);
                    %funEvals = funEvals + cgIter;
                end
            end

        case TENSOR % Tensor Method
            if numDiff
                % Compute 3rd-order Tensor Numerically
                [junk1,junk2,junk3,T] = autoTensor(x,numDiffType,funObj,varargin{:});
            else
                % Use user-supplied 3rd-derivative Tensor
                [junk1,junk2,junk3,T] = funObj(x,varargin{:});
            end
            options_sub.Method = 'newton';
            options_sub.Display = 'none';
            options_sub.progTol = progTol;
            options_sub.optTol = optTol;
            d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T);

            if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -progTol
                if debug
                    fprintf('Using 2nd-Order Step\n');
                end
                [V,D] = eig((H+H')/2);
                D = diag(D);
                D = max(abs(D),max(max(abs(D)),1)*1e-12);
                d = -V*((V'*g)./D);
            else
                if debug
                    fprintf('Using 3rd-Order Step\n');
                end
            end
    end

    if ~isLegal(d)
        fprintf('Step direction is illegal!\n');
        pause;
        return
    end

    % ****************** COMPUTE STEP LENGTH ************************

    % Directional Derivative
    gtd = g'*d;

    % Check that progress can be made along direction
    if gtd > -progTol
        exitflag = 2;
        msg = 'Directional Derivative below progTol';
        break;
    end

    % Select Initial Guess
    if i == 1
        if method < NEWTON0
            t = min(1,1/sum(abs(g)));
        else
            t = 1;
        end
    else
        if LS_init == 0
            % Newton step
            t = 1;
        elseif LS_init == 1
            % Close to previous step length
            t = t*min(2,(gtd_old)/(gtd));
        elseif LS_init == 2
            % Quadratic Initialization based on {f,g} and previous f
            t = min(1,2*(f-f_old)/(gtd));
        elseif LS_init == 3
            % Double previous step length
            t = min(1,t*2);
        elseif LS_init == 4
            % Scaled step length if possible
            if isempty(HvFunc)
                % No user-supplied Hessian-vector function,
                % use automatic differentiation
                dHd = d'*autoHv(d,x,g,0,funObj,varargin{:});
            else
                % Use user-supplied Hessian-vector function
                dHd = d'*HvFunc(d,x,varargin{:});
            end

            funEvals = funEvals + 1;
            if dHd > 0
                t = -gtd/(dHd);
            else
                t = min(1,2*(f-f_old)/(gtd));
            end
        end

        if t <= 0
            t = 1;
        end
    end
    f_old = f;
    gtd_old = gtd;

    % Compute reference fr if using non-monotone objective
    if Fref == 1
        fr = f;
    else
        if i == 1
            old_fvals = repmat(-inf,[Fref 1]);
        end

        if i <= Fref
            old_fvals(i) = f;
        else
            old_fvals = [old_fvals(2:end);f];
        end
        fr = max(old_fvals);
    end

    computeHessian = 0;
    if method >= NEWTON
        if HessianIter == 1
            computeHessian = 1;
        elseif i > 1 && mod(i-1,HessianIter) == 0
            computeHessian = 1;
        end
    end

    % Line Search
    f_old = f;
    if LS_type == 0 % Use Armijo Backtracking
        % Perform Backtracking line search
        if computeHessian
            [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS_interp,LS_multi,progTol,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;

    elseif LS_type == 1 % Find Point satisfying Wolfe conditions
        if computeHessian
            [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS_interp,LS_multi,25,progTol,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS_interp,LS_multi,25,progTol,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;
        x = x + t*d;

    else
        % Use Matlab optim toolbox line search
        [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter] = ...
            lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,...
            progTol,[],[],[],varargin{:});
        funEvals = funEvals + LSiter;
        if isempty(t)
            exitflag = -2;
            msg = 'Matlab LineSearch failed';
            break;
        end

        if method >= NEWTON
            [f_new,g_new,H] = funObj(x + t*d,varargin{:});
            funEvals = funEvals + 1;
        end
        x = x + t*d;
        f = f_new;
        g = g_new;
    end

    % Compute Optimality Condition
    optCond = max(abs(g));

    % Output iteration information
    if verboseI
        if mod(i,10) == 0
            fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,optCond);
        end
    end

    if nargout > 3
        % Update Trace
        trace.time(end+1,1) = toc;
        trace.fval(end+1,1) = f;
        trace.funcCount(end+1,1) = funEvals;
        trace.optCond(end+1,1) = optCond;
    end

    % Output Function
    if ~isempty(outputFcn)
        stop = outputFcn(x,'iter',i,funEvals,f,t,gtd,g,d,optCond,varargin{:});
        if stop
            exitflag = -1;
            msg = 'Stopped by output function';
            break;
        end
    end

    % Check Optimality Condition
    if optCond <= optTol
        exitflag = 1;
        msg = 'Optimality Condition below optTol';
        break;
    end

    % ******************* Check for lack of progress *******************

    if max(abs(t*d)) <= progTol
        exitflag = 2;
        msg = 'Step Size below progTol';
        break;
    end

    if abs(f-f_old) < progTol
        exitflag = 2;
        msg = 'Function Value changing by less than progTol';
        break;
    end

    % ******** Check for going over iteration/evaluation limit *******************

    if funEvals*funEvalMultiplier >= maxFunEvals
        exitflag = 0;
        msg = 'Reached Maximum Number of Function Evaluations';
        break;
    end

    if i == maxIter
        exitflag = 0;
        msg = 'Reached Maximum Number of Iterations';
        break;
    end

end

if verbose
    fprintf('%s\n',msg);
end
if nargout > 3
    output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,...
        'algorithm',method,'firstorderopt',max(abs(g)),'message',msg,'trace',trace);
end

% Output Function
if ~isempty(outputFcn)
    outputFcn(x,'done',i,funEvals,f,t,gtd,g,d,max(abs(g)),varargin{:});
end

end

--------------------------------------------------------------------------------
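
Worked notes on minFunc.m

minFunc charges numerical differentiation at funEvalMultiplier true objective calls per evaluation: p per gradient for complex-step differentials, 2*p for central differences (numDiff == 2), and p+1 for forward differences. A minimal sketch of the forward-difference case; the toy objective f and the perturbation size mu below are illustrative, not part of the toolbox:

    f = @(x) 0.5*sum(x.^2);        % assumed toy objective
    x = [1; -2; 3];
    p = length(x);
    mu = 1e-6;                     % assumed perturbation size
    fx = f(x);                     % one base evaluation ...
    g = zeros(p,1);
    for j = 1:p                    % ... plus p perturbed ones: p+1 total
        e = zeros(p,1); e(j) = mu;
        g(j) = (f(x+e) - fx)/mu;   % forward difference, O(mu) error
    end
    disp([g x])                    % exact gradient of this quadratic is x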
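
The BB case picks a steepest-descent step length from the most recent pair s = t*d, y = g - g_old: alpha = (s'*y)/(y'*y) for bbType == 0 and alpha = (s'*s)/(s'*y) for bbType == 1, safeguarded to [1e-10, 1e10]. A sketch of the bbType == 1 iteration on an assumed 2-by-2 quadratic (A, the starting point, and the iteration count are made up for illustration):

    A = [3 1; 1 2];                   % SPD, so f(x) = 0.5*x'*A*x is convex
    x = [4; -3]; g = A*x;
    x_old = x; g_old = g;
    x = x - g;                        % first step: plain steepest descent
    for k = 1:25
        g = A*x;
        s = x - x_old;                % plays the role of t*d above
        y = g - g_old;
        alpha = (s'*s)/(s'*y);        % bbType == 1 step length
        if alpha <= 1e-10 || alpha > 1e10
            alpha = 1;                % same safeguard as in the code above
        end
        x_old = x; g_old = g;
        x = x - alpha*g;
    end
    fprintf('||x|| after 25 BB steps: %.2e\n', norm(x));

The objective may rise on individual BB steps, which is one reason the line-search code measures sufficient decrease against the non-monotone reference fr rather than the current f.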
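
The CG case selects beta by the Fletcher-Reeves, Polak-Ribiere, Hestenes-Stiefel, or Gilbert-Nocedal formula and resets d to -g whenever g'*d > -progTol. On a strictly convex quadratic with exact line searches, the Fletcher-Reeves recursion reduces to linear CG and terminates in at most p steps; a sketch on an assumed 2-by-2 problem:

    A = [4 1; 1 3]; b = [1; 2];
    x = zeros(2,1);
    g = A*x - b;                      % gradient of 0.5*x'*A*x - b'*x
    d = -g;
    for k = 1:2
        t = -(g'*d)/(d'*(A*d));       % exact line search on a quadratic
        x = x + t*d;
        g_old = g;
        g = A*x - b;
        beta = (g'*g)/(g_old'*g_old); % Fletcher-Reeves (cgUpdate == 0)
        d = -g + beta*d;
    end
    fprintf('||A*x - b|| after 2 CG steps: %.2e\n', norm(A*x - b));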
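
In the LBFGS case, lbfgsAdd keeps the correction pairs in fixed-size circular buffers S and Y (tracked by lbfgs_start/lbfgs_end), and lbfgsProd/lbfgsProdC turn them into a search direction. The toolbox's own implementations are in lbfgs.m, lbfgsAdd.m, and lbfgsProd.m; the sketch below is a generic two-loop recursion with flat storage and an illustrative function name, not the repo's code:

    function d = lbfgs_two_loop(g, S, Y, Hdiag)
    % d = -H*g for the inverse-Hessian approximation defined by the pairs
    % S(:,i) = x_{i+1}-x_i and Y(:,i) = g_{i+1}-g_i, stored oldest first.
    m = size(S,2);
    rho = zeros(m,1); alpha = zeros(m,1);
    for i = 1:m
        rho(i) = 1/(Y(:,i)'*S(:,i));
    end
    q = -g;                           % start from the negative gradient
    for i = m:-1:1                    % first loop: newest to oldest
        alpha(i) = rho(i)*(S(:,i)'*q);
        q = q - alpha(i)*Y(:,i);
    end
    d = Hdiag*q;                      % initial inverse-Hessian scaling
    for i = 1:m                       % second loop: oldest to newest
        beta = rho(i)*(Y(:,i)'*d);
        d = d + S(:,i)*(alpha(i) - beta);
    end
    end

Hdiag is typically (s'*y)/(y'*y) for the newest pair, which is the scaling lbfgsAdd maintains.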
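
The NEWTON0 case never forms H: conjGrad only needs Hessian-vector products, and autoHv approximates them from one extra gradient call, H*v ~ (g(x + mu*v) - g(x))/mu. A sketch of the idea on an assumed quadratic; gradf and the step-size rule here are illustrative:

    gradf = @(x) [4 1; 1 3]*x;        % assumed gradient of a toy quadratic
    x = [1; 2]; v = [0; 1];
    mu = 2*sqrt(1e-12)*(1 + norm(x))/norm(v);  % small step, scaled to x and v
    Hv = (gradf(x + mu*v) - gradf(x))/mu;
    disp(Hv)                          % equals H*v = [1; 3] here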
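
For HessianModify == 3 (and for the TENSOR fallback), an indefinite Hessian is repaired by a modified spectral decomposition: each eigenvalue is replaced by its absolute value, floored at max(max(abs(D)),1)*1e-12, which guarantees the resulting Newton direction is a descent direction. A self-contained example on an assumed indefinite Hessian:

    H = [2 0; 0 -1];                  % assumed indefinite Hessian
    g = [1; 1];
    [V,D] = eig((H+H')/2);
    D = diag(D);
    D = max(abs(D),max(max(abs(D)),1)*1e-12);
    d = -V*((V'*g)./D);               % modified Newton direction
    fprintf('g''*d = %.3f (negative, so d is a descent direction)\n', g'*d);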
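
With Fref > 1 the Armijo test is non-monotone: sufficient decrease is measured against fr, the maximum objective value over the last Fref iterations, so individual steps may increase f. A sketch of the reference-value bookkeeping, with fvals an invented objective sequence:

    Fref = 3;                          % assumed history length
    old_fvals = repmat(-inf,[Fref 1]);
    fvals = [10 9.5 9.8 9.2 9.6];      % illustrative objective values
    for i = 1:numel(fvals)
        f = fvals(i);
        if i <= Fref
            old_fvals(i) = f;
        else
            old_fvals = [old_fvals(2:end); f];  % sliding window, as above
        end
        fr = max(old_fvals);
        fprintf('iter %d: fr = %.1f\n', i, fr);
    end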
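
ArmijoBacktrack shrinks the trial step until f(x + t*d) <= fr + c1*t*(g'*d) holds, choosing each new trial by polynomial interpolation controlled by LS_interp and LS_multi. The halving loop below is only a minimal stand-in for that search, and every name in it is illustrative:

    f = @(x) 0.5*sum(x.^2);            % assumed toy objective
    x = [3; -4];
    g = x;                             % its gradient at x
    d = -2.5*g;                        % overlong direction, forces backtracking
    c1 = 1e-4; fr = f(x); gtd = g'*d;
    t = 1;
    while f(x + t*d) > fr + c1*t*gtd   % sufficient-decrease test, as above
        t = t/2;                       % the real code interpolates instead
    end
    fprintf('accepted step length t = %g\n', t);

For LS_type == 1, WolfeLineSearch additionally enforces a curvature condition with parameter c2 before accepting t, so the step is neither too long nor too short.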