├── .gitignore
├── LICENSE
├── README.md
└── src
    ├── BFGS.m
    ├── CholeskyMultIdentity.m
    ├── DogLeg.m
    ├── GaussN.m
    ├── LBFGS.m
    ├── Newton.m
    ├── Rho.m
    ├── SteepDescent.m
    ├── StepSize.m
    ├── StepSizeSW.m
    ├── cgTrust.m
    └── cvsrch.m

/.gitignore:
--------------------------------------------------------------------------------
1 | *.asv
2 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2016 Clark Zinzow
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Nonlinear-Optimization-Algorithms
2 | MATLAB implementations of various nonlinear programming algorithms.
3 | 
4 | ---
5 | 
6 | This repository contains MATLAB implementations of a variety of popular nonlinear programming algorithms, many of which can be found in *Numerical Optimization* by Nocedal and Wright, a text that I highly recommend.
7 | 
8 | List of algorithms implemented:
9 | 
10 | 1. [line-search](https://en.wikipedia.org/wiki/Line_search) ([simple Wolfe, strong Wolfe](https://en.wikipedia.org/wiki/Wolfe_conditions), [Moré-Thuente](http://dl.acm.org/citation.cfm?id=192132))
11 | 2. [steepest descent](https://en.wikipedia.org/wiki/Method_of_steepest_descent)
12 | 3. [Newton's method](https://en.wikipedia.org/wiki/Newton%27s_method_in_optimization)
13 | 4. [Dogleg method](http://www.numerical.rl.ac.uk/people/nimg/course/lectures/raphael/lectures/lec7slides.pdf)
14 | 5. [Steihaug-Toint conjugate gradient trust region method](http://www.numerical.rl.ac.uk/people/nimg/course/lectures/raphael/lectures/lec7slides.pdf)
15 | 6. [BFGS](https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm)
16 | 7. [limited-memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS)
17 | 8. [Gauss-Newton method](https://en.wikipedia.org/wiki/Gauss%E2%80%93Newton_algorithm)
18 | 
19 | All of the algorithms are heavily commented (possibly to a fault), as I wanted someone in the midst of a nonlinear programming class to be able to read through the code and understand it decently well.
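To give a flavor of the calling convention: every solver evaluates its objective through a single function handle that takes a point and a mode flag (1 for the function value, 2 for the gradient, and, for the methods that need it, 4 for the Hessian). A minimal, hypothetical driver along these lines (not yet part of this repository) might look like:

```matlab
% rosen.m -- a Rosenbrock objective written against the repository's
% mode-flag convention (mode 1 -> function value, mode 2 -> gradient).
function out = rosen(p, mode)
  if mode == 1
    out = 100*(p(2) - p(1)^2)^2 + (1 - p(1))^2;
  elseif mode == 2
    out = [-400*p(1)*(p(2) - p(1)^2) - 2*(1 - p(1)); ...
           200*(p(2) - p(1)^2)];
  end
end
```

```matlab
% Minimize rosen from the standard starting point; only x.p must be set.
x = struct('p', [-1.2; 1]);
bfgsparams = struct('maxit', 1000, 'toler', 1.0e-4);
[inform, x] = BFGS(@rosen, x, bfgsparams);  % inform.status is 1 on success
```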
Although I have done my best to implement these algorithms with efficiency in mind (within the confines of MATLAB's inherent deficiencies in this regard), this repository is far more valuable as a teaching tool than as a performance-centric library.
20 | 
21 | Due to the algorithms being so heavily commented, many implementation details are contained within the code as comments instead of in a README.
22 | 
23 | Some day, I will include a demo folder that demonstrates the correctness and performance of each algorithm on a set of representative problems, and I will create a README with implementation details for each algorithm, to be located in the src folder.
24 | 
25 | Some day! :)
26 | 
--------------------------------------------------------------------------------
/src/BFGS.m:
--------------------------------------------------------------------------------
1 | function [inform, x] = BFGS(func, x, bfgsparams)
2 | % Implements the Broyden-Fletcher-Goldfarb-Shanno method, an iterative
3 | % quasi-Newton algorithm for solving unconstrained nonlinear optimization
4 | % problems.
5 | %
6 | % Input:
7 | %   func - a pointer to a function
8 | %   x - the following structure:
9 | %       * x.p - the starting point values
10 | %       * x.f - the function value of x.p
11 | %       * x.g - the gradient value of x.p
12 | %       * x.h - the Hessian value of x.p
13 | %       with only x.p guaranteed to be set.
14 | %   bfgsparams - the following structure, as an example:
15 | %       bfgsparams = struct('maxit',1000,'toler',1.0e-4);
16 | %
17 | % Output:
18 | %   inform - structure containing two fields:
19 | %       * inform.status - 1 if gradient tolerance was
20 | %         achieved;
21 | %         0 if not.
22 | %       * inform.iter - the number of steps taken
23 | %   x - the solution structure, with the solution point along with the
24 | %       function evaluation thereof.
25 | 
26 | % Number of function, gradient, and Hessian evaluations.
27 | global numf numg numh
28 | numf = 0;
29 | numg = 0;
30 | numh = 0;
31 | 
32 | % Populate local caching of bfgsparams parameters.
33 | toler = bfgsparams.toler; % Set gradient tolerance.
34 | maxit = bfgsparams.maxit; % Set maximum number of allowed iterations.
35 | xc.p = x.p; % Set the current point to the initial point, x.p.
36 | 
37 | % Initialize parameter structure for StepSize function call.
38 | params = struct('ftol', 1e-4, 'gtol', 0.9, 'xtol', 1e-6, 'stpmin', 0, ...
39 |     'stpmax', 1e20, 'maxfev', 10000);
40 | 
41 | I = eye(size(xc.p, 1)); % Locally stored identity matrix.
42 | for i = 1:maxit
43 |     % Compute function and gradient at current point.
44 |     xc.f = feval(func, xc.p, 1);
45 |     xc.g = feval(func, xc.p, 2);
46 | 
47 |     % Check for termination condition: (scaled) norm of gradient less
48 |     % than toler.
49 |     if norm(xc.g) / min(1000, 1 + abs(xc.f)) < toler
50 |         inform.status = 1; % Indicates success.
51 |         inform.iter = i; % Number of iterations.
52 |         x.p = xc.p;
53 |         x.f = xc.f;
54 |         return;
55 |     end
56 | 
57 |     % For the first step, we use the identity matrix as an initial inverse
58 |     % Hessian approximation.
59 |     if i == 1
60 |         H = I;
61 |     else
62 |         % Update the current inverse Hessian approximation, H.
63 |         H = (I - rho*s*y') * H * (I - rho*y*s') + rho*s*s';
64 |     end
65 | 
66 |     % Compute the current search direction.
67 |     p = -H * xc.g;
68 | 
69 |     % Get step size that satisfies simple Wolfe conditions.
70 |     % NOTE: alfa = 1 should always be tried first since this step length will
71 |     % eventually always be accepted (under certain conditions), thereby
72 |     % producing superlinear convergence of the overall algorithm.
73 |     % See page 142 of Nocedal and Wright.
74 |     [alfa, x] = StepSize(func, xc, p, 1, params);
75 |     % Update current point in p-direction with step size alpha.
76 |     xc.p = xc.p + alfa * p;
77 | 
78 |     % Update parameters.
79 |     s = alfa * p; % s = new_point - prev_point = alfa * p
80 |     y = feval(func, xc.p, 2) - xc.g; % y = grad(new_point) - grad(prev_point)
81 |     rho = 1 / (y'*s);
82 | 
83 |     % If in first step, apply inverse Hessian approximation heuristic given by
84 |     % (6.20) on page 143 of Nocedal and Wright.
85 |     if i == 1
86 |         H = (s'*y)/(y'*y) * I;
87 |     end
88 | end
89 | % If reached, method failed.
90 | inform.status = 0; % Update status to failure indicator, 0.
91 | inform.iter = maxit; % Number of iterations.
92 | x.p = xc.p;
93 | x.f = feval(func, x.p, 1);
94 | x.g = feval(func, x.p, 2);
95 | return; % Return inform and final point x
96 | end
97 | 
--------------------------------------------------------------------------------
/src/CholeskyMultIdentity.m:
--------------------------------------------------------------------------------
1 | function [R, tau] = CholeskyMultIdentity(H)
2 | % Implements Cholesky with added multiple of the identity. This attempts
3 | % to find a scalar tau > 0 such that H + tau * I is sufficiently positive
4 | % definite, where I is the identity matrix.
5 | 
6 | global numFact % Number of Cholesky factorizations attempted.
7 | 
8 | % First, we try a Cholesky factorization.
9 | [R, fail] = chol(H);
10 | numFact = numFact + 1;
11 | % If it does not fail, we're done.
12 | if fail == 0
13 |     tau = 0;
14 |     return;
15 | end
16 | 
17 | % If the initial Cholesky factorization fails, we attempt to find a scalar
18 | % tau > 0 such that H + tau * I is sufficiently positive definite.
19 | beta = 0.001; % Heuristic for increasing tau.
20 | min_H_diag = min(diag(H)); % Smallest diagonal of H.
21 | 
22 | % If the smallest diagonal of H is positive, set tau to 0; otherwise, set
23 | % tau to the negation of the smallest diagonal plus the beta heuristic.
24 | if min_H_diag > 0
25 |     tau = 0;
26 | else
27 |     tau = -min_H_diag + beta;
28 | end
29 | 
30 | I = eye(size(H,1)); % Identity matrix.
31 | 
32 | % Repeatedly add a tau-multiple of the identity to H until the Cholesky
33 | % factorization succeeds. Upon each failure, double tau.
34 | while 1
35 |     [R, fail] = chol(H + tau * I);
36 |     numFact = numFact + 1;
37 |     if fail == 0
38 |         return;
39 |     else
40 |         % NOTE: In order to decrease number of factorizations, we may want to
41 |         % increase tau by a factor of 10 instead of 2.
42 |         tau = max(2*tau, beta);
43 |     end
44 | end
45 | end
46 | 
--------------------------------------------------------------------------------
/src/DogLeg.m:
--------------------------------------------------------------------------------
1 | function [inform, x] = DogLeg(fun, x, dlparams)
2 | % Implements the dogleg method for finding a solution to the subproblem
3 | %
4 | %   min m(p) = f + g'p + 1/2 * p' B p   s.t. ||p|| <= Del
5 | %
6 | % Input:
7 | %   fun - a pointer to a function
8 | %   x - the following structure:
9 | %       * x.p - the starting point values
10 | %       * x.f - the function value of x.p
11 | %       * x.g - the gradient value of x.p
12 | %       * x.h - the Hessian value of x.p
13 | %       with only x.p guaranteed to be set.
14 | %   dlparams - the following structure, as an example:
15 | %       dlparams = struct('maxit',1000,'toler',1.0e-4,'initdel',1,
16 | %                         'maxdel',100,'eta',0.1,'method','chol',
17 | %                         'hessian','exact','fail','cauchy');
18 | %
19 | % Output:
20 | %   inform - structure containing two fields:
21 | %       * inform.status - 1 if gradient tolerance was
22 | %         achieved;
23 | %         0 if not.
24 | %       * inform.iter - the number of steps taken
25 | %   x - the solution structure, with the solution point along with
26 | %       function, gradient, and Hessian evaluations thereof.
27 | 
28 | % Number of function, gradient, and Hessian evaluations, and number of Cholesky
29 | % factorizations.
30 | global numf numg numh numFact
31 | numf = 0;
32 | numg = 0;
33 | numh = 0;
34 | numFact = 0;
35 | 
36 | % Populate local caching of dlparams parameters.
37 | toler = dlparams.toler; % Set gradient tolerance.
38 | maxit = dlparams.maxit; % Set maximum number of allowed iterations.
39 | initdel = dlparams.initdel; % Set initial delta value.
40 | maxdel = dlparams.maxdel; % Set maximum delta value.
41 | eta = dlparams.eta; % Set eta.
42 | 
43 | del = initdel; % Set delta value to initial delta value.
44 | xc.p = x.p; % Set the current point to the initial point, x.p.
45 | 
46 | for i = 1:maxit
47 |     % Compute function, gradient, and Hessian at current point.
48 |     xc.f = feval(fun, xc.p, 1);
49 |     xc.g = feval(fun, xc.p, 2);
50 |     xc.h = sparse(feval(fun, xc.p, 4));
51 | 
52 |     % Check for termination condition: norm of gradient less than toler.
53 |     if norm(xc.g) < toler
54 |         inform.status = 1; % Indicates success.
55 |         inform.iter = i; % Number of iterations.
56 |         x.p = xc.p;
57 |         x.f = xc.f;
58 |         x.g = xc.g;
59 |         x.h = xc.h;
60 |         return;
61 |     end
62 | 
63 |     % Calculate the Cauchy point, pU.
64 | 
65 |     % If g'Hg <= 0 ...
66 |     if xc.g'*xc.h*xc.g <= 0
67 |         tau = 1; % Set tau = 1.
68 |     % Otherwise (g'Hg > 0) ...
69 |     else
70 |         % tau = min(||g||^3 / (del * g'Hg),1)
71 |         tau = min(norm(xc.g)^3 / (del*xc.g'*xc.h*xc.g),1);
72 |     end
73 |     pU = -tau * del * xc.g / norm(xc.g);
74 | 
75 |     % Calculate dogleg point, p.
76 | 
77 |     % Get the Cholesky factorization of the Hessian at the current point.
78 |     [R,flag] = chol(xc.h);
79 |     numFact = numFact + 1; % Increment the 'chol' call counter.
80 |     % If the Cholesky factorization failed and dlparams.fail = 'cauchy',
81 |     % then the dogleg point is the Cauchy point, pU.
82 |     if flag ~= 0
83 |         if isfield(dlparams, 'fail') && strcmp(dlparams.fail, 'cauchy')
84 |             p = pU; % Set dogleg point to Cauchy point.
85 |         else
86 |             % Otherwise, repeatedly try 'multiple of the identity' Hessian
87 |             % modifications until the resulting matrix is positive-definite.
88 |             beta = 0.001; % t-shift parameter.
89 |             minDiag = min(diag(xc.h)); % Minimum of the diagonal of Hessian.
90 |             % If diag > 0 componentwise...
91 |             if minDiag > 0
92 |                 t = 0; % Set initial shift to 0.
93 |             else % Otherwise...
94 |                 % Set the initial shift to the negative of minDiag, plus the
95 |                 % beta shift parameter (so diag > 0 after this shift is applied).
96 |                 t = -minDiag + beta;
97 |             end
98 |             % Keep attempting Cholesky factorizations, with increasing shifts,
99 |             % until successful.
100 |             while flag ~= 0
101 |                 [R,flag] = chol(xc.h + t*eye(size(xc.h,1)));
102 |                 numFact = numFact + 1;
103 |                 t = max(2*t,beta);
104 |             end
105 |             % Solve R'R * pB = -g for the full (Newton) step, pB.
106 |             pB = -R \ (R' \ xc.g);
107 |             % Dogleg point: where the segment from pU toward pB meets the boundary.
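            % (boundary() solves ||pU + alpha*(pB - pU)|| = del for alpha >= 0:
            % a quadratic a*alpha^2 + b*alpha + c = 0 with a = ||pB - pU||^2,
            % b = 2*pU'*(pB - pU), and c = ||pU||^2 - del^2, of which the
            % positive root is taken.)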
108 |             p = boundary(pU, pB - pU, del);
109 |             % a = norm(pB - pU)^2;
110 |             % b = 2*pU'*(pB-pU);
111 |             % c = norm(pU)^2 - del^2;
112 |             % alpha = (-b + sqrt(b^2 - 4*a*c)) / (2*a);
113 |             % p = pU + alpha * (pB-pU);
114 |             %p = pU + ((-(2*pU'*(pB-pU)) + sqrt((2*pU'*(pB-pU))^2 - ...
115 |             %4*norm(pB-pU)^2*(norm(pU)^2-del^2))) / (2*norm(pB-pU)^2)) * (pB - pU);
116 |         end
117 |     else
118 |         % Solve R'R * pB = -g for the full (Newton) step, pB.
119 |         pB = -R \ (R' \ xc.g);
120 |         % Dogleg point: where the segment from pU toward pB meets the boundary.
121 |         p = boundary(pU, pB - pU, del);
122 |         %a = norm(pB - pU)^2;
123 |         %b = 2*pU'*(pB-pU);
124 |         %c = norm(pU)^2 - del^2;
125 |         %alpha = (-b + sqrt(b^2 - 4*a*c)) / (2*a);
126 |         %p = pU + alpha * (pB-pU);
127 |         %p = pU + ((-(2*pU'*(pB-pU)) + sqrt((2*pU'*(pB-pU))^2 - ...
128 |         %4*norm(pB-pU)^2*(norm(pU)^2-del^2))) / (2*norm(pB-pU)^2)) * (pB - pU);
129 |     end
130 | 
131 |     % Compute the reduction ratio.
132 |     rho = (xc.f - feval(fun,xc.p + p,1)) / (-xc.g'*p - 0.5*p'*xc.h*p);
133 | 
134 |     % Update the trust region; i.e., update del and the current point.
135 |     if rho < 0.25
136 |         del = del / 4;
137 |     else
138 |         % Note that radius only increases if norm of p reaches the trust
139 |         % region boundary.
140 |         if rho > 0.75 && norm(p) == del
141 |             del = min(2*del, maxdel);
142 |         end
143 |     end
144 |     if rho > eta
145 |         xc.p = xc.p + p;
146 |     end
147 | end
148 | % If reached, method failed.
149 | inform.status = 0; % Update status to failure indicator, 0.
150 | inform.iter = maxit; % Number of iterations = i = maxit at this point.
151 | x.p = xc.p;
152 | x.f = feval(fun, x.p, 1);
153 | x.g = feval(fun, x.p, 2);
154 | x.h = sparse(feval(fun,x.p,4));
155 | return; % Return inform and final point x
156 | end
157 | 
158 | function p = boundary(p, q, del)
159 | % Finds the boundary point.
160 | a = norm(q)^2;
161 | b = 2*p'*q;
162 | c = norm(p)^2 - del^2;
163 | alpha = (-b + sqrt(b^2 - 4*a*c)) / (2*a);
164 | p = p + alpha * q;
165 | return;
166 | end
--------------------------------------------------------------------------------
/src/GaussN.m:
--------------------------------------------------------------------------------
1 | function [inform, x] = GaussN(fun, resid, x, gnparams)
2 | % Implements the Gauss-Newton algorithm for solving nonlinear least squares
3 | % problems.
4 | %
5 | % Input:
6 | %
7 | %   fun - a pointer to a function
8 | %   resid - a pointer to residual function
9 | %   x - the following structure:
10 | %       * x.p - the starting point values
11 | %       * x.f - the function value of x.p
12 | %       * x.g - the gradient value of x.p
13 | %       * x.h - the Hessian value of x.p
14 | %       with only x.p guaranteed to be set.
15 | %   gnparams - the following structure, as an example:
16 | %
17 | %       gnparams = struct('maxit',1000,'toler',1.0e-4,
18 | %                         'lsmethod','chol');
19 | %
20 | %   Note that 'lsmethod' can be 'chol', 'qr', or 'svd'.
21 | %
22 | % Output:
23 | %
24 | %   inform - structure containing two fields:
25 | %       * inform.status - 1 if gradient tolerance was
26 | %         achieved;
27 | %         0 if not.
28 | %       * inform.iter - the number of steps taken
29 | %   x - the solution structure, with the solution point along with
30 | %       function, gradient, and Hessian evaluations thereof.
31 | 
32 | % Number of function, gradient, and Hessian evaluations.
33 | global numf numg numh
34 | numf = 0;
35 | numg = 0;
36 | numh = 0;
37 | 
38 | % Populate local caching of gnparams parameters.
39 | toler = gnparams.toler; % Set gradient tolerance.
40 | maxit = gnparams.maxit; % Set maximum number of allowed iterations.
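% The Gauss-Newton direction p solves the linearized least-squares problem
% min ||J*p + r||^2, i.e. the normal equations J'*J*p = -J'*r; 'lsmethod'
% selects how that system is solved (Cholesky of J'*J, QR of J, or SVD of J).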
41 | lsmethod = gnparams.lsmethod; % Set method to find search direction.
42 | 
43 | % Initialize parameter structure for StepSizeSW function call.
44 | params = struct('ftol', 1e-4, 'gtol', 0.9, 'xtol', 1e-6, 'stpmin', 0, ...
45 |     'stpmax', 1e20, 'maxfev', 10000);
46 | 
47 | alfa = 1; % Initial value of alpha.
48 | xc.p = x.p; % Set the current point to the initial point, x.p.
49 | 
50 | for i = 1:maxit
51 |     % Compute function and gradient at current point.
52 |     xc.f = feval(fun, xc.p, 1);
53 |     xc.g = feval(fun, xc.p, 2);
54 | 
55 |     % Check for termination condition: norm of gradient less than toler.
56 |     if norm(xc.g) < toler
57 |         inform.status = 1; % Indicates success.
58 |         inform.iter = i; % Number of iterations.
59 |         x.p = xc.p;
60 |         x.f = xc.f;
61 |         x.g = xc.g;
62 |         return;
63 |     end
64 | 
65 |     % Calculate residual function value and Jacobian at current point.
66 |     r = feval(resid, xc.p, 1);
67 |     J = feval(resid, xc.p, 2);
68 | 
69 |     % Determine which method should be used to find the search direction, p.
70 |     switch lsmethod
71 |         case 'chol' % Use Cholesky factorization of J'*J to get p.
72 |             % If J'*J is positive-definite, R upper triangular matrix such
73 |             % that R'*R = J'*J.
74 |             % Else, Cholesky failed and flag is a positive integer.
75 |             [R, flag] = chol(J'*J);
76 |             % If the Cholesky factorization failed, return with fail status.
77 |             if flag ~= 0
78 |                 inform.status = 0; % Update status to failure indicator, 0.
79 |                 inform.iter = i; % Number of iterations.
80 |                 x.p = xc.p;
81 |                 x.f = xc.f;
82 |                 return;
83 |             end
84 |             % J'J p = -J'r and R'R = J'J, hence p = -R\(R'\(J'*r)).
85 |             p = -R \ (R' \ (J'*r));
86 |         case 'qr' % Use QR factorization of J to get p.
87 |             % P permutation matrix, Q unitary matrix, and R upper triangular
88 |             % matrix with diagonal arranged in absolute decreasing order.
89 |             [Q, R, P] = qr(J);
90 |             n = size(J, 2);
91 |             Q1 = Q(1:end, 1:n);
92 |             R = R(1:n, 1:end);
93 |             % p = argmin ||J*p+r||^2 = solution of R*P'*p + Q1'*r = 0, hence
94 |             % p = -P' \ (R \ (Q1'*r)) = -P * (R \ (Q1' * r)).
95 |             p = -P * (R \ (Q1'*r));
96 |         case 'svd' % Use SVD factorization of J to get p.
97 |             % U and V unitary matrices, S diagonal matrix.
98 |             [U, S, V] = svd(full(J));
99 |             n = size(J, 2);
100 |             U1 = U(1:end, 1:n);
101 |             S = S(1:n, 1:n);
102 |             % Since the Moore-Penrose inverse is pinv(J) = V*inv(S)*U1' and
103 |             % p = -pinv(J)*r, we have that p = -V*inv(S)*U1'*r.
104 |             p = -V * inv(S) * U1' * r;
105 |     end
106 | 
107 |     % Get step size that satisfies strong Wolfe conditions.
108 |     [alfa, x] = StepSizeSW(fun, xc, p, alfa, params);
109 |     % Update current point in p-direction with step size alpha.
110 |     xc.p = xc.p + alfa * p;
111 | end
112 | % If reached, method failed.
113 | inform.status = 0; % Update status to failure indicator, 0.
114 | inform.iter = maxit; % Number of iterations i = maxit at this point.
115 | x.p = xc.p;
116 | x.f = feval(fun, x.p, 1);
117 | x.g = feval(fun, x.p, 2);
118 | return; % Return inform and final point x
119 | end
120 | 
--------------------------------------------------------------------------------
/src/LBFGS.m:
--------------------------------------------------------------------------------
1 | function [inform, x] = LBFGS(func, x, lbfgsparams)
2 | % Implements the limited-memory Broyden-Fletcher-Goldfarb-Shanno algorithm, a
3 | % quasi-Newton method that approximates the Broyden-Fletcher-Goldfarb-Shanno
4 | % method using a limited amount of memory.
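% Rather than storing a dense n-by-n inverse Hessian approximation, L-BFGS
% keeps only the m most recent {s_i, y_i} correction pairs and reconstructs
% the product of the inverse Hessian approximation with the gradient via the
% two-loop recursion (Algorithm 7.4 of Nocedal and Wright).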
5 | 
6 | % Input:
7 | %   func - a pointer to a function
8 | %   x - the following structure:
9 | %       * x.p - the starting point values
10 | %       * x.f - the function value of x.p
11 | %       * x.g - the gradient value of x.p
12 | %       * x.h - the Hessian value of x.p
13 | %       with only x.p guaranteed to be set.
14 | %   lbfgsparams - the following structure, as an example:
15 | %       lbfgsparams = struct('maxit', 1000, 'toler', 1.0e-4, 'm', 3);
16 | %
17 | % Output:
18 | %   inform - structure containing two fields:
19 | %       * inform.status - 1 if gradient tolerance was
20 | %         achieved;
21 | %         0 if not.
22 | %       * inform.iter - the number of steps taken
23 | %   x - the solution structure, with the solution point along with
24 | %       function, gradient, and Hessian evaluations thereof.
25 | 
26 | % Number of function, gradient, and Hessian evaluations.
27 | global numf numg numh
28 | numf = 0;
29 | numg = 0;
30 | numh = 0;
31 | 
32 | % Populate local caching of lbfgsparams parameters.
33 | toler = lbfgsparams.toler; % Set gradient tolerance.
34 | maxit = lbfgsparams.maxit; % Set maximum number of allowed iterations.
35 | m = lbfgsparams.m; % Set memory parameter.
36 | %   NOTE: Practical experience indicates that modest values
37 | %   of m (e.g. between 3 and 20) often produce
38 | %   satisfactory results.
39 | xc.p = x.p; % Set the current point to the initial point, x.p.
40 | 
41 | % Initialize parameter structure for StepSizeSW function call.
42 | params = struct('ftol', 1e-4, 'gtol', 0.9, 'xtol', 1e-6, 'stpmin', 0, ...
43 |     'stpmax', 1e20, 'maxfev', 10000);
44 | 
45 | n = size(xc.p, 1); % Dimension of vector space.
46 | 
47 | % Allocate alpha and rho arrays for two-loop recursion procedure, and
48 | % allocate s and y arrays for storing a modified inverse Hessian approximation
49 | % implicitly via {s_i,y_i} pairs. The rho, s, and y arrays will be used as
50 | % queues, with the m most recent entries being stored in the queues.
51 | alfa_arr = zeros(1, m);
52 | rho_arr = zeros(1, m);
53 | s_arr = zeros(n, m);
54 | y_arr = zeros(n, m);
55 | 
56 | I = eye(n); % Locally stored identity matrix.
57 | 
58 | for i = 1:maxit
59 |     % Compute function and gradient at current point.
60 |     xc.f = feval(func, xc.p, 1);
61 |     xc.g = feval(func, xc.p, 2);
62 | 
63 |     % Check for termination condition: (scaled) norm of gradient less
64 |     % than toler.
65 |     if norm(xc.g) / min(1000, 1+abs(xc.f)) < toler
66 |         inform.status = 1; % Indicates success.
67 |         inform.iter = i; % Number of iterations.
68 |         x.p = xc.p;
69 |         x.f = xc.f;
70 |         return;
71 |     end
72 | 
73 |     % For the first step, we use the identity matrix as an initial inverse
74 |     % Hessian approximation.
75 |     if i == 1
76 |         p = -xc.g;
77 |     else
78 |         % Otherwise, we update the current inverse Hessian approximation, H,
79 |         % using the L-BFGS two-loop recursion procedure detailed in Algorithm
80 |         % 7.4 of Nocedal and Wright.
81 | 
82 |         k = max(1, m-i+2); % Sets (inclusive) lower index bound. If
83 |                            % i-1 >= m, we use m {s_j,y_j} vector pairs to
84 |                            % obtain the approximate inverse Hessian and
85 |                            % gradient product; otherwise, we use i-1 {s_j,y_j}
86 |                            % vector pairs.
87 | 
88 |         % Scaling factor, gamma, attempts to estimate the size of the true
89 |         % Hessian matrix along the most recent search direction;
90 |         % calculated via (7.20) of Nocedal and Wright.
91 |         gamma = (s_arr(:, m)'*y_arr(:, m)) / (y_arr(:, m)'*y_arr(:, m));
92 |         % The following choice for an initial Hessian approximation has proved
93 |         % effective in practice.
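        % ((7.20): gamma = s_{k-1}'*y_{k-1} / (y_{k-1}'*y_{k-1}), where the
        % most recent correction pair is the one stored at queue index m.)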
94 |         H_init = gamma * I;
95 |         q = xc.g;
96 |         % Compute product of inverse Hessian approximation and gradient for
97 |         % current iteration, to be stored in r. See Algorithm 7.4 of
98 |         % Nocedal and Wright.
99 |         for j = m : -1 : k
100 |             alfa_arr(1,j) = rho_arr(1,j) * s_arr(:,j)' * q;
101 |             q = q - alfa_arr(1,j) * y_arr(:,j);
102 |         end
103 |         r = H_init * q;
104 |         for j = k : m
105 |             beta = rho_arr(1, j) * y_arr(:,j)' * r;
106 |             r = r + s_arr(:, j) * (alfa_arr(1, j) - beta);
107 |         end
108 |         p = -r; % Set search direction to the negative of this product.
109 |     end
110 | 
111 |     % Get step size that satisfies strong Wolfe conditions.
112 |     % NOTE: alfa = 1 should always be tried first since this step length will
113 |     % eventually always be accepted (under certain conditions), thereby
114 |     % producing superlinear convergence of the overall algorithm.
115 |     % See page 142 of Nocedal and Wright.
116 |     [alfa, x] = StepSizeSW(func, xc, p, 1, params);
117 |     % Update current point in p-direction with step size alpha.
118 |     xc.p = xc.p + alfa * p;
119 | 
120 |     % Update parameters.
121 |     s = alfa * p; % s = new_point - prev_point = alfa * p
122 |     y = feval(func, xc.p, 2) - xc.g; % y = grad(new_point) - grad(prev_point)
123 |     rho = 1 / (y'*s);
124 | 
125 |     % Now we dequeue the oldest entry in each of the rho, s, and y queues, and
126 |     % enqueue the most recent rho, s, and y values.
127 | 
128 |     % Move each rho, s, and y entry up one spot in their respective arrays.
129 |     for j = 1 : m-1
130 |         rho_arr(1, j) = rho_arr(1, j+1);
131 |         s_arr(:, j) = s_arr(:, j+1);
132 |         y_arr(:, j) = y_arr(:, j+1);
133 |     end
134 |     % Set the last entry in the rho, s, and y arrays to the most recent
135 |     % rho, s, and y values, respectively.
136 |     rho_arr(1, m) = rho;
137 |     s_arr(:, m) = s;
138 |     y_arr(:, m) = y;
139 | end
140 | % If reached, method failed.
141 | inform.status = 0; % Update status to failure indicator, 0.
142 | inform.iter = maxit; % Number of iterations.
143 | x.p = xc.p;
144 | x.f = feval(func, x.p, 1);
145 | x.g = feval(func, x.p, 2);
146 | return; % Return inform and final point x
147 | end
--------------------------------------------------------------------------------
/src/Newton.m:
--------------------------------------------------------------------------------
1 | function [inform, x] = Newton(func, x, nparams)
2 | % Implements Newton's method, which uses the Newton iteration for the search
3 | % direction:
4 | %
5 | %   p = -(Hessian(f)^(-1)) grad(f).
6 | %
7 | % Moreover, we either use a 'direct' modified Hessian method (if 'direct' is
8 | % supplied as the 'method' in nparams) or we use a Cholesky with added
9 | % multiple of the identity method for Hessian modification. We are therefore
10 | % able to deal with functions whose Hessian may not be positive definite away
11 | % from the solution (in which case the Newton direction may fail to be a
12 | % descent direction).
13 | %
14 | % Input:
15 | %   func - a pointer to a function
16 | %   x - the following structure:
17 | %       * x.p - the starting point values
18 | %       * x.f - the function value of x.p
19 | %       * x.g - the gradient value of x.p
20 | %       * x.h - the Hessian value of x.p
21 | %       with only x.p guaranteed to be set.
22 | %   nparams - the following structure, as an example:
23 | %       nparams = struct('maxit',1000,'toler',1.0e-4,'method','direct');
24 | %
25 | % Output:
26 | %   inform - structure containing two fields:
27 | %       * inform.status - 1 if gradient tolerance was
28 | %         achieved;
29 | %         0 if not.
30 | %       * inform.iter - the number of steps taken
31 | %   x - the solution structure, with the solution point along with
32 | %       function and gradient evaluations thereof.
33 | 
34 | % Number of function, gradient, and Hessian evaluations.
35 | global numf numg numh
36 | numf = 0;
37 | numg = 0;
38 | numh = 0;
39 | 
40 | % Populate local caching of nparams parameters.
41 | toler = nparams.toler; % Set gradient tolerance.
42 | maxit = nparams.maxit; % Set maximum number of allowed iterations.
43 | method = nparams.method; % Set method.
44 | xc.p = x.p; % Set the current point to the initial point, x.p.
45 | 
46 | % Initialize parameter structure for StepSize function call.
47 | params = struct('ftol', 1e-4, 'gtol', 0.9, 'xtol', 1e-6, 'stpmin', 0, ...
48 |     'stpmax', 1e20, 'maxfev', 10000);
49 | 
50 | for i = 1:maxit
51 |     % Compute function, gradient, and Hessian at current point.
52 |     xc.f = feval(func, xc.p, 1);
53 |     xc.g = feval(func, xc.p, 2);
54 |     xc.h = sparse(feval(func, xc.p, 4));
55 | 
56 |     % Check for the termination condition: norm of gradient less than toler.
57 |     if norm(xc.g) < toler
58 |         inform.status = 1; % Indicates success.
59 |         inform.iter = i; % Number of iterations.
60 |         x.p = xc.p;
61 |         x.f = xc.f;
62 |         x.g = xc.g;
63 |         return;
64 |     end
65 | 
66 |     % Use the direct method.
67 |     if strcmp(method, 'direct')
68 |         p = -xc.h \ xc.g; % Search direction.
69 | 
70 |         % If this is not a descent direction...
71 |         if xc.g' * p >= 0
72 |             D = zeros(size(xc.h)); % Modified Hessian matrix.
73 |             for j = 1 : size(xc.p, 1)
74 |                 D(j,j) = 1.0 / max(0.01, abs(xc.h(j,j)));
75 |             end
76 |             % D is now a positive diagonal matrix with a diagonal scaled
77 |             % by the inverse of the corresponding diagonal element of the
78 |             % Hessian (or by 100 if 1/abs(xc.h(j,j)) > 100).
79 |             p = -D*xc.g; % New search direction that is a descent direction.
80 |         end
81 |     else
82 |         % Hessian modification method: Cholesky with added multiple of identity.
83 |         [R, ~] = CholeskyMultIdentity(xc.h);
84 |         p = -R \ (R'\xc.g); % Search direction.
85 |     end
86 | 
87 |     % Get step size that satisfies simple Wolfe conditions.
88 |     [alfa, x] = StepSize(func, xc, p, 1, params);
89 |     % Update current point in p-direction with step size alpha.
90 |     xc.p = xc.p + alfa * p;
91 | end
92 | % If reached, method failed.
93 | inform.status = 0; % Update status to failure indicator, 0.
94 | inform.iter = maxit; % Number of iterations = i = maxit at this point.
95 | x.p = xc.p;
96 | x.f = feval(func, x.p, 1);
97 | x.g = feval(func, x.p, 2);
98 | return; % Return inform and final point.
99 | end
100 | 
101 | 
102 | 
--------------------------------------------------------------------------------
/src/Rho.m:
--------------------------------------------------------------------------------
1 | function rhoout = Rho(X,Y,alpha,beta,mode) % rho(x,y) = 1 + alpha*exp(-beta*(x^2 + y^2))
2 | if bitand(mode,1) % Mode bit 1 requests function values, evaluated elementwise.
3 |     N = length(X);
4 |     rhoout = zeros(N,1);
5 |     for i=1:N
6 |         rhoout(i) = 1+alpha*exp(-1*beta*(X(i).^2 + Y(i).^2));
7 |     end
8 | end
9 | return;
10 | end
--------------------------------------------------------------------------------
/src/SteepDescent.m:
--------------------------------------------------------------------------------
1 | function [inform, x] = SteepDescent(fun, x, sdparams)
2 | % Implements steepest descent using simple Wolfe conditions via StepSize.m.
3 | %
4 | % Implementation Parameters:
5 | %   * STOP when
6 | %     - Objective function gradient has norm less than gtol.
7 | %     - maxit steps have been taken.
8 | %   * ftol = 1.0e-20
9 | %   * gtol = 1.0e-4
10 | %   * xtol = 1.0e-20
11 | %   * maxit = 1000
12 | %   * p_k = -grad(f(x_k))
13 | %   * alfa_k = max(10*xtol, f'(x_{k-1})*p_{k-1}*alfa_{k-1}/(f'(x_k)*p_k))
14 | %
15 | % Input:
16 | %   fun - a pointer to a function
17 | %   x - the following structure:
18 | %       * x.p - the starting point values
19 | %       * x.f - the function value of x.p
20 | %       * x.g - the gradient value of x.p
21 | %   sdparams - the following structure, as an example:
22 | %       sdparams = struct('maxit',1000,'toler',1.0e-4);
23 | %
24 | % Output:
25 | %   inform - structure containing two fields:
26 | %       * inform.status - 1 if gradient tolerance was
27 | %         achieved;
28 | %         0 if not.
29 | %       * inform.iter - the number of steps taken
30 | %   x - the solution structure, with point, function,
31 | %       and gradient evaluations at the solution point.
32 | 
33 | % Number of function and gradient evaluations.
34 | global numf numg
35 | numf = 0;
36 | numg = 0;
37 | 
38 | % Populate local caching of sdparams parameters, and params parameters.
39 | toler = sdparams.toler; % Set gradient tolerance.
40 | maxit = sdparams.maxit; % Set maximum number of allowed iterations.
41 | xtol = 1.0e-20; % Set point tolerance.
42 | ftol = 1.0e-20; % Set function tolerance.
43 | gtol = toler; % Set gradient tolerance.
44 | 
45 | % Initialize parameter structure for StepSize function call.
46 | params = struct('ftol', ftol, 'gtol', gtol, 'xtol', xtol);
47 | 
48 | % Below was tailored to geodesic functions.
49 | % If calling geodesic function...
50 | % if isfield(sdparams, 'geoparams') && isstruct(sdparams.geoparams)
51 | %     params.geoparams = sdparams.geoparams;
52 | %     x.f = feval(fun, x.p, 1, params.geoparams);
53 | %     x.g = feval(fun, x.p, 2, params.geoparams);
54 | % end
55 | % params.geoparams = -1;
56 | 
57 | % Compute function and gradient at starting point.
58 | x.f = feval(fun, x.p, 1);
59 | x.g = feval(fun, x.p, 2);
60 | 
61 | alfa = 1; % Initial alpha value.
62 | iter = 0; % Number of iterations.
63 | while iter < maxit && norm(x.g) >= toler
64 |     pg = x.g; % Store previous gradient value.
65 |     d = -x.g; % Steepest descent direction.
66 | 
67 |     % Get step size satisfying simple Wolfe conditions.
68 |     [alfa, x] = StepSize(fun, x, d, alfa, params);
69 |     alfa = max(10*xtol, pg'*d*alfa / (x.g'*(-x.g))); % Calculate new alpha.
70 |     iter = iter + 1; % Increment step counter.
71 | end
72 | inform.iter = iter; % Number of iterations = iter at this point.
73 | 
74 | % If norm of gradient is less than tolerance, the method succeeded.
75 | if norm(x.g) < toler
76 |     inform.status = 1; % Update status to success indicator, 1.
77 | else
78 |     inform.status = 0; % Update status to failure indicator, 0.
79 | end
80 | return; % Return inform and final point x
81 | end
--------------------------------------------------------------------------------
/src/StepSize.m:
--------------------------------------------------------------------------------
1 | function [alfa,x] = StepSize(fun, x, d, alfa, params)
2 | % Implements simple Wolfe conditions.
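% Searches for a step length alfa > 0 along direction d satisfying
%
%   sufficient decrease: f(x + alfa*d) <= f(x) + c1*alfa*g'*d   (c1 = params.ftol)
%   curvature:           g(x + alfa*d)'*d >= c2*g'*d            (c2 = params.gtol)
%
% by bisecting (Interp) a bracketing interval [alfaL, alfaR], and doubling
% (Extrap) the trial step while the interval is still unbounded.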
3 | 
4 | x0 = x.p;
5 | Dphi0 = x.g'*d;
6 | if ( (alfa <= 0) || (Dphi0 > 0) )
7 |     error('Initialization of step incorrect');
8 | end
9 | 
10 | c1 = params.ftol;
11 | c2 = params.gtol;
12 | 
13 | phi0 = x.f;
14 | alfaL = 0;
15 | alfaR = inf;
16 | 
17 | iter = 0;
18 | while abs(alfaR-alfaL) > params.xtol
19 |     iter = iter + 1;
20 |     x.p = x0 + alfa*d;
21 | 
22 |     x.f = feval(fun,x.p,1);
23 | 
24 |     if (x.f >= phi0 + alfa*c1*Dphi0)
25 |         alfaR = alfa;
26 |         alfa = Interp(alfaL, alfaR);
27 |     else
28 |         x.g = feval(fun,x.p,2);
29 |         DphiAlfa = x.g'*d;
30 | 
31 |         if (DphiAlfa >= c2*Dphi0)
32 |             return;
33 |         else
34 |             alfaL = alfa;
35 |         end
36 |         if isinf(alfaR)
37 |             alfa = Extrap(alfaL);
38 |         else
39 |             alfa = Interp( alfaL, alfaR);
40 |         end
41 |     end
42 | end
43 | fprintf('step size criteria were not met\n');
44 | fprintf('after %d step size iterations.\n', iter);
45 | error('STEP SIZE FAILURE');
46 | return;
47 | 
48 | function mid = Interp(left, right)
49 |     mid = (left + right)/2.0;
50 | return;
51 | 
52 | function mid = Extrap(left)
53 |     mid = 2*left;
54 | return;
55 | 
56 | 
--------------------------------------------------------------------------------
/src/StepSizeSW.m:
--------------------------------------------------------------------------------
1 | function [alpha,xp] = StepSizeSW(f,x,d,alpha,params)
2 | % Line search algorithm satisfying strong Wolfe conditions.
3 | % Algorithm 3.5 on pages 60-61 in Nocedal and Wright.
4 | % Requires x.p, x.f and x.g to be initialized.
5 | 
6 | alpha0 = params.stpmin;
7 | c1 = params.ftol;
8 | c2 = params.gtol;
9 | % alpha is alpha_i
10 | gxd = x.g'*d;
11 | % alphap is alpha_{i-1}
12 | alphap = alpha0;
13 | fxp = x.f;
14 | i=1;
15 | while norm(alphap-alpha) > params.xtol
16 |     xp.p = x.p + alpha*d;
17 |     xp.f = feval(f,xp.p,1);
18 |     if (xp.f > x.f + c1*alpha*gxd) | ((i > 1) & (xp.f >= fxp)),
19 |         [alpha,xp] = zoom(f,x,d,alphap,alpha,fxp,c1,c2);
20 |         return;
21 |     end
22 |     xp.g = feval(f,xp.p,2); gxpd = xp.g'*d;
23 |     if abs(gxpd) <= -c2*gxd,
24 |         return;
25 |     end
26 |     if gxpd >= 0,
27 |         [alpha,xp] = zoom(f,x,d,alpha,alphap,xp.f,c1,c2);
28 |         return;
29 |     end
30 |     alphap = alpha;
31 |     fxp = xp.f;
32 |     % alpha = alpha + (params.stpmax-alpha)*rand(1);
33 |     alpha = alpha + (params.stpmax-alpha)*0.5;
34 |     i = i+1;
35 | end
36 | alpha,
37 | error('No stepsize found');
38 | 
39 | function [alpha,xp] = zoom(f,x,d,alphal,alphah,fxl,c1,c2)
40 | % function [alpha,xp] = zoom(f,x,d,alphal,alphah,fxl,c1,c2)
41 | % Algorithm 3.6 on page 61 in Nocedal and Wright
42 | 
43 | gxd = x.g'*d;
44 | 
45 | while 1
46 |     alpha = 1/2*(alphal+alphah);
47 |     xp.p = x.p + alpha*d;
48 |     xp.f = feval(f,xp.p,1);
49 |     if ((xp.f > x.f + c1*alpha*gxd) | (xp.f >= fxl)),
50 |         alphah = alpha;
51 |     else
52 |         xp.g = feval(f,xp.p,2); gxpd = xp.g'*d;
53 |         if abs(gxpd) <= -c2*gxd,
54 |             return;
55 |         end
56 |         if gxpd*(alphah-alphal) >= 0,
57 |             alphah = alphal;
58 |         end
59 |         alphal = alpha;
60 |         fxl = xp.f;
61 |     end
62 | end
--------------------------------------------------------------------------------
/src/cgTrust.m:
--------------------------------------------------------------------------------
1 | function [inform, x] = cgTrust(fun, x, cgtparams)
2 | % Implements the Steihaug-Toint conjugate gradient trust region method for
3 | % finding an approximate solution to the subproblem:
4 | %
5 | %   min m(p) = f + g'p + 1/2 * p' B p   s.t. ||p|| <= Del
6 | %
7 | % Input:
8 | %   fun - a pointer to a function
9 | %   x - the following structure:
10 | %       * x.p - the starting point values
11 | %       * x.f - the function value of x.p
12 | %       * x.g - the gradient value of x.p
13 | %       * x.h - the Hessian value of x.p
14 | %       with only x.p guaranteed to be set.
15 | %   cgtparams - the following structure, as an example:
16 | %       cgtparams = struct('maxit',1000,'toler',1.0e-4,'initdel',1,
17 | %                          'maxdel',100,'eta',0.1);
18 | %
19 | % Output:
20 | %   inform - structure containing two fields:
21 | %       * inform.status - 1 if gradient tolerance was
22 | %         achieved;
23 | %         0 if not.
24 | %       * inform.iter - the number of steps taken
25 | %       * inform.cgiter - the number of conjugate iterations
26 | %   x - the solution structure, with the solution point along with
27 | %       function, gradient, and Hessian evaluations thereof.
28 | 
29 | % Number of function, gradient, and Hessian evaluations, and number of Cholesky
30 | % factorizations.
31 | global numf numg numh numFact
32 | numf = 0;
33 | numg = 0;
34 | numh = 0;
35 | numFact = 0;
36 | 
37 | % Populate local caching of cgtparams parameters.
38 | toler = cgtparams.toler; % Set gradient tolerance.
39 | maxit = cgtparams.maxit; % Set maximum number of allowed iterations.
40 | initdel = cgtparams.initdel; % Set initial delta value.
41 | maxdel = cgtparams.maxdel; % Set maximum delta value.
42 | eta = cgtparams.eta; % Set eta value.
43 | 
44 | del = initdel; % Set delta value to initial delta value.
45 | xc.p = x.p; % Set the current point to the initial point, x.p.
46 | cgiter = 0; % Number of conjugate iterations.
47 | 
48 | for i = 1:maxit
49 |     % Compute function, gradient, and Hessian at current point.
50 |     xc.f = feval(fun, xc.p, 1);
51 |     xc.g = feval(fun, xc.p, 2);
52 |     xc.h = sparse(feval(fun, xc.p, 4));
53 | 
54 |     % Check for termination condition: norm of gradient less than toler.
55 |     if norm(xc.g) < toler
56 |         inform.status = 1; % Indicates success.
57 |         inform.iter = i; % Number of iterations.
58 |         inform.cgiter = cgiter; % Number of conjugate iterations.
59 |         x.p = xc.p;
60 |         x.f = xc.f;
61 |         x.g = xc.g;
62 |         x.h = xc.h;
63 |         return;
64 |     end
65 | 
66 |     % Conjugate gradient method.
67 | 
68 |     g = xc.g; % Set residual to gradient.
69 |     initGradToler = toler * norm(g); % Set residual norm-based toler.
70 |     d = -g; % Current search direction.
71 |     k = 0; % Iteration index.
72 |     z = zeros(size(xc.h, 1), 1);
73 |     while 1
74 |         % If current search direction, d, is a direction of nonpositive
75 |         % curvature along xc.h...
76 |         if d' * xc.h * d <= 0
77 |             % Find the boundary point and break.
78 |             p = boundary(z, d, del);
79 |             break;
80 |         end
81 |         alpha = norm(g)^2 / (d' * xc.h * d);
82 |         % If z+alpha*d violates the trust-region bound...
83 |         if norm(z + alpha * d) >= del
84 |             % Find the boundary point and break.
85 |             p = boundary(z, d, del);
86 |             break;
87 |         end
88 |         z = z + alpha * d;
89 |         gN = g + alpha * xc.h * d;
90 |         % If current gradient iterate is below threshold...
91 |         if or(norm(gN) < toler, norm(gN) < initGradToler)
92 |             % Set point to current z iterate and break.
93 |             p = z;
94 |             break;
95 |         end
96 | 
97 |         % Parameter updates.
98 |         beta = norm(gN)^2 / norm(g)^2; % Update beta.
99 |         d = -gN + beta * d; % Update search direction.
100 |         g = gN; % Update gradient.
101 |         k = k + 1; % Update iteration index.
102 |     end
103 |     cgiter = cgiter + k; % Update number of conjugate gradient iterations.
104 | 
105 |     % Compute the reduction ratio.
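    % rho = (actual reduction) / (predicted reduction)
    %     = (f(x) - f(x + p)) / (m(0) - m(p)); see (4.4) of Nocedal and Wright.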
106 |     rho = (xc.f - feval(fun,xc.p + p,1))/(-xc.g'*p - 0.5*p'*xc.h*p);
107 | 
108 |     % Update the trust region; i.e., update del and the current point.
109 |     if rho < 0.25
110 |         del = del / 4;
111 |     else
112 |         % Note that radius only increases if norm of p reaches the trust
113 |         % region boundary.
114 |         if rho > 0.75 && norm(p) == del
115 |             del = min(2*del, maxdel);
116 |         end
117 |     end
118 |     if rho > eta
119 |         xc.p = xc.p + p;
120 |     end
121 | end
122 | % If reached, method failed.
123 | inform.status = 0; % Update status to failure indicator, 0.
124 | inform.iter = maxit; % Number of iterations = i = maxit at this point.
125 | inform.cgiter = cgiter; % Number of conjugate iterations.
126 | x.p = xc.p;
127 | x.f = feval(fun, x.p, 1);
128 | x.g = feval(fun, x.p, 2);
129 | x.h = sparse(feval(fun, x.p, 4));
130 | return; % Return inform and final point x
131 | end
132 | 
133 | function p = boundary(p, q, del)
134 | % Finds the boundary point.
135 | a = norm(q)^2;
136 | b = 2*p'*q;
137 | c = norm(p)^2 - del^2;
138 | alpha = (-b + sqrt(b^2 - 4*a*c)) / (2*a);
139 | p = p + alpha * q;
140 | return;
141 | end
--------------------------------------------------------------------------------
/src/cvsrch.m:
--------------------------------------------------------------------------------
1 | function [stp,x,inform] = cvsrch(fcn,x,s,stp,params)
2 | %   Translation of minpack subroutine cvsrch
3 | %   Dianne O'Leary   July 1991
4 | %     **********
5 | %
6 | %     Subroutine cvsrch
7 | %
8 | %     The purpose of cvsrch is to find a step which satisfies
9 | %     a sufficient decrease condition and a curvature condition.
10 | %     The user must provide a subroutine which calculates the
11 | %     function and the gradient.
12 | %
13 | %     At each stage the subroutine updates an interval of
14 | %     uncertainty with endpoints stx and sty. The interval of
15 | %     uncertainty is initially chosen so that it contains a
16 | %     minimizer of the modified function
17 | %
18 | %          f(x+stp*s) - f(x) - ftol*stp*(gradf(x)'s).
19 | %
20 | %     If a step is obtained for which the modified function
21 | %     has a nonpositive function value and nonnegative derivative,
22 | %     then the interval of uncertainty is chosen so that it
23 | %     contains a minimizer of f(x+stp*s).
24 | %
25 | %     The algorithm is designed to find a step which satisfies
26 | %     the sufficient decrease condition
27 | %
28 | %           f(x+stp*s) <= f(x) + ftol*stp*(gradf(x)'s),
29 | %
30 | %     and the curvature condition
31 | %
32 | %           abs(gradf(x+stp*s)'s) <= gtol*abs(gradf(x)'s).
33 | %
34 | %     If ftol is less than gtol and if, for example, the function
35 | %     is bounded below, then there is always a step which satisfies
36 | %     both conditions. If no step can be found which satisfies both
37 | %     conditions, then the algorithm usually stops when rounding
38 | %     errors prevent further progress. In this case stp only
39 | %     satisfies the sufficient decrease condition.
40 | %
41 | %     The subroutine statement is
42 | %
43 | %        subroutine cvsrch(fcn,n,x,f,g,s,stp,ftol,gtol,xtol,
44 | %                          stpmin,stpmax,maxfev,info,nfev,wa)
45 | %     where
46 | %
47 | %       fcn is the name of the user-supplied subroutine which
48 | %         calculates the function and the gradient.  fcn must
49 | %         be declared in an external statement in the user
50 | %         calling program, and should be written as follows.
51 | 
52 | %         function [f,g] = fcn(n,x) (Matlab)     (10/2010 change in documentation)
53 | %         (derived from Fortran subroutine fcn(n,x,f,g) )
54 | %         integer n
55 | %         f
56 | %         x(n),g(n)
57 | %         ----------
58 | %         Calculate the function at x and
59 | %         return this value in the variable f.
60 | %         Calculate the gradient at x and
61 | %         return this vector in g.
62 | %         ----------
63 | %         return
64 | %         end
65 | %
66 | %       n is a positive integer input variable set to the number
67 | %         of variables.
68 | %
69 | %       x is an array of length n. On input it must contain the
70 | %         base point for the line search. On output it contains
71 | %         x + stp*s.
72 | %
73 | %       f is a variable. On input it must contain the value of f
74 | %         at x. On output it contains the value of f at x + stp*s.
75 | %
76 | %       g is an array of length n. On input it must contain the
77 | %         gradient of f at x. On output it contains the gradient
78 | %         of f at x + stp*s.
79 | %
80 | %       s is an input array of length n which specifies the
81 | %         search direction.
82 | %
83 | %       stp is a nonnegative variable. On input stp contains an
84 | %         initial estimate of a satisfactory step. On output
85 | %         stp contains the final estimate.
86 | %
87 | %       ftol and gtol are nonnegative input variables. Termination
88 | %         occurs when the sufficient decrease condition and the
89 | %         directional derivative condition are satisfied.
90 | %
91 | %       xtol is a nonnegative input variable. Termination occurs
92 | %         when the relative width of the interval of uncertainty
93 | %         is at most xtol.
94 | %
95 | %       stpmin and stpmax are nonnegative input variables which
96 | %         specify lower and upper bounds for the step.
97 | %
98 | %       maxfev is a positive integer input variable. Termination
99 | %         occurs when the number of calls to fcn is at least
100 | %         maxfev by the end of an iteration.
101 | %
102 | %       info is an integer output variable set as follows:
103 | %
104 | %         info = 0  Improper input parameters.
105 | %
106 | %         info = 1  The sufficient decrease condition and the
107 | %                   directional derivative condition hold.
108 | %
109 | %         info = 2  Relative width of the interval of uncertainty
110 | %                   is at most xtol.
111 | %
112 | %         info = 3  Number of calls to fcn has reached maxfev.
113 | %
114 | %         info = 4  The step is at the lower bound stpmin.
115 | %
116 | %         info = 5  The step is at the upper bound stpmax.
117 | %
118 | %         info = 6  Rounding errors prevent further progress.
119 | %                   There may not be a step which satisfies the
120 | %                   sufficient decrease and curvature conditions.
121 | %                   Tolerances may be too small.
122 | %
123 | %       nfev is an integer output variable set to the number of
124 | %         calls to fcn.
125 | %
126 | %       wa is a work array of length n.
127 | %
128 | %     Subprograms called
129 | %
130 | %       user-supplied......fcn
131 | %
132 | %       MINPACK-supplied...cstep
133 | %
134 | %       FORTRAN-supplied...abs,max,min
135 | %
136 | %     Argonne National Laboratory. MINPACK Project. June 1983
137 | %     Jorge J. More', David J. Thuente
138 | %
139 | %     **********
140 | p5 = .5;
141 | p66 = .66;
142 | xtrapf = 4;
143 | inform.info = 0;
144 | infoc = 1;
145 | 
146 | %
147 | % Check the input parameters for errors.
148 | %
149 | if (length(x.p) <= 0 | stp <= 0.0 | params.ftol < 0.0 | ...
150 |     params.gtol < 0.0 | params.xtol < 0.0 | params.stpmin < 0.0 ...
151 |     | params.stpmax < params.stpmin | params.maxfev <= 0)
152 |     return
153 | end
154 | %
155 | % Compute the initial gradient in the search direction
156 | % and check that s is a descent direction.
157 | %
158 | dginit = x.g'*s;
159 | if (dginit >= 0.0)
160 |     return
161 | end
162 | %
163 | % Initialize local variables.
164 | %
165 | brackt = 0;
166 | stage1 = 1;
167 | inform.nfev = 0;
168 | finit = x.f;
169 | dgtest = params.ftol*dginit;
170 | width = params.stpmax - params.stpmin;
171 | width1 = 2*width;
172 | wa = x.p;
173 | %
174 | % The variables stx, fx, dgx contain the values of the step,
175 | % function, and directional derivative at the best step.
176 | % The variables sty, fy, dgy contain the value of the step,
177 | % function, and derivative at the other endpoint of
178 | % the interval of uncertainty.
179 | % The variables stp, f, dg contain the values of the step,
180 | % function, and derivative at the current step.
181 | %
182 | stx = 0.0;
183 | fx = finit;
184 | dgx = dginit;
185 | sty = 0.0;
186 | fy = finit;
187 | dgy = dginit;
188 | %
189 | % Start of iteration.
190 | %
191 | while (1)
192 |     %
193 |     % Set the minimum and maximum steps to correspond
194 |     % to the present interval of uncertainty.
195 |     %
196 |     if (brackt)
197 |         stmin = min(stx,sty);
198 |         stmax = max(stx,sty);
199 |     else
200 |         stmin = stx;
201 |         stmax = stp + xtrapf*(stp - stx);
202 |     end
203 |     %
204 |     % Force the step to be within the bounds stpmax and stpmin.
205 |     %
206 |     stp = max(stp,params.stpmin);
207 |     stp = min(stp,params.stpmax);
208 |     %
209 |     % If an unusual termination is to occur then let
210 |     % stp be the lowest point obtained so far.
211 |     %
212 |     if ((brackt & (stp <= stmin | stp >= stmax)) ...
213 |         | inform.nfev >= params.maxfev-1 | infoc == 0 ...
214 |         | (brackt & stmax-stmin <= params.xtol*stmax))
215 |         stp = stx;
216 |     end
217 |     %
218 |     % Evaluate the function and gradient at stp
219 |     % and compute the directional derivative.
220 |     %
221 |     x.p = wa + stp * s;
222 |     [x.f,x.g] = feval(fcn,x.p,3);
223 |     inform.nfev = inform.nfev + 1;
224 |     dg = x.g' * s;
225 |     ftest1 = finit + stp*dgtest;
226 |     %
227 |     % Test for convergence.
228 |     %
229 |     if ((brackt & (stp <= stmin | stp >= stmax)) | infoc == 0)
230 |         inform.info = 6;
231 |     end
232 |     if (stp == params.stpmax & x.f <= ftest1 & dg <= dgtest)
233 |         inform.info = 5;
234 |     end
235 |     if (stp == params.stpmin & (x.f > ftest1 | dg >= dgtest))
236 |         inform.info = 4;
237 |     end
238 |     if (inform.nfev >= params.maxfev)
239 |         inform.info = 3;
240 |     end
241 |     if (brackt & stmax-stmin <= params.xtol*stmax)
242 |         inform.info = 2;
243 |     end
244 |     if (x.f <= ftest1 & abs(dg) <= params.gtol*(-dginit))
245 |         inform.info = 1;
246 |     end
247 |     %
248 |     % Check for termination.
249 |     %
250 |     if (inform.info ~= 0)
251 |         return
252 |     end
253 |     %
254 |     % In the first stage we seek a step for which the modified
255 |     % function has a nonpositive value and nonnegative derivative.
256 |     %
257 |     if (stage1 & x.f <= ftest1 & dg >= min(params.ftol,params.gtol)*dginit)
258 |         stage1 = 0;
259 |     end
260 |     %
261 |     % A modified function is used to predict the step only if
262 |     % we have not obtained a step for which the modified
263 |     % function has a nonpositive function value and nonnegative
264 |     % derivative, and if a lower function value has been
265 |     % obtained but the decrease is not sufficient.
266 |     %
267 |     if (stage1 & x.f <= fx & x.f > ftest1)
268 |         %
269 |         % Define the modified function and derivative values.
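        % (Up to the constant f(x), the modified function is
        % psi(stp) = f(x + stp*s) - ftol*stp*dginit; subtracting stp*dgtest
        % from the function values and dgtest from the derivatives below
        % rewrites everything in terms of psi.)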
270 |         %
271 |         fm = x.f - stp*dgtest;
272 |         fxm = fx - stx*dgtest;
273 |         fym = fy - sty*dgtest;
274 |         dgm = dg - dgtest;
275 |         dgxm = dgx - dgtest;
276 |         dgym = dgy - dgtest;
277 |         %
278 |         % Call cstep to update the interval of uncertainty
279 |         % and to compute the new step.
280 |         %
281 |         [stx,fxm,dgxm,sty,fym,dgym,stp,fm,dgm,brackt,infoc] ...
282 |             = cstep(stx,fxm,dgxm,sty,fym,dgym,stp,fm,dgm, ...
283 |                     brackt,stmin,stmax);
284 |         %
285 |         % Reset the function and gradient values for f.
286 |         %
287 |         fx = fxm + stx*dgtest;
288 |         fy = fym + sty*dgtest;
289 |         dgx = dgxm + dgtest;
290 |         dgy = dgym + dgtest;
291 |     else
292 |         %
293 |         % Call cstep to update the interval of uncertainty
294 |         % and to compute the new step.
295 |         %
296 |         [stx,fx,dgx,sty,fy,dgy,stp,x.f,dg,brackt,infoc] ...
297 |             = cstep(stx,fx,dgx,sty,fy,dgy,stp,x.f,dg, ...
298 |                     brackt,stmin,stmax);
299 |     end
300 |     %
301 |     % Force a sufficient decrease in the size of the
302 |     % interval of uncertainty.
303 |     %
304 |     if (brackt)
305 |         if (abs(sty-stx) >= p66*width1)
306 |             stp = stx + p5*(sty - stx);
307 |         end
308 |         width1 = width;
309 |         width = abs(sty-stx);
310 |     end
311 |     %
312 |     % End of iteration.
313 |     %
314 | end
315 | %
316 | % Last card of subroutine cvsrch.
317 | %
318 | 
319 | function [stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,info] ...
320 |     = cstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,stpmin,stpmax)
321 | %   Translation of minpack subroutine cstep
322 | %   Dianne O'Leary   July 1991
323 | %     **********
324 | %
325 | %     Subroutine cstep
326 | %
327 | %     The purpose of cstep is to compute a safeguarded step for
328 | %     a linesearch and to update an interval of uncertainty for
329 | %     a minimizer of the function.
330 | %
331 | %     The parameter stx contains the step with the least function
332 | %     value. The parameter stp contains the current step. It is
333 | %     assumed that the derivative at stx is negative in the
334 | %     direction of the step. If brackt is set true then a
335 | %     minimizer has been bracketed in an interval of uncertainty
336 | %     with endpoints stx and sty.
337 | %
338 | %     The subroutine statement is
339 | %
340 | %       subroutine cstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
341 | %                        stpmin,stpmax,info)
342 | %
343 | %     where
344 | %
345 | %       stx, fx, and dx are variables which specify the step,
346 | %         the function, and the derivative at the best step obtained
347 | %         so far. The derivative must be negative in the direction
348 | %         of the step, that is, dx and stp-stx must have opposite
349 | %         signs. On output these parameters are updated appropriately.
350 | %
351 | %       sty, fy, and dy are variables which specify the step,
352 | %         the function, and the derivative at the other endpoint of
353 | %         the interval of uncertainty. On output these parameters are
354 | %         updated appropriately.
355 | %
356 | %       stp, fp, and dp are variables which specify the step,
357 | %         the function, and the derivative at the current step.
358 | %         If brackt is set true then on input stp must be
359 | %         between stx and sty. On output stp is set to the new step.
360 | %
361 | %       brackt is a logical variable which specifies if a minimizer
362 | %         has been bracketed. If the minimizer has not been bracketed
363 | %         then on input brackt must be set false. If the minimizer
364 | %         is bracketed then on output brackt is set true.
365 | %
366 | %       stpmin and stpmax are input variables which specify lower
367 | %         and upper bounds for the step.
368 | %
369 | %       info is an integer output variable set as follows:
370 | %         If info = 1,2,3,4,5, then the step has been computed
371 | %         according to one of the five cases below. Otherwise
372 | %         info = 0, and this indicates improper input parameters.
373 | %
374 | %     Subprograms called
375 | %
376 | %       FORTRAN-supplied ... abs,max,min,sqrt
377 | %                        ... dble
378 | %
379 | %     Argonne National Laboratory. MINPACK Project. June 1983
380 | %     Jorge J. More', David J. Thuente
381 | %
382 | %     **********
383 | p66 = 0.66;
384 | info = 0;
385 | %
386 | % Check the input parameters for errors.
387 | %
388 | if ((brackt & (stp <= min(stx,sty) | ...
389 |     stp >= max(stx,sty))) | ...
390 |     dx*(stp-stx) >= 0.0 | stpmax < stpmin)
391 |     return
392 | end
393 | %
394 | % Determine if the derivatives have opposite sign.
395 | %
396 | sgnd = dp*(dx/abs(dx));
397 | %
398 | % First case. A higher function value.
399 | % The minimum is bracketed. If the cubic step is closer
400 | % to stx than the quadratic step, the cubic step is taken,
401 | % else the average of the cubic and quadratic steps is taken.
402 | %
403 | if (fp > fx)
404 |     info = 1;
405 |     bound = 1;
406 |     theta = 3*(fx - fp)/(stp - stx) + dx + dp;
407 |     s = norm([theta,dx,dp],inf);
408 |     gamma = s*sqrt((theta/s)^2 - (dx/s)*(dp/s));
409 |     if (stp < stx)
410 |         gamma = -gamma;
411 |     end
412 |     p = (gamma - dx) + theta;
413 |     q = ((gamma - dx) + gamma) + dp;
414 |     r = p/q;
415 |     stpc = stx + r*(stp - stx);
416 |     stpq = stx + ((dx/((fx-fp)/(stp-stx)+dx))/2)*(stp - stx);
417 |     if (abs(stpc-stx) < abs(stpq-stx))
418 |         stpf = stpc;
419 |     else
420 |         stpf = stpc + (stpq - stpc)/2;
421 |     end
422 |     brackt = 1;
423 | %
424 | % Second case. A lower function value and derivatives of
425 | % opposite sign. The minimum is bracketed. If the cubic
426 | % step is closer to stx than the quadratic (secant) step,
427 | % the cubic step is taken, else the quadratic step is taken.
428 | %
429 | elseif (sgnd < 0.0)
430 |     info = 2;
431 |     bound = 0;
432 |     theta = 3*(fx - fp)/(stp - stx) + dx + dp;
433 |     s = norm([theta,dx,dp],inf);
434 |     gamma = s*sqrt((theta/s)^2 - (dx/s)*(dp/s));
435 |     if (stp > stx)
436 |         gamma = -gamma;
437 |     end
438 |     p = (gamma - dp) + theta;
439 |     q = ((gamma - dp) + gamma) + dx;
440 |     r = p/q;
441 |     stpc = stp + r*(stx - stp);
442 |     stpq = stp + (dp/(dp-dx))*(stx - stp);
443 |     if (abs(stpc-stp) > abs(stpq-stp))
444 |         stpf = stpc;
445 |     else
446 |         stpf = stpq;
447 |     end
448 |     brackt = 1;
449 | %
450 | % Third case. A lower function value, derivatives of the
451 | % same sign, and the magnitude of the derivative decreases.
452 | % The cubic step is only used if the cubic tends to infinity
453 | % in the direction of the step or if the minimum of the cubic
454 | % is beyond stp. Otherwise the cubic step is defined to be
455 | % either stpmin or stpmax. The quadratic (secant) step is also
456 | % computed and if the minimum is bracketed then the step
457 | % closest to stx is taken, else the step farthest away is taken.
458 | %
459 | elseif (abs(dp) < abs(dx))
460 |     info = 3;
461 |     bound = 1;
462 |     theta = 3*(fx - fp)/(stp - stx) + dx + dp;
463 |     s = norm([theta,dx,dp],inf);
464 |     %
465 |     % The case gamma = 0 only arises if the cubic does not tend
466 |     % to infinity in the direction of the step.
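    % (The max(0., .) clamp below maps a negative discriminant to gamma = 0,
    % precisely the case in which the cubic does not tend to infinity along
    % the step.)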
467 |     %
468 |     gamma = s*sqrt(max(0.,(theta/s)^2 - (dx/s)*(dp/s)));
469 |     if (stp > stx)
470 |         gamma = -gamma;
471 |     end
472 |     p = (gamma - dp) + theta;
473 |     q = (gamma + (dx - dp)) + gamma;
474 |     r = p/q;
475 |     if (r < 0.0 & gamma ~= 0.0)
476 |         stpc = stp + r*(stx - stp);
477 |     elseif (stp > stx)
478 |         stpc = stpmax;
479 |     else
480 |         stpc = stpmin;
481 |     end
482 |     stpq = stp + (dp/(dp-dx))*(stx - stp);
483 |     if (brackt)
484 |         if (abs(stp-stpc) < abs(stp-stpq))
485 |             stpf = stpc;
486 |         else
487 |             stpf = stpq;
488 |         end
489 |     else
490 |         if (abs(stp-stpc) > abs(stp-stpq))
491 |             stpf = stpc;
492 |         else
493 |             stpf = stpq;
494 |         end
495 |     end
496 | %
497 | % Fourth case. A lower function value, derivatives of the
498 | % same sign, and the magnitude of the derivative does
499 | % not decrease. If the minimum is not bracketed, the step
500 | % is either stpmin or stpmax, else the cubic step is taken.
501 | %
502 | else
503 |     info = 4;
504 |     bound = 0;
505 |     if (brackt)
506 |         theta = 3*(fp - fy)/(sty - stp) + dy + dp;
507 |         s = norm([theta,dy,dp],inf);
508 |         gamma = s*sqrt((theta/s)^2 - (dy/s)*(dp/s));
509 |         if (stp > sty)
510 |             gamma = -gamma;
511 |         end
512 |         p = (gamma - dp) + theta;
513 |         q = ((gamma - dp) + gamma) + dy;
514 |         r = p/q;
515 |         stpc = stp + r*(sty - stp);
516 |         stpf = stpc;
517 |     elseif (stp > stx)
518 |         stpf = stpmax;
519 |     else
520 |         stpf = stpmin;
521 |     end
522 | end
523 | %
524 | % Update the interval of uncertainty. This update does not
525 | % depend on the new step or the case analysis above.
526 | %
527 | if (fp > fx)
528 |     sty = stp;
529 |     fy = fp;
530 |     dy = dp;
531 | else
532 |     if (sgnd < 0.0)
533 |         sty = stx;
534 |         fy = fx;
535 |         dy = dx;
536 |     end
537 |     stx = stp;
538 |     fx = fp;
539 |     dx = dp;
540 | end
541 | %
542 | % Compute the new step and safeguard it.
543 | %
544 | stpf = min(stpmax,stpf);
545 | stpf = max(stpmin,stpf);
546 | stp = stpf;
547 | if (brackt & bound)
548 |     if (sty > stx)
549 |         stp = min(stx+p66*(sty-stx),stp);
550 |     else
551 |         stp = max(stx+p66*(sty-stx),stp);
552 |     end
553 | end
554 | return
555 | %
556 | % Last card of subroutine cstep.
557 | %
558 | 
559 | 
560 | 
561 | 
--------------------------------------------------------------------------------