├── bigfonts.m ├── proddiag.m ├── LICENSE.txt ├── testFindiffStepsize.m ├── getLcurveCorner.m ├── jaccen.m ├── convinfo.m ├── jacfwd.m ├── regress2lines.m ├── finiteDiffOp.m ├── manpage.txt ├── readme.md └── invGN.m /bigfonts.m: -------------------------------------------------------------------------------- 1 | % BIGFONTS - Script to increase plot fonts sizes and line widths 2 | % 3 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle. 4 | % Contact at andy@ganse.org , http://research.ganse.org 5 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 6 | % See LICENSE.txt for full license statement. 7 | % 8 | % Usage summary: 9 | % bigfonts; 10 | 11 | % These first two work in Matlab only (Octave understands these commands but 12 | % the result isn't pretty on the plot) 13 | %set(0,'defaultaxesfontsize',14); 14 | %set(0,'defaulttextfontsize',14); 15 | set(0,'DefaultAxesFontName','Arial'); 16 | set(0,'DefaultLineLineWidth',2); 17 | set(0,'DefaultAxesFontSize',14); 18 | set(0,'DefaultTextFontName','Arial'); 19 | set(0,'DefaultTextFontSize',14); 20 | set(0,'DefaultLineMarkerSize',5); 21 | -------------------------------------------------------------------------------- /proddiag.m: -------------------------------------------------------------------------------- 1 | function Cdiag = proddiag(A,B) 2 | % PRODDIAG - Compute only diagonal elements of matrix product solution 3 | % Ie, in cases or large outer dimensions of matrices A & B, it may be far too 4 | % slow (or not even feasible) to multiply them completely to obtain the product 5 | % matrix. If we just want the diagonal elements of their product, we can 6 | % compute that more directly, returning a vector with the diagonal. 7 | % 8 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 9 | % Contact at andy@ganse.org , http://research.ganse.org 10 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 11 | % See LICENSE.txt for full license statement. 12 | % 13 | % Usage: 14 | % Cdiag = proddiag(A,B) 15 | 16 | [Ma,Na]=size(A); 17 | [Mb,Nb]=size(B); 18 | if Na~=Mb 19 | disp('proddiag: error: must have equal inner dimensions of matrices for mult'); 20 | return; 21 | end 22 | 23 | M=min(Ma,Nb); % ie the diag of the product matrix will only be as long as the 24 | % smaller of the two outer dimensions of A & B 25 | Cdiag=zeros(M,1); 26 | 27 | for i=1:M 28 | Cdiag(i)=A(i,:)*B(:,i); 29 | end 30 | 31 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, University of Washington 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the University of Washington nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /testFindiffStepsize.m: -------------------------------------------------------------------------------- 1 | function testFindiffStepsize(fwdprob,mtest,mplot,varargin) 2 | % TESTFINDIFFSTEPSIZE - Analyze step size choice for 1st order finite differences 3 | % 4 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 5 | % Contact at andy@ganse.org , http://research.ganse.org 6 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 7 | % See LICENSE.txt for full license statement. 8 | % 9 | % Usage: 10 | % testFindiffStepsize(fwdprob,mtest,mplot,varargin); 11 | 12 | 13 | ind=0:1:12; % indices defining the step sizes (0:1:20) 14 | 15 | for i=ind 16 | epsf=10^(-i); 17 | dx=sqrt(epsf); % true for fwd diffs when vars scaled to O(1) 18 | if i<0, ii=['m',num2str(-i)]; else ii=num2str(i); end; 19 | 20 | disp([' computing Dfwd(h) for epsr=10^(-' ii ')...']); 21 | eval(['J' ii '=jacfwd(fwdprob,mtest,-dx,0,1,varargin{:});']); 22 | eval(['J' ii '_2=jacfwd(fwdprob,mtest,-2*dx,0,1,varargin{:});']); 23 | eval(['D' ii '=J' ii '_2-J' ii ';']); 24 | end 25 | clear J*; 26 | 27 | % plotting: 28 | for m=mplot 29 | figure; 30 | h=sqrt(10.^(-ind)); 31 | d1=1; d2=6; dincr=1; % use all 6 data pts 32 | c=ones(1,ceil((d2-d1+1)/dincr)); % just making as many ones as data pts 33 | 34 | % first form the vectors to plot, in the following format: 35 | % x=[h(1)*c;h(2)*c;...] 36 | % y=[D0(d1:d2,m)';D1(d1:d2,m)';...] 37 | x=[]; y=[]; 38 | for j=1:length(ind) 39 | x=[x;h(j)*c]; 40 | eval([ 'y=[y;abs(D' num2str(ind(j)) '(d1:dincr:d2,m))''];']); 41 | %eval([ 'y=[y;D' num2str(ind(j)) '(d1:dincr:d2,m)''];']); 42 | end 43 | 44 | loglog(x,y,'*-'); 45 | axis tight 46 | grid on 47 | t=title(['F=' fwdprob ', param#' num2str(m)]); 48 | set(t,'Interpreter','none'); % prevent "_" characters in title causing subscript 49 | xlabel('finite difference stepsize h'); 50 | ylabel('\partial{F}_{(2h)} - \partial{F}_{(h)}'); 51 | end 52 | -------------------------------------------------------------------------------- /getLcurveCorner.m: -------------------------------------------------------------------------------- 1 | function corner=getLcurveCorner(rough,misfit,method) 2 | % GETLCURVECORNER - Give index of Lcurve corner based on ROUGH and MISFIT values 3 | % 4 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 5 | % Contact at andy@ganse.org , http://research.ganse.org 6 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 7 | % See LICENSE.txt for full license statement. 
8 | % 9 | % Usage: corner=getLcurveCorner(rough,misfit) 10 | % 11 | % ROUGH and MISFIT are the equal-length vectors with the Lcurve axes values. 12 | % CORNER is the index of of the corner. 13 | % METHOD (optional - its default is "derivative-based") can be one of: 14 | % 0 = derivative-based 15 | % 1 = two-phase linear regression to find intersection point of two lines 16 | % 17 | % Uses two-phase linear regression in regress2lines() to find the knee; 18 | % suggested that ROUGH and MISFIT axes are squares of the respective two-norms. 19 | 20 | if nargin<3, method=0; end 21 | 22 | % Scale Lcurve so axes same magnitude: 23 | % (and the (:) ensures they are column vectors...) 24 | x=rough(:)/max(rough); y=misfit(:)/max(misfit); 25 | % Rotate Lcurve by 45deg to stabilize: 26 | theta=pi/180*45; 27 | rr=[cos(theta) -sin(theta); cos(theta) sin(theta)]; 28 | xxyy=rr*[x';y']; 29 | xx=xxyy(1,:)'; yy=xxyy(2,:)'; 30 | 31 | 32 | if method==0 33 | % use derivatives: 34 | [dummy,corner]=max(diff(diff(yy)./diff(xx))); 35 | elseif method==1 36 | % use two-phase linear regression: 37 | addpath('~/APLUW/src/matlab/AGmultiphase_linreg/'); 38 | [dummy,dummy,corner] = regress2lines(xx,yy); 39 | elseif method==2 40 | % just find min of rotated lcurve: 41 | [dummy,corner]=min(yy); 42 | end 43 | 44 | %plot(rough.^2,misfit.^2,'*-'); 45 | %hold on; 46 | %plot(rough(corner).^2,misfit(corner).^2,'r*'); 47 | 48 | % example derivative-based method - works great sometimes... 49 | %e=log10(normrough(:,iters)); r=log10(normmisfit(:,iters)); 50 | %[dummy,corner]=max(diff(diff(r)./diff(e))); % 2nd deriv to find lcurve knee 51 | -------------------------------------------------------------------------------- /jaccen.m: -------------------------------------------------------------------------------- 1 | function [J,F] = jaccen(funcname,x,epsf,verbose,fid,varargin); 2 | % JACCEN - 1st centered difference approximation of Jacobian matrix 3 | % of partial derivs of function 'funcname' w.r.t. its arguments in vector 'x'. 4 | % 5 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2005. 6 | % Contact at andy@ganse.org , http://research.ganse.org 7 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 8 | % See LICENSE.txt for full license statement. 9 | % 10 | % Usage: 11 | % [J,F] = jaccen('funcname',x,dx,verbose,fid,...); 12 | % [where funcname.m is a file within path] 13 | % or 14 | % [J,F] = jaccen(@funcname,x,dx,verbose,fid,...); 15 | % [when funcname() is defined within the calling script] 16 | % 17 | % Inputs: 18 | % funcname = String or function handle for the function being differenced. 19 | % (See below for required form of this function.) 20 | % x = The column vector (length M) of arguments to the function funcname. 21 | % dx = Small change in x used to compute differences. 22 | % dx can be a scalar or vector of same length as x. 23 | % If scalar it is converted to a vector of same length of x in 24 | % which all elements are equal to the scalar dx. 25 | % For problems in which x is all the same type of quantity, such 26 | % as a profile of wavespeeds, it is recommended to use a scalar 27 | % dx = sqrt(epsilon), where epsilon is the accuracy of funcname. 28 | % verbose = 1: output progress text, 0: run quietly. 29 | % Progress text (percent done) is useful for very slow functions. 30 | % fid = File handle to which progress text is outputted in verbose option. 31 | % Note fid=1 outputs to stdout, easiest to do this is not verbose 32 | % since nothing will then be outputted. 
33 | % 34 | % Outputs: 35 | % J = the finite diffs approximation of NxM Jacobian matrix at x 36 | % F = the function output column vector (length N) funcname(x) at x 37 | % 38 | % Function 'funcname' must be of form: 39 | % F=funcname(x) 40 | % or 41 | % F=funcname(x,...) 42 | % where the '...' are other arguments required besides the variables 43 | % x with repsect to which the function is being differenced. 44 | 45 | 46 | checksize=size(x); 47 | if checksize(2)~=1 48 | error(' jac(): x vector argument must be a column vector...\n'); 49 | % fprintf(fid,'error: jac(): x vector argument must be a column vector...\n'); 50 | % return; 51 | end 52 | M=length(x); 53 | 54 | % The following choice of dx requires that x is scaled to order 1: 55 | % FIXME: should check for that 56 | dx=epsf^(1/3)*ones(M,1); % note epsf^(1/3) is for ctr diffs 57 | % Now make dx and exactly representable number vis a vis machine precision 58 | % so that all error in derivs are from numerator (see Num Rec 2nd ed, sec 5.7) 59 | temp = x+dx; 60 | dx = temp-x; % Believe it or not this is actually a slightly different dx now, 61 | % different by an error of order of machine precision. 62 | % This effect may or may not make it into fwdprob input which 63 | % could have limited input precision, but in any case will have 64 | % an effect via denominator at end of this script. 65 | mask=eye(M); 66 | 67 | if verbose>1 68 | fprintf(fid,' Computing 1st centered finite diffs for Jacobian matrix...\n'); 69 | end 70 | 71 | if nargout==2 72 | F=feval(funcname,x,varargin{:}); 73 | end 74 | 75 | % centered diffs 76 | for j=1:M 77 | F1=feval(funcname,x+dx.*mask(:,j),varargin{:}); 78 | F2=feval(funcname,x-dx.*mask(:,j),varargin{:}); 79 | J(:,j) = ( F1 - F2 ) ./ dx(j)/2; 80 | 81 | % percent-done markers to show at matlab cmdline while running: 82 | if verbose>0 83 | p=j/M; 84 | if round(mod(p*100,10))==0 85 | fprintf(fid,' '); 86 | fprintf(fid,'%2.0f%%..',p*100); 87 | if p==1 88 | fprintf(fid,'\n'); 89 | end; 90 | end; 91 | end; 92 | 93 | end 94 | 95 | -------------------------------------------------------------------------------- /convinfo.m: -------------------------------------------------------------------------------- 1 | function [statusall,gradtest,mtest,ftest]=... 2 | convinfo(fwdprob,epsr,Ghat1,rhat0,rhat1,m0,m1,verbose,fid) 3 | % CONVINFO - Display convergence info for previous linearization step 4 | % in locally linearized inversion of a nonlinear inverse problem (using the 5 | % "invGN.m" script). 6 | % 7 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle. 8 | % Contact at andy@ganse.org , http://research.ganse.org 9 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 10 | % See LICENSE.txt for full license statement. 11 | % 12 | % Based on material in: 13 | % RC Aster, B Borchers, and CH Thurber, "Parameter Estimation and Inverse 14 | % Problems," Elsevier Academic Press, 2004. 15 | % 16 | % Usage: 17 | % [statusall,gradtest,mtest,ftest]=... 18 | % convinfo(fwdprob,epsr,Ghat1,rhat0,rhat1,m0,m1,verbose,fid); 19 | % 20 | % Inputs: 21 | % fwdprob = Matlab function handle to forward problem function which 22 | % is of form outvector=myfunction(invector). 
(See example 1 23 | % of "help function_handle" in Matlab for how to implement) 24 | % epsr = relative precision of forward problem output (predicted data) 25 | % Ghat1 = normalized Jacobian matrix of partial derivs of fwdprob at m1, 26 | % ie Ghat1=C^{-1/2}*G(m1), where C is the cov matrix of meas noise 27 | % rhat0 = column vector of normalized residuals at m0, i.e. 28 | % C^{-1/2}*(dmeas-dpred0) 29 | % rhat1 = column vector of normalized residuals at m1, i.e. 30 | % C^{-1/2}*(dmeas-dpred1) 31 | % m0 = column vector containing previous model value. 32 | % m1 = updated model (column) vector from latest invOccam() iteration. 33 | % verbose = 0: no status/diagnostic output, 1: some output 34 | % fid = file identifier for verbose output (status, warnings, etc.) 35 | % 36 | % Output "statusall" is a logical AND of the boolean results of 3 conv tests: 37 | % * gradtest = obj gradient being close enough to zero per A/B/T def p182 38 | % * mtest = change in norm(m) being close enough to zero per A/B/T def p182 39 | % * ftest = change in objective value being close enough to zero per A/B/T def p182 40 | % Statusall=1 means all 3 tests passed. Statusall=0 means at least 1 didn't, 41 | % in which case you probably want to take another linearization step. 42 | % 43 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2007-2008. 44 | % andy@ganse.org 45 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 46 | % See LICENSE.txt for full license statement. 47 | 48 | if length(epsr)>1 49 | fprintf(fid,' convinfo: note skipping this for case with length(epsr)>1...\n'); 50 | gradtest=0; mtest=0; ftest=0; statusall=0; 51 | return; 52 | end 53 | 54 | 55 | f1=rhat1'*rhat1; % objective function value at m1 56 | f0=rhat0'*rhat0; % objective function value at m0 57 | 58 | 59 | % check if the gradient of f(m) is approximately 0: 60 | gradobj1=2*Ghat1'*rhat1; % objective gradient at m1: grad(f(m1)) = 2G(m1)^T F(m1) 61 | % where G(m1) is the normalized jacobian matrix of fwd problem at m1 62 | left1=norm(gradobj1); 63 | right1=sqrt(epsr)*(1+f1); 64 | gradtest=left1<right1; 65 | if verbose>1 66 | if gradtest 67 | ans='small enough'; 68 | op='<'; 69 | else 70 | ans='not small enough yet'; 71 | op='not <'; 72 | end; 73 | fprintf(fid,' grad(Fobj): %s (%g %s %g)\n', ans, left1, op, right1); 74 | end 75 | 76 | % check if successive values of m are close: 77 | left2=norm(m1-m0); 78 | right2=sqrt(epsr)*(1+norm(m1)); 79 | mtest=left2<right2; 80 | if verbose>1 81 | if mtest 82 | ans='close enough'; 83 | op='<'; 84 | else 85 | ans='not close enough yet'; 86 | op='not <'; 87 | end; 88 | fprintf(fid,' dm: %s (%g %s %g)\n', ans, left2, op, right2); 89 | end 90 | 91 | % check if the values of f(m) have stopped changing: 92 | left3=abs(f1-f0); 93 | right3=epsr*(1+f1); 94 | ftest=left3<right3; 95 | if verbose>1 96 | if ftest 97 | ans='close enough'; 98 | op='<'; 99 | else 100 | ans='not close enough yet'; 101 | op='not <'; 102 | end; 103 | fprintf(fid,' df: %s (%g %s %g)\n', ans, left3, op, right3); 104 | end 105 | 106 | 107 | statusall = gradtest && mtest && ftest; 108 | %if verbose>=1 109 | %if statusall 110 | % fprintf(fid,' CONVERGED according to grad(Fobj) and dm and df...\n'); 111 | %else 112 | % fprintf(fid,' Not converged yet according to grad(Fobj) and dm and df...\n'); 113 | %end 114 | %end 115 | -------------------------------------------------------------------------------- /jacfwd.m: -------------------------------------------------------------------------------- 1 | function [J,F] = jacfwd(funcname,x,epsr,verbose,fid,varargin); 2 | % JACFWD - 1st
forward difference approximation of Jacobian matrix 3 | % of partial derivs of function 'funcname' w.r.t. its arguments in vector 'x'. 4 | % 5 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2005. 6 | % Contact at andy@ganse.org , http://research.ganse.org 7 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 8 | % See LICENSE.txt for full license statement. 9 | % 10 | % Usage: 11 | % [J,F] = jacfwd('funcname',x,epsf,verbose,fid,...); 12 | % [where funcname.m is a file within path] 13 | % or 14 | % [J,F] = jacfwd(@funcname,x,epsf,verbose,fid,...); 15 | % [when funcname() is defined within the calling script] 16 | % 17 | % Inputs: 18 | % funcname = String or function handle for the function being differenced. 19 | % (See below for required form of this function.) 20 | % x = The column vector (length M) of arguments to the function funcname. 21 | % This function assumes that elements of x are scaled to order unity. 22 | % epsr = The relative accuracy of the forward problem computation - on 23 | % really simple fwd problem functions, epsr will just equal machine 24 | % precision. But for more complicated functions epsr will be larger 25 | % than machine precision, and determining it may take some analysis. 26 | % Try e.g. Gill, Murray, & Wright, "Practical Optimization". In any 27 | % case note that epsr is used to choose stepsize in the finite diff. 28 | % Can be scalar (ie same for all elements of model vector) or vector 29 | % of same length as model vector for 1-to-1 correspondence to difft 30 | % variable "types" (eg layer velocities vs layer thickness) 31 | % If EPSR is negative then that flags it as actually being the DX to 32 | % use. Note that use of EPSR rather than DX assumes that the values 33 | % in X have been all scaled to order 1. 34 | % verbose = 1: output progress text, 0: run quietly. 35 | % Progress text (percent done) is useful for very slow functions. 36 | % fid = File handle to which progress text is outputted in verbose option. 37 | % Note fid=1 outputs to stdout, easiest to do this is not verbose 38 | % since nothing will then be outputted. 39 | % 40 | % Outputs: 41 | % J = the finite diffs approximation of NxM Jacobian matrix at x. 42 | % F = the function output column vector (length N) funcname(x) at x. 43 | % 44 | % Function 'funcname' must be of form: 45 | % F=funcname(x) 46 | % or 47 | % F=funcname(x,...) 48 | % where the '...' are other arguments required besides the variables 49 | % x with repsect to which the function is being differenced. 
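% Example (illustrative only; the anonymous forward function below is a stand-in
% for a real forward problem whose output is accurate to machine precision):
%   fwd = @(x) [x(1)^2; x(1)*x(2)];   % analytic Jacobian is [2*x(1) 0; x(2) x(1)]
%   [J,F] = jacfwd(fwd, [1;2], eps, 0, 1);
%   % J comes back approximately [2 0; 2 1], and F = [1; 2]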
50 | 51 | 52 | checksize=size(x); 53 | if checksize(2)~=1 54 | error(' jacfwd(): x vector argument must be a column vector...\n'); 55 | % fprintf(fid,'error: jacfwd(): x vector argument must be a column vector...\n'); 56 | % return; 57 | end 58 | M=length(x); 59 | % The following choice of dx requires that x is scaled to order 1: 60 | % FIXME: should check for that 61 | if epsr(1)>0 62 | if length(epsr)==1 63 | dx=epsr^(1/2)*ones(M,1); % note epsr^(1/2) is for fwd diffs, cent diffs is different value 64 | elseif length(epsr)==length(x) 65 | dx=epsr.^(1/2); % note epsr^(1/2) is for fwd diffs, cent diffs is different value 66 | end 67 | elseif epsr(1)<0 68 | if length(epsr)==1 69 | dx=-epsr*ones(M,1); % note epsr^(1/2) is for fwd diffs, cent diffs is different value 70 | elseif length(epsr)==length(x) 71 | dx=-epsr; % note epsr^(1/2) is for fwd diffs, cent diffs is different value 72 | end 73 | else 74 | disp(' jacfwd: error: all elements of epsr and dx must be nonzero.\n'); 75 | return; 76 | end 77 | % Now make dx and exactly representable number vis a vis machine precision 78 | % so that all error in derivs are from numerator (see Num Rec 2nd ed, sec 5.7) 79 | temp = x+dx; % note temp is vector of same length as model vector 80 | dx = temp-x; % Believe it or not this is actually a slightly different dx now, 81 | % different by an error of order of machine precision. 82 | % This effect may or may not make it into fwdprob input which 83 | % could have limited input precision, but in any case will have 84 | % an effect via denominator at end of this script. 85 | mask=eye(M); 86 | 87 | F=feval(funcname,x,varargin{:}); 88 | 89 | % forward diffs 90 | for j=1:M 91 | J(:,j) = ( feval( funcname, x+dx.*mask(:,j), varargin{:} ) - F ) ./ dx(j); 92 | 93 | % percent-done markers to show at matlab cmdline while running: 94 | if verbose>1 95 | fprintf(fid,' '); 96 | p=j/M; 97 | if round(mod(p*100,10))==0 98 | fprintf(fid,' '); 99 | fprintf(fid,'%2.0f%%..',p*100); 100 | if p==1 101 | fprintf(fid,'\n'); 102 | end; 103 | end; 104 | end; 105 | 106 | end 107 | -------------------------------------------------------------------------------- /regress2lines.m: -------------------------------------------------------------------------------- 1 | function [mstar, Rstar, idiv, Gstar] = regress2lines(x,y) 2 | % REGRESS2LINES - fit two cojoined straight lines to data points. 3 | % This is a nonlinear estimation problem. The data will be divided into two 4 | % sets, each fitted with a line; the lines intersect between the data division. 5 | % 6 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 7 | % Contact at andy@ganse.org , http://research.ganse.org 8 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 9 | % See LICENSE.txt for full license statement. 10 | % 11 | % IMPORTANT - You should be able to justify such a restricted choice of 12 | % model as simply two straight lines. Perhaps it makes more sense for you 13 | % to instead fit a single, higher-degree polynomial (an easy linear problem) 14 | % or exponential curve to your data. To use this two-phase straight-line 15 | % regression on your data you should be able to cite some theory for why 16 | % you specifically want two intersecting straight lines as opposed to a 17 | % more general model. 18 | % 19 | % Usage: [m, R, idiv, G] = regress2lines(x,y); 20 | % 21 | % Input - data column vectors x & y of equal length. 
22 | % 23 | % Outputs - 24 | % m = column vector of the 5 solution paramters [a1,b1,a2,b2,x0] ala 25 | % lines y=a1*x+b1 and y=a2*x+b2, intersecting at x0. 26 | % R = sum of sqrs of residuals of the solution 27 | % idiv = the index of the data point after which the data was divided. 28 | % x0 is between x(idiv) and x(idiv+1). 29 | % G = the linear problem matrix associated with the solution x0. 30 | % This might be used to calculate *some* stats of the solution. 31 | % 32 | % Since the problem as a whole is nonlinear, the full picture of the 33 | % solution statistics is seen by Monte Carlo simulation, as demonstrated and 34 | % explored in my MultiRegressLines.matlab package, available at 35 | % https://www.github.com/aganse/MultiRegressLines.matlab 36 | 37 | 38 | % More specific technical description: 39 | % 40 | % The problem is to solve: 41 | % Given xdata(1:n),ydata(1:n) 42 | % Find m={a1,b1,a2,b2,x0} such that we 43 | % Minimize data misfit in least-squares sense 44 | % ie min sum( ydata(1:istar)-a1*xdata(1:istar)-b1 )^2 + 45 | % sum( ydata(istar+1:end)-a2*xdata(istar+1:end)-b2 )^2 46 | % With constraint: x(istar) <= x0=(b2-b1)/(a1-a2) <= x(istar+1) 47 | % (ie x0 is the intersection of the two fit lines and is at the break in 48 | % data) 49 | % 50 | % The problem as a whole is a nonlinear regression problem because of 51 | % the parameter x0. But it's really convenient that for a given constant 52 | % value of x0 the problem becomes linear, on top of which each x0 53 | % corresponds to a single division in the data points into two sets. 54 | % (For each set we compute a standard linear fit, and x0 is the unique 55 | % location at which the two lines intersect.) 56 | % Given a value for x0, which tells the data points between which to split 57 | % the data, we set up the linear problem as y=G*m, where m=[a1,b1,a2,b2]. 58 | % x0 is then computed from the elements of m as x0=(b2-b1)/(a1-a2). 59 | % 60 | % We loop over the data points: each time dividing them into two groups, 61 | % recording the sum-of-squares-of-residuals R and the x0 value. And after 62 | % going over all the data points the solution is ideally the x0 and its 63 | % associated a1,b1,a2,b2 which correspond to the minimum R value. We do 64 | % this "brute force" approach to the nonlinear problem rather than such 65 | % often-seen methods like locally approximating the nonlinear function as 66 | % linear and using derivatives, because this problem has discontinuities 67 | % that mess up the differentiability at the data points. And the search 68 | % over all x0 is very small because it is discretized - there's only one 69 | % x0 that corresponds to each data division, and so we only need to compute 70 | % N-2 values of R to find that minimum, where N is the number of data points. 71 | % Note there is a remote possibility that there will be two equal minima, 72 | % ie that there are two equally worthy places to intersect two fit lines, 73 | % so one should look for that when analyzing our R values and report both 74 | % cases. Please see a full lowdown at: 75 | % http://staff.washington.edu/aganse/mpregression/index.html 76 | % Especially note on that webpage the important caveats about when this 77 | % script will and will not work. 
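% Example (illustrative only; the data below are synthetic): fit two conjoined
% lines to noisy data whose slope changes at x=5:
%   x = (0:0.5:10)';
%   y = [2*x(x<=5); 10 + 4*(x(x>5)-5)] + 0.1*randn(size(x));
%   [m,R,idiv,G] = regress2lines(x,y);
%   % m = [a1;b1;a2;b2;x0], so the fitted intersection m(5) should land near 5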
78 | 79 | 80 | N=length(y); 81 | igood=zeros(N-2,1); x0vector=igood; Rvector=igood; % creating col vectors 82 | for i=2:N-1 83 | % set up the linear problem as y=G*m, where m=[a1,b1,a2,b2]: 84 | G(1:i,:)=[x(1:i),ones(i,1),zeros(i,2)]; 85 | G(i+1:N,:)=[zeros(N-i,2),x(i+1:N),ones(N-i,1)]; 86 | m=pinv(G'*G)*G'*y; % least-squares solution to linear problem 87 | x0=(m(4)-m(2))/(m(1)-m(3)); % where the two fit lines intersect 88 | R=sum( (y-G*m).^2 ); 89 | %fprintf(1,'x(1)=%f, x0=%f, x(N)=%f\n',x(1),x0,x(N)); 90 | if x0>=x(1) && x0<=x(N) 91 | igood(i-1)=1; 92 | x0vector(i-1)=x0; 93 | Rvector(i-1)=R; 94 | end 95 | end 96 | 97 | % Pick out the minimum R and its associated solution info: 98 | %disp(num2str([(2:N-1)',igood,x0vector,Rvector])); % (for debugging) 99 | [Rstar,imin]=min(Rvector(find(igood==1))); 100 | idiv=imin+1; 101 | Gstar(1:idiv,:)=[x(1:idiv),ones(idiv,1),zeros(idiv,2)]; 102 | Gstar(idiv+1:N,:)=[zeros(N-idiv,2),x(idiv+1:N),ones(N-idiv,1)]; 103 | m=pinv(Gstar'*Gstar)*Gstar'*y; % least-squares solution to linear problem 104 | mstar=[m;(m(4)-m(2))/(m(1)-m(3))]; 105 | 106 | 107 | 108 | 109 | % Test data, etc...: 110 | % N=40; 111 | % x=sort(N*rand(N,1)); 112 | % 113 | % noisy two lines: 114 | % G(1:i,:)=[x(1:i),ones(i,1),zeros(i,2)]; 115 | % G(i+1:N,:)=[zeros(N-i,2),x(i+1:N),ones(N-i,1)]; 116 | % y=G*[4,3,-1,73]'; 117 | % yd=y+4*randn(N,1); 118 | % 119 | % noisy parabola: 120 | % G=[x.^2/2, x, ones(N,1)]; 121 | % y=G*[4,4,4]'; 122 | % yd=y+10*randn(N,1); 123 | % 124 | % plots: 125 | % plot(x,y,'-',x,yd,'*') 126 | % plot(x,y,'-',x,yd,'*',[x(1);m(5);m(5);x(end)],... 127 | % [m(1)*x(1)+m(2), m(1)*m(5)+m(2), m(3)*m(5)+m(4), m(3)*x(end)+m(4)],'-+') 128 | -------------------------------------------------------------------------------- /finiteDiffOp.m: -------------------------------------------------------------------------------- 1 | function L=finiteDiffOp(order,segmentlengths,dx,BCoption) 2 | % FINITEDIFFOP - Return an ORDERth order finite difference operator matrix 3 | % with "kinks" in it to break it into segments of SEGMENTLENGTHS to accommodate 4 | % discontinuities in a profile model, or multiple profiles concatenated together 5 | % (e.g. to avoid regularizing across the profile boundaries). 6 | % Various boundary conditions for the operator may be specified, or none at all, 7 | % according to the codes in BCoption. (NOTE BCOPTION ONLY PARTIALLY IMPLEMENTED) 8 | % 9 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 10 | % Contact at andy@ganse.org , http://research.ganse.org 11 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 12 | % See LICENSE.txt for full license statement. 13 | % 14 | % Usage: 15 | % L=finiteDiffOp(order,segmentlengths,dx,BCoption); 16 | % 17 | % Inputs: 18 | % order = order of fin diff operator (=0,1,2) 19 | % segmentlengths = vector containing number of model elements between each 20 | % discontinuity, or can be simply a scalar value with number 21 | % of total model elements if no discontinuities in model. 22 | % Note this could equivalently be used when model vector is 23 | % a concatenation of multiple profiles such that you don't 24 | % want to smooth boundary from one profile to next. 25 | % dx = delta x between model parameters in continuous profile. 26 | % BCoption: NOTE: OPTS 2-5 REQUIRE CODE FIX AT BTM OF SCRIPT, BUT OPTS 0-1 OK. 27 | % 0 = no additional bdy conds. 28 | % 1 = prefer 0 gradient between last microlayer and basement, 29 | % trying to keep vels equal so no reflection from basement. 
30 | % 2 = Constrain f'' to equal zero at the endpoints, so that there is 31 | % zero curvature there. L not full rank here. 32 | % 3 = Extends minimum curvature integral from -inf to +inf, forcing 33 | % f=0 at all nodes outside grid. f'' at boundary and first node 34 | % outside grid can have non-zero curvature. Note results using 35 | % this BC can be similar to #4 because of the nearby f=0. But L 36 | % is full rank here. 37 | % 4 = Constrain f' to equal zero at the endpoints, so that there is 38 | % zero gradient there. L not full rank here. 39 | % 5 = Constrain f itself to equal zero at the endpoints. L full rank. 40 | % 41 | % Outputs: 42 | % L = the finite diff operator matrix 43 | 44 | 45 | if order==0 46 | % "Zeroth order Tikhonov regularization", also called "ridge regression". 47 | % Note no segment bdys are handled here because 0th order regu doesn't care. 48 | L=eye(sum(segmentlengths)); 49 | 50 | elseif order==1 51 | % "first order Tikhonov regularization" for total variation regularization 52 | 53 | L=[]; 54 | for i=1:length(segmentlengths) 55 | M=segmentlengths(i); 56 | L1=-1*diag(ones(M,1)) + diag(ones(M-1,1),+1); % fwd diffs : O(dx) 57 | %L1=diag(ones(M-1,1),1) - diag(ones(M-1,1),-1); % cen diffs : O(dx^2) 58 | L1=L1(1:(end-1),:); % diag() by itself didn't give quite what we wanted - 59 | % it gave a square matrix with implicit bdy conds - 60 | % so we're just removing top and bottom rows with BCs. 61 | % Note nonsquare now. 62 | L=blkdiag(L,L1); 63 | end 64 | 65 | % NOTE dx presently not implemented but if reimplementing note dx is now a vector. 66 | 67 | % Following is dx normalization such that L*m = 1st deriv of m. 68 | % Only really makes sense to do this when not including other constraints 69 | % or segments in problem, otherwise easier to just leave off: 70 | %L=L/dx; % for forward diffs to O(dx) 71 | %L=L/dx/2; % for central diffs to O(dx^2) 72 | 73 | elseif order==2 74 | % "2nd order Tikhonov regularization" for Occam's (smoothing) regularization 75 | 76 | % central diffs : O(dx^2) : 77 | L=[]; 78 | for i=1:length(segmentlengths) 79 | M=segmentlengths(i); 80 | L1=-2*diag(ones(M,1)) + diag(ones(M-1,1),1) + diag(ones(M-1,1),-1); 81 | if BCoption==1 82 | L1(1,1)=-1; L1(end,end)=-1; 83 | elseif BCoption==0 84 | L1=L1(2:end-1,:); % diag() by itself didn't give quite what we wanted - 85 | % it gave a square matrix with implicit bdy conds - 86 | % so we're just removing top and bottom rows with BCs. 87 | % Note nonsquare now. 88 | end 89 | L=blkdiag(L,L1); 90 | end 91 | 92 | % NOTE dx presently not implemented but if reimplementing note dx is now a vector. 93 | 94 | % Following is dx normalization such that L*m = 2nd deriv of m. 95 | % Only really makes sense to do this when not including other constraints 96 | %or segments in problem, otherwise easier to just leave off: 97 | %L1=L1/dx^2; 98 | %L1=L1/sqrt(M-2); % or here we normalize by #pts-2 (ie subtracting endpts) 99 | % to address the finite evaluation domain. 100 | % Now the units of L1*vector are M/X^2, where M are 101 | % the units of model and X are the units of the axis 102 | % the model is discretized over. 103 | 104 | end 105 | 106 | 107 | 108 | if 0 109 | % Apply boundary conditions based on input options: 110 | % Note if BCoption==0 then all this is skipped... 
111 | if BCoption==1, L(end+1,end-1:end)=[-1,1]; end; % for my microlayers on top 112 | % of lower halfspace case - 113 | % prefer zero-gradient between 114 | % last microlayer and halfspace 115 | % to prevent reflections there 116 | if BCoption==2, topleftcorner=[0 0 0 0]; btmrightcorner=[0 0 0 0]; end; 117 | if BCoption==3, topleftcorner=[-2 1 0 0]; btmrightcorner=[0 0 1 -2]; end; 118 | if BCoption==4, topleftcorner=[-2 2 0 0]; btmrightcorner=[0 0 2 -2]; end; 119 | if BCoption==5, topleftcorner=[-2 0 0 0]; btmrightcorner=[0 0 0 -2]; end; 120 | 121 | if BCoption>1, 122 | % WARNING: FIXME: this part is not ready and must be correctly implemented: 123 | % need to first prepend a new row to top of L and append a new row to btm of L, then: 124 | %L(1,1:4)=topleftcorner; 125 | %L(M,M-3:M)=btmrightcorner; 126 | end 127 | 128 | end 129 | -------------------------------------------------------------------------------- /manpage.txt: -------------------------------------------------------------------------------- 1 | INVGN - Gauss-Newton inversion, version 1.0. 2 | by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle. 3 | Contact at andy@ganse.org , http://research.ganse.org 4 | Copyright (c) 2015, University of Washington, via 3-clause BSD license. 5 | See LICENSE.txt for full license statement. 6 | 7 | ------------------------------------------------------------------------------- 8 | 9 | INVGN: Calculate Tikhonov-regularized, Gauss-Newton nonlinear iterated inversion 10 | to solve the damped nonlinear least squares problem: 11 | minimize ||g(m)-d||^2_2 + lambda^2||Lm||^2_2 12 | For appropriate choices of regularization parameter lambda, this problem is 13 | equivalent to: 14 | minimize ||Lm||_2 subject to ||g(m)-d||_23), but to handle a few minor 38 | remaining differences between Matlab and Octave, do note the "usingOctave" 39 | flag among the options listed at top of the InvGN.m script. Perhaps in future 40 | versions INVGN will allow these options to be set via keyword/value pairs in the 41 | input argument list, but not implemented yet. 42 | 43 | 44 | USAGE: 45 | [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv] = ... 46 | invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,... 47 | verbose,fid,vargin); 48 | 49 | 50 | INPUTS: (note all vectors are always column vectors) 51 | fwdfunc = Matlab/Octave function handle to forward problem function which 52 | is of form outvector=myfunction(invector,...). (See example 1 53 | of "help function_handle" in Matlab for how to implement) 54 | derivfunc = Matlab/Octave function handle to derivatives function which 55 | is of form outvector=myfunction(invector,...). 56 | If derivfunc=[] then finitediffs are automatically used instead. 57 | epsr = relative accuracy of forward problem computation (not the 58 | accuracy of the modeling of the physical process, but of the 59 | computation itself, see e.g. Gill,Murray,Wright 1981). Affects 60 | convergence testing and is passed into finite diff function (if 61 | used), in both cases providing stepsize h (h is about sqrt(epsr) 62 | if the model params are all of order unity). Epsr has a minimum 63 | at the tradeoff between degrading Talor series expansion accuracy 64 | if h too large, and increasing roundoff error in the numerator 65 | G(m+h)-G(m) of finite diffs if h too small. 
66 | Epsr can be scalar (ie same for all elements of model vector) or 67 | vector of same length as model vector for 1-to-1 correspondence 68 | to different variable "types" (eg layer bulk properties vs layer 69 | thicknesses). 70 | dmeas = measured data vector (includes noise) 71 | covd = if sqr matrix, then this is measurement noise covariance matrix. 72 | if col vector, then this is diag of meas noise covariance matrix. 73 | if scalar, then this is measurement variance, equal for all meas. 74 | if neg, then = -covd^(-1/2), allowing e.g. use of zeros for a mask, 75 | so note in that case it's the inverse of the STDEVS (sqrt(cov)). 76 | lb = lower model bounds vector (hard bnds, no tolerance) 77 | ub = upper model bounds vector (hard bnds, no tolerance) 78 | These bounds are only implemented at the iteration steps after 79 | solving for the model perturbation without them; ie functions 80 | such as BVLS or LSQNONNEG are not used. If the bounds are seen 81 | to be passed in a candidate model perturbation, the stepsize of 82 | the perturbation is reduced (but retaining its direction) until 83 | within bounds. As long as we know the final solution must be 84 | within the bounds and not on them, this is merely like changing 85 | the starting point of inversion and is valid (perhaps somtimes 86 | not as efficient as one of those more complicated functions). 87 | But if the solution from this code ends up on a bound, note you 88 | should treat that solution with some caution and not consider it 89 | a true GN solution. In my own inverse problems using this code 90 | the bounds only ever engage at the tails of my Lcurve where I'm 91 | not too worried about being rigorous, so I haven't found it 92 | necessary to complicate things with those other functions. 93 | m0 = initial model vector estimate to start iterating from; if this 94 | is instead a matrix (NxM), then it's a collection of M initial 95 | model vectors of length N each, corresponding to M lambda values 96 | (Typically this is done to continue iterations on previous runs 97 | without starting over.) 98 | Alternate use: if L (next input arg) = I (identity matrix), then 99 | this is also the "preferred model" whose distance from the soln 100 | model is minimized. 101 | L = If this is a matrix, then this is the user-supplied finite 102 | difference operator for Tikhonov regularization (function 103 | finiteDiffOP.m can help construct it) in the frequentist 104 | framework. If in the Bayesian framework and lambda is set to 1, 105 | then L can be supplied as the Cholesky decomposition of the 106 | inverse model prior covariance matrix. 107 | If L is scalar then it is the number of profile segments (for 108 | example if the model vector represents concatenated P & S wave 109 | speed profiles then Lin=2) and 2nd order Tikhonov (smoothness) 110 | frequentist regularization will be used. 111 | lambda = Used for the frequentist inversion framework (if using Bayesian 112 | framework then set lambda=1 and see Bayesian note for L above). 113 | If lambda is a negative scalar, auto calculate Lcurve with num 114 | pts equal to magnitude of the negative scalar. The auto-calc'd 115 | curve starts at lambda_mid = sqrt(max(eig(G'*G))/max(eig(L'*L))) 116 | and decreases logarithmically from there to 3 orders of mag 117 | less and more than lambda_mid by default. If lambda >=0, use 118 | its value(s) directly as frequentist Tikhonov tradeoff param(s), 119 | in which case lambda can be scalar or vector. 
120 | If lambda is a 3-element vector, lambda(1) is the pos or neg scalar 121 | just described; and lambda(2:3) are how many orders of magnitude 122 | greater (lambda(2)) or less (lambda(3)) than lambda_mid to sweep 123 | in the auto-calc'd curve. (lambda(2:3) defaults are 3,-3) 124 | maxiters = Number of GN iterations to perform at each lambda on the Lcurve. 125 | So total number of derivative estimations will be 126 | numlambdas*maxiters. 127 | useMpref = flag for one of three problem frameworks: 128 | useMpref=0 is for a frequentist formulation with higher-order 129 | Tikhonov regularization. Here the regularization doesn't 130 | handle distance from preferred (initial) model m0, but no 131 | restriction on L (unlike useMpref=1). 132 | useMpref=1 is for using 0th-order-Tikh (ridge regr), ie 133 | L==eye(size(L)), in a frequentist formulation in which m0 is 134 | the "preferred model" the inversion minimizes distance to. 135 | useMpref=2 is for using Bayesian formulation, in which case: 136 | m0 is the prior model mean, lambda must =1, L=chol(inv(Cprior)) 137 | (user must set that L externally, note ironically L there must 138 | be RIGHT triangular, the "L" notation is from frequentist case), 139 | output C is now the Bayesian posterior covariance matrix (which 140 | note is different from the frequentist C), outputs R and Ddiag 141 | are empty, and normrough won't have literal meaning anymore. 142 | verbose = 0-3, with 0 = run quietly, and 3 = lots of progress text 143 | Progress text (percent done) in findiffs is useful for slow 144 | forward problems and/or debugging - but note percent done text 145 | is unavailable in jacfwddist and jaccendist. 146 | fid = File handle to which progress text is output in verbose option. 147 | Note fid=1 outputs to stdout, but an output file is useful for 148 | long batch runs when the forward problem is slow. 149 | vargin = the extra arguments (if any) that must be passed to the fwdprob 150 | function are tacked onto the end here. You may have none, in 151 | which case your last arg is the fid.) 152 | 153 | OUTPUTS: (note all vectors are always column vectors) 154 | m = matrix of maximum likelihood model solution vectors for each 155 | lambda. So m is MxP for M model params and P points on Lcurve, 156 | ie each column of m is a model vector for a different lambda. 157 | normdm = vectors of L2 norms of model perturbation steps "dm" for each 158 | lambda. (so normdm is imax x P, with i steps to convergence 159 | for l'th lambda, imax=max(i(l)), and P points on Lcurve). Unused 160 | elements in normdm are set to -1, since norms can't be negative. 161 | normmisfit = vector of L2 norms of data misfit between measured and 162 | predicted data at solution point for each lambda. 163 | normrough = vector of L2 norms of roughness (for which units are 2nd deriv) 164 | at solution points for each lambda. "Roughness" refers to case 165 | of 2nd order Tikhonov (smoothing constraint) but note really here 166 | this norm's meaning depends on what the supplied L matrix is 167 | (eg might be other model norms than roughness). And in Bayesian 168 | case (useMpref=2) this doesn't have same meaning anymore although 169 | it's still part of the objective function as it relates to Cprior. 170 | lambda = vector of tradeoff parameters used for Tikhonov regularization. 171 | (you might already have this if you passed in vector of lambdas 172 | as one of the input arguments, but this is useful in the case of 173 | specifying to auto-calculate N Lcurve points). 
174 | dpred = predicted data at solution models (so there are as many vectors 175 | in dpred as there are points on the Lcurve, so dpred is NxP, for 176 | N data pts and P points on Lcurve). 177 | G = Jacobian matrices of partial derivatives at solution models 178 | (so there are as many jacobian matrices in G as there are points 179 | on the Lcurve, so G is NxMxP, for N data pts, M model params, 180 | and P points on Lcurve). 181 | C = if useMpref<2, C contains the frequentist covariance matrices of 182 | solution models (so there are as many cov matrices in C as there 183 | are points on the Lcurve, so C is MxMxP, for M model params and 184 | P points on Lcurve). If useMpref==2 (flagging Bayesian case), 185 | then C is instead the Bayesian posterior model covariance matrix. 186 | R = resolution matrices of solution models (so there are as many cov 187 | matrices in C as there are points on the Lcurve, so C is MxMxP, 188 | for M model params and P points on Lcurve). 189 | (R is empty for Bayesian case when useMpref=2.) 190 | Ddiag = column vector of diagonal of data res matrix (length Ndata) 191 | (Ddiag is empty for Bayesian case when useMpref=2.) 192 | iconv = column vector of number of iterations before convergence at each 193 | lambda. if didn't converge, the iconv value for that lambda 194 | is 0. (length Ndata). 195 | 196 | For more clarity in the code, this function computes its model perturbations via 197 | (regularized) normal equations, rather than generalized SVD or subiterations of 198 | gradient descent, which may be more efficient and/or scalable. The assumption 199 | in this choice was that forward function code takes much longer to compute than 200 | the model perturbation solution itself (which can otherwise be computed faster 201 | via GSVD). If that assumption is not true, then much improvement in compute 202 | time of this function would result from solving for the perturbation using GSVD 203 | methods such as those provided in Per Christian Hansen's toolkit (however note 204 | that toolkit would take a lot of tweaking/paring to make it Octave compatible): 205 | http://www2.imm.dtu.dk/~pch/Regutools 206 | Aside from computational efficiency/speed, another important consequence of 207 | solving normal equations in this code is that it constrains the size of the 208 | problem (number of model parameters) relative to amount of computer memory due 209 | to calculation of a matrix inverse. If useful to note I have successfully run 210 | this code for over 10,000 model parameters on a computer with 16GB of RAM, and 211 | for over 2500 model parameters on a computer with 8GB RAM (in both those cases 212 | the matrix inverse was the memory constraint rather than forward problem). 213 | 214 | 215 | EXAMPLES: 216 | (leaving off output part "[var1,var2,etc]=" which is same format for all): 217 | 218 | Regularized frequentist inversion with auto-chosen lambdas (numlambdas of them): 219 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,-numlambdas,0,maxiters,3,1); 220 | 221 | Regularized frequentist inversion with prechosen lambdas: 222 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,... 223 | [1.04193e+07; 10419.3; 10.4193; 0.0104193;],0,maxiters,3,1); 224 | 225 | Regu inv, picking up after previous run (using lambda vector from prev run): 226 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,lambda,0,maxiters,3,1); 227 | 228 | Parameter estimation (no regularization): 229 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,zeros(size(L)),1,0,... 
230 | maxiters,3,1); 231 | 232 | Frequentist linear problem (should converge after 1st iter), w/ smoothing regu: 233 | deriv=@(x) A; fwdp=@(x) A*x; % define funcs using the A matrix inline 234 | invGN(fwdp,deriv,epsr,datameas,covd,lb,ub,zeros(M,1),L,-numlambdas,0,2,0,1); 235 | (Note maxiters=2 there: actually the first "iter" in saved results is for the 236 | initial value, which admittedly isn't relevant for purely linear problems, but 237 | that's how this nonlinear script fits purely linear problems. Yes definitely 238 | an inefficient kludge.) 239 | 240 | Frequentist nonlinear problem with preferred model: 241 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mpref,eye(M),-numlambdas,1,maxiters,0,1); 242 | 243 | Bayesian nonlinear problem: 244 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mprior,chol(inv(Cprior)),1,2,... 245 | maxiters,0,1); 246 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | _(here's a Markdown-enabled version of manpage.txt which describes InvGN...)_ 2 | ##INVGN - Gauss-Newton inversion - version 1.0. 3 | _by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle._ 4 | _[andy@ganse.org](mailto:andy@ganse.org)_ 5 | _Copyright (c) 2015, University of Washington, via 3-clause BSD license._ 6 | _See LICENSE.txt for full license statement._ 7 | 8 | ------------------------------------------------------------------------------- 9 | 10 | INVGN calculates Tikhonov-regularized, Gauss-Newton nonlinear iterated inversion 11 | to solve the following damped nonlinear least squares problem: 12 | 13 | minimize ||g(m)-d||^2_2 + lambda^2||Lm||^2_2 14 | 15 | For appropriate choices of regularization parameter lambda, this problem is 16 | equivalent to: 17 | 18 | minimize ||Lm||_2 subject to ||g(m)-d||_23), but to handle a few minor 44 | remaining differences between Matlab and Octave, do note the "usingOctave" 45 | flag among the options listed at top of the InvGN.m script. Perhaps in future 46 | versions INVGN will allow these options to be set via keyword/value pairs in the 47 | input argument list, but not implemented yet. 48 | 49 |
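To make the damped least-squares objective stated above concrete, the quantity minimized over the model vector m at a fixed lambda can be written in Matlab/Octave notation as below (phi is just a label for the objective value; g, d, L, and lambda are as in the formula near the top of this page):

```
phi = norm(g(m) - d)^2 + lambda^2 * norm(L*m)^2;
```

INVGN drives this quantity down with Gauss-Newton iterations, repeating the minimization for each lambda to trace out the Lcurve.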
50 | ###USAGE: 51 | 52 | ``` 53 | [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv] = ... 54 | invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,... 55 | verbose,fid,vargin); 56 | ``` 57 | 58 |
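The first two arguments are ordinary Matlab/Octave function handles. As a brief illustration (the function name myForwardModel and its extra arguments freqs and depths are hypothetical), a forward model needing additional fixed inputs would be hooked up as follows, with those extras appended after fid:

```
fwdfunc   = @myForwardModel;   % myForwardModel.m must be on the path
derivfunc = [];                % empty => finite differences are used instead
% extra arguments of myForwardModel(m,freqs,depths) are appended after fid:
% invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,...
%       verbose,fid,freqs,depths);
```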
59 | ###INPUTS: (all vectors are always column vectors) 60 | 61 | **`fwdfunc`** = Matlab/Octave function handle to forward problem function which 62 | is of form outvector=myfunction(invector,...). (See example 1 63 | of "help function_handle" in Matlab for how to implement) 64 | **`derivfunc`** = Matlab/Octave function handle to derivatives function which 65 | is of form outvector=myfunction(invector,...). 66 | If derivfunc=[] then finitediffs are automatically used instead. 67 | **`epsr`** = relative accuracy of forward problem computation (not the 68 | accuracy of the modeling of the physical process, but of the 69 | computation itself, see e.g. Gill,Murray,Wright 1981). Affects 70 | convergence testing and is passed into finite diff function (if 71 | used), in both cases providing stepsize h (h is about sqrt(epsr) 72 | if the model params are all of order unity). Epsr has a minimum 73 | at the tradeoff between degrading Talor series expansion accuracy 74 | if h too large, and increasing roundoff error in the numerator 75 | G(m+h)-G(m) of finite diffs if h too small. 76 | Epsr can be scalar (ie same for all elements of model vector) or 77 | vector of same length as model vector for 1-to-1 correspondence 78 | to different variable "types" (eg layer bulk properties vs layer 79 | thicknesses). 80 | **`dmeas`** = measured data vector (includes noise) 81 | **`covd`** = if sqr matrix, then this is measurement noise covariance matrix. 82 | if col vector, then this is diag of meas noise covariance matrix. 83 | if neg, then = -covd^(-1/2), allowing e.g. use of zeros for a mask, 84 | so note in that case it's the inverse of the STDEVS (sqrt(cov)). 85 | **`lb`** = lower model bounds vector (hard bnds, no tolerance) 86 | **`ub`** = upper model bounds vector (hard bnds, no tolerance) 87 | These bounds are only implemented at the iteration steps after 88 | solving for the model perturbation without them; ie functions 89 | such as BVLS or LSQNONNEG are not used. If the bounds are seen 90 | to be passed in a candidate model perturbation, the stepsize of 91 | the perturbation is reduced (but retaining its direction) until 92 | within bounds. As long as we know the final solution must be 93 | within the bounds and not on them, this is merely like changing 94 | the starting point of inversion and is valid (perhaps somtimes 95 | not as efficient as one of those more complicated functions). 96 | But if the solution from this code ends up on a bound, note you 97 | should treat that solution with some caution and not consider it 98 | a true GN solution. In my own inverse problems using this code 99 | the bounds only ever engage at the tails of my Lcurve where I'm 100 | not too worried about being rigorous, so I haven't found it 101 | necessary to complicate things with those other functions. 102 | **`m0`** = initial model vector estimate to start iterating from; if this 103 | is instead a matrix (NxM), then it's a collection of M initial 104 | model vectors of length N each, corresponding to M lambda values 105 | (Typically this is done to continue iterations on previous runs 106 | without starting over.) 107 | Alternate use: if L (next input arg) = I (identity matrix), then 108 | this is also the "preferred model" whose distance from the soln 109 | model is minimized. 110 | **`L`** = If this is a matrix, then this is the user-supplied finite 111 | difference operator for Tikhonov regularization (function 112 | finiteDiffOP.m can help construct it) in the frequentist 113 | framework. 
If in the Bayesian framework and lambda is set to 1, 114 | then L can be supplied as the Cholesky decomposition of the 115 | inverse model prior covariance matrix. 116 | If L is scalar then it is the number of profile segments (for 117 | example if the model vector represents concatenated P & S wave 118 | speed profiles then Lin=2) and 2nd order Tikhonov (smoothness) 119 | frequentist regularization will be used. 120 | **`lambda`** = Used for the frequentist inversion framework (if using Bayesian 121 | framework then set lambda=1 and see Bayesian note for L above). 122 | If lambda is a negative scalar, auto calculate Lcurve with num 123 | pts equal to magnitude of the negative scalar. The auto-calc'd 124 | curve starts at lambda_mid = sqrt(max(eig(G'*G))/max(eig(L'*L))) 125 | and decreases logarithmically from there to 3 orders of mag 126 | less and more than lambda_mid by default. If lambda >=0, use 127 | its value(s) directly as frequentist Tikhonov tradeoff param(s), 128 | in which case lambda can be scalar or vector. 129 | If lambda is a 3-element vector, lambda(1) is the pos or neg scalar 130 | just described; and lambda(2:3) are how many orders of magnitude 131 | greater (lambda(2)) or less (lambda(3)) than lambda_mid to sweep 132 | in the auto-calc'd curve. (lambda(2:3) defaults are 3,-3) 133 | **`useMpref`** = flag for one of three problem frameworks: 134 | useMpref=0 is for a frequentist formulation with higher-order 135 | Tikhonov regularization. Here the regularization doesn't 136 | handle distance from preferred (initial) model m0, but no 137 | restriction on L (unlike useMpref=1). 138 | useMpref=1 is for using 0th-order-Tikh (ridge regr), ie 139 | L==eye(size(L)), in a frequentist formulation in which m0 is 140 | the "preferred model" the inversion minimizes distance to. 141 | useMpref=2 is for using Bayesian formulation, in which case: 142 | m0 is the prior model mean, lambda must =1, L=chol(inv(Cprior)) 143 | (user must set that L externally, note ironically L there must 144 | be RIGHT triangular, the "L" notation is from frequentist case), 145 | output C is now the Bayesian posterior covariance matrix (which 146 | note is different from the frequentist C), outputs R and Ddiag 147 | are empty, and normrough won't have literal meaning anymore. 148 | **`maxiters`** = Number of GN iterations to perform at each lambda on the Lcurve. 149 | So total number of derivative estimations will be 150 | numlambdas*maxiters. 151 | **`verbose`** = 0-3, with 0 = run quietly, and 3 = lots of progress text 152 | Progress text (percent done) in findiffs is useful for slow 153 | forward problems and/or debugging - but note percent done text 154 | is unavailable in jacfwddist and jaccendist. 155 | **`fid`** = File handle to which progress text is output in verbose option. 156 | Note fid=1 outputs to stdout, but an output file is useful for 157 | long batch runs when the forward problem is slow. 158 | **`vargin`** = the extra arguments (if any) that must be passed to the fwdprob 159 | function are tacked onto the end here. You may have none, in 160 | which case your last arg is the fid.) 161 |
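The sketch below shows one way the inputs above might be assembled for a frequentist run with 2nd-order (smoothing) Tikhonov regularization; all names and sizes are hypothetical, dmeas is assumed to already hold the measured data, and myForwardModel again stands in for the user's forward code:

```
M = 50;  N = 200;                        % model and data sizes
m0   = ones(M,1);                        % starting model, scaled to order 1
lb   = zeros(M,1);  ub = 10*ones(M,1);   % hard bounds
covd = 0.05^2*ones(N,1);                 % diagonal of the measurement noise covariance
L    = finiteDiffOp(2, M, 1, 0);         % 2nd-order Tikhonov operator, no extra BCs
epsr = 1e-12;                            % relative accuracy of the forward computation
[m,normdm,normmisfit,normrough,lambda] = ...
    invGN(@myForwardModel,[],epsr,dmeas,covd,lb,ub,m0,L,-20,0,10,1,1);
```

Here lambda=-20 requests an auto-calculated Lcurve with 20 points, useMpref=0 selects the plain frequentist framework, and maxiters=10, verbose=1, fid=1 (stdout) fill out the remaining arguments.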
162 | ###OUTPUTS: (all vectors are always column vectors) 163 | 164 | **`m`** = matrix of maximum likelihood model solution vectors for each 165 | lambda. So m is MxP for M model params and P points on Lcurve, 166 | ie each column of m is a model vector for a different lambda. 167 | **`normdm`** = vectors of L2 norms of model perturbation steps "dm" for each 168 | lambda. (so normdm is imax x P, with i steps to convergence 169 | for l'th lambda, imax=max(i(l)), and P points on Lcurve). Unused 170 | elements in normdm are set to -1, since norms can't be negative. 171 | **`normmisfit`** = vector of L2 norms of data misfit between measured and 172 | predicted data at solution point for each lambda. 173 | **`normrough`** = vector of L2 norms of roughness (for which units are 2nd deriv) 174 | at solution points for each lambda. "Roughness" refers to case 175 | of 2nd order Tikhonov (smoothing constraint) but note really here 176 | this norm's meaning depends on what the supplied L matrix is 177 | (eg might be other model norms than roughness). And in Bayesian 178 | case (useMpref=2) this doesn't have same meaning anymore although 179 | it's still part of the objective function as it relates to Cprior. 180 | **`lambda`** = vector of tradeoff parameters used for Tikhonov regularization. 181 | (you might already have this if you passed in vector of lambdas 182 | as one of the input arguments, but this is useful in the case of 183 | specifying to auto-calculate N Lcurve points). 184 | **`dpred`** = predicted data at solution models (so there are as many vectors 185 | in dpred as there are points on the Lcurve, so dpred is NxP, for 186 | N data pts and P points on Lcurve). 187 | **`G`** = Jacobian matrices of partial derivatives at solution models 188 | (so there are as many jacobian matrices in G as there are points 189 | on the Lcurve, so G is NxMxP, for N data pts, M model params, 190 | and P points on Lcurve). 191 | **`C`** = if useMpref<2, C contains the frequentist covariance matrices of 192 | solution models (so there are as many cov matrices in C as there 193 | are points on the Lcurve, so C is MxMxP, for M model params and 194 | P points on Lcurve). If useMpref==2 (flagging Bayesian case), 195 | then C is instead the Bayesian posterior model covariance matrix. 196 | **`R`** = resolution matrices of solution models (so there are as many cov 197 | matrices in C as there are points on the Lcurve, so C is MxMxP, 198 | for M model params and P points on Lcurve). 199 | (R is empty for Bayesian case when useMpref=2.) 200 | **`Ddiag`** = column vector of diagonal of data res matrix (length Ndata) 201 | (Ddiag is empty for Bayesian case when useMpref=2.) 202 | **`iconv`** = column vector of number of iterations before convergence at each 203 | lambda. if didn't converge, the iconv value for that lambda 204 | is 0. (length Ndata). 205 | 206 | For more clarity in the code, this function computes its model perturbations via 207 | (regularized) normal equations, rather than generalized SVD or subiterations of 208 | gradient descent, which may be more efficient and/or scalable. The assumption 209 | in this choice was that forward function code takes much longer to compute than 210 | the model perturbation solution itself (which can otherwise be computed faster 211 | via GSVD). 
212 | time of this function would result from solving for the perturbation using GSVD
213 | methods such as those provided in Per Christian Hansen's toolkit (however note
214 | that toolkit would take a lot of tweaking/paring to make it Octave compatible):
215 | http://www2.imm.dtu.dk/~pch/Regutools
216 | Aside from computational efficiency/speed, another important consequence of
217 | solving the normal equations in this code is that it constrains the size of the
218 | problem (number of model parameters) relative to the amount of computer memory, due
219 | to the calculation of a dense matrix inverse. For reference, I have successfully run
220 | this code for over 10,000 model parameters on a computer with 16GB of RAM, and
221 | for over 2500 model parameters on a computer with 8GB RAM (in both those cases
222 | the matrix inverse was the memory constraint rather than the forward problem).
223 | 
224 | 
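As a rough back-of-the-envelope check on that memory constraint (a sketch only; the figures below are illustrative estimates, not taken from the toolbox), the dominant cost is holding a handful of dense MxM double-precision matrices at once:

    % Sketch: memory footprint of the dense normal-equations step.
    M = 10000;                    % number of model parameters
    one_MxM_GB = 8*M^2 / 2^30;    % one dense double MxM matrix, about 0.75 GB
    % Several MxM arrays coexist (G'*G, L'*L, their sum, its inverse, plus the
    % C and R outputs per lambda), so the working set reaches several GB at
    % M = 10,000 - consistent with the 16GB figure quoted above.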
225 | ###EXAMPLES:
226 | (Leaving off the output part `[var1,var2,etc]=`, which has the same format for all of these...)
227 | 
228 | **Calling invGN with forward problem function `fwdp` and derivatives function `derivp`:**
229 | 
230 | invGN(fwdp,derivp,epsr,datameas,...
231 | 
232 | **Calling invGN to calculate first finite differences of `fwdp` internally:**
233 | 
234 | invGN(fwdp,[],epsr,datameas,...
235 | 
236 | **Regularized frequentist inversion with auto-chosen lambdas (numlambdas of them):**
237 | 
238 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,-numlambdas,0,maxiters,3,1);
239 | 
240 | **Regularized frequentist inversion with prechosen lambdas:**
241 | 
242 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,...
243 | [1.04193e+07; 10419.3; 10.4193; 0.0104193;],0,maxiters,3,1);
244 | 
245 | **Regularized inversion, picking up after a previous run (using the lambda vector from that run):**
246 | 
247 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,lambda,0,maxiters,3,1);
248 | 
249 | **Parameter estimation (no regularization):**
250 | 
251 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,zeros(size(L)),1,0,maxiters,3,1);
252 | 
253 | **Frequentist nonlinear problem with preferred model:**
254 | 
255 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mpref,eye(M),-numlambdas,1,maxiters,0,1);
256 | 
257 | **Bayesian nonlinear problem:**
258 | 
259 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mprior,chol(inv(Cprior)),1,2,maxiters,0,1);
260 | 
261 | **Frequentist linear problem (should converge after 1st iter), w/ smoothing regu:**
262 | 
263 | deriv=@(x) A; fwdp=@(x) A*x; % define funcs using the A matrix inline
264 | invGN(fwdp,deriv,epsr,datameas,covd,lb,ub,zeros(M,1),L,-numlambdas,0,2,0,1);
265 | 
266 | (Note `maxiters=2` there: the first "iter" in the saved results holds the
267 | initial value, which admittedly isn't relevant for purely linear problems, but
268 | that's how this nonlinear script handles purely linear problems - yes,
269 | an inefficient kludge for the linear case.)
270 | 
271 | 
--------------------------------------------------------------------------------
/invGN.m:
--------------------------------------------------------------------------------
1 | function [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv]=...
2 | invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,Lin,lambdaIn,useMpref,...
3 | maxiters,verbose,fid,varargin)
4 | 
5 | % INVGN - Compute Gauss-Newton nonlinear inversion, version 1.0
6 | % Calculate Tikhonov-regularized, Gauss-Newton nonlinear iterated inversion
7 | % to solve the damped nonlinear least squares problem.
8 | %
9 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle.
10 | % Contact at andy@ganse.org , http://research.ganse.org
11 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license.
12 | % See LICENSE.txt for full license statement.
13 | % See manpage.txt for full description and usage instructions.
14 | %
15 | % Usage summary:
16 | % [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv] = ...
17 | % invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,...
18 | % verbose,fid,vargin);
19 | 
20 | 
21 | % Some more programmatic run options (so left off the argument list):
22 | usingOctave=0; % 1=octave, 0=matlab (affects form of just a few commands).
23 | use_central_diffs=0; % 0=use fwd finite diffs, 1=use central finite diffs.
24 | % central diffs are more accurate but twice the calc time.
25 | % In practice this often comes down to a tradeoff between
26 | % calc time for one jacobian matrix vs number of iterations
27 | % (more accurate derivs may make for fewer iterations).
28 | use_dist_jaccalc=0; % 0=compute all columns of Jacobian finite diffs one at
29 | % a time in sequence. (only recommended for few params.)
30 | % 1=compute all columns of Jacobian finite diffs in nproc
31 | % simultaneous instances of octave/matlab on same host.
32 | % (nproc is set in jacfwdN.m and jaccenN.m, typically 4 or 8)
33 | % 2=distribute computation of Jacobian finite diff columns
34 | % over all nodes of bluewater cluster.
35 | saveIntermediate=0; % save intermediate results into a file mid-script in case
36 | % of crash, for slow-running fwd problems and/or debugging.
37 | % Filename is invGN_<pid>.mat.
38 | % 0=none, 1=at end of each lambda loop, 2=at end of each
39 | % iteration too.
40 | keepalliters=0; % 1=add a dimension to the output matrices to save all
41 | % iters while debugging/studying a new inverse problem (but
42 | % watch out - this can potentially eat up a LOT of memory).
43 | % 0=only keep the info for the last iteration, ie solution
44 | % point, but still keep normdm iterations (default).
45 | reuse_first_G=1; % save compute time by not recomputing the first (same)
46 | % jacobian matrix for each lambda (they're same because
47 | % same minit for all lambdas)
48 | check_objfn=0; % Ensure the objective function is decreasing with each
49 | % iteration; if not then halve the step and check again
50 | % till satisfied. Presently this is set up to only kick in
51 | % after "convergence" (based on dm); see next option...
52 | check_objfn_iters=0; % If using check_objfn=1, then after hitting "convergence"
53 | % (based on dm), continue this many iterations with the
54 | % decreasing objective function constraint. (This option
55 | % is ignored if check_objfn=0.)
56 | check_bnds=1; % Ensure the iteration does not step out of the bounds;
57 | % if so then halve the step and check again till satisfied.
58 | outbnds_method=0; % If using check_bnds=1, use one of the following methods:
59 | % if 0 revert to half^n step in same direction for nth step.
60 | % if 1 use reflection approach to handle overstepping bnds,
61 | % if 2 set param to bounds if it oversteps bnds.
62 | maxstephalves=10; % max # times to halve step when using either:
63 | % (check_bnds AND outbnds_method=0) OR check_objfn.
64 | warningsoff=1; % Regardless of verbosity level, turn off calculational
65 | % warnings; currently this is only the high cond# warning
66 | % and is done so that invGN can give a more specific warning.
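% Example (hypothetical values, not the defaults above): when debugging a new,
% slow-running forward problem one might instead set, near the top of this file,
%   keepalliters=1; saveIntermediate=2;   % keep every iteration and checkpoint often
%   check_objfn=1;  check_objfn_iters=2;  % insist on a decreasing objective function
%   outbnds_method=1;                     % handle bound overshoots by reflection
% at the cost of extra memory and extra forward-problem evaluations.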
67 | 68 | 69 | warning('off','MATLAB:nearlySingularMatrix'); % instead caught manually and put in 70 | % better format for this script's output 71 | 72 | 73 | % Dimensions and preliminaries: 74 | ndata=length(dmeas); 75 | nparams=size(m0,1); 76 | if lambdaIn(1)<0 77 | nlambdas=-lambdaIn(1); 78 | if length(lambdaIn)==3 79 | maxlambdaexp=lambdaIn(2); 80 | minlambdaexp=lambdaIn(3); 81 | else 82 | % otherwise use these default values: 83 | maxlambdaexp=3; 84 | minlambdaexp=-3; 85 | end 86 | if verbose>1 87 | fprintf(fid,'Max/min exponent values for Lcurve: %d, %d...\n',maxlambdaexp,minlambdaexp); 88 | fprintf(fid,'Lambda values for Lcurve will be determined during 1st iteration...\n'); 89 | if usingOctave, fflush(fid); end 90 | end 91 | else 92 | nlambdas=length(lambdaIn); 93 | lambda=lambdaIn; 94 | if verbose>1 95 | fprintf(fid,'Using lambda values for Lcurve specified by user...\n'); 96 | if usingOctave, fflush(fid); end 97 | end 98 | % FIXME: add a check that all lambdas are positive... 99 | end; 100 | if verbose>1 101 | fprintf(fid,'Number of lambdas (Lcurve points): %d\n',nlambdas); 102 | fprintf(fid,'Max number of GN iterations at each lambda: %d\n',maxiters); 103 | fprintf(fid,'Number of model parameters: %d\n',nparams); 104 | fprintf(fid,'Number of data points: %d\n',ndata); 105 | if outbnds_method==0 106 | fprintf(fid,'Out-of-bounds steps will revert to half-step in same direction...\n'); 107 | elseif outbnds_method==1 108 | fprintf(fid,'Out-of-bounds steps will reflect from bounds...\n'); 109 | elseif outbnds_method==2 110 | fprintf(fid,'Out-of-bounds steps will stop at bounds...\n'); 111 | end 112 | if usingOctave, fflush(fid); end 113 | end 114 | if size(m0,2)>1 && nlambdas~=size(m0,2) 115 | fprintf(fid,'invGN: error: numcols of m0 not equal to numlambdas.\n'); 116 | if usingOctave, fflush(fid); end 117 | return; 118 | end; 119 | if usingOctave 120 | pid=getpid(); % this is a built-in function in Octave; for Matlab will need external getpid.m script 121 | else 122 | pid=12345; 123 | end 124 | 125 | 126 | % Preallocate main (large) storage matrices: 127 | if keepalliters 128 | m=zeros(nparams,nlambdas,maxiters+1); % maxiters+1 because first iter holds init model 129 | dm=zeros(nparams,nlambdas,maxiters); 130 | normdm=zeros(nlambdas,maxiters); 131 | normmisfit=zeros(nlambdas,maxiters); 132 | normrough=zeros(nlambdas,maxiters); 133 | dpred=zeros(ndata,nlambdas,maxiters); 134 | residshat=zeros(ndata,nlambdas,maxiters); 135 | else 136 | m=zeros(nparams,nlambdas); 137 | dm=zeros(nparams,nlambdas); 138 | %normdm=zeros(nlambdas,1); 139 | normdm=zeros(nlambdas,maxiters); 140 | normmisfit=zeros(nlambdas,1); 141 | normrough=zeros(nlambdas,1); 142 | dpred=zeros(ndata,nlambdas); 143 | residshat=zeros(ndata,nlambdas); 144 | end 145 | % An extra dimension of nlambdas on each of the following matrix variables will 146 | % allow us to keep the matrix from the last iteration (hopefully equaling the 147 | % soln point) for each lambda, so we can study solution sensitivities. If 148 | % that's taking too much mem then we can remove that dim. Note before soln 149 | % point for given lambda is reached, these matrices simply hold the value for 150 | % the current iteration. 
151 | G=zeros(ndata,nparams,nlambdas);
152 | C=zeros(nparams,nparams,nlambdas);
153 | R=zeros(nparams,nparams,nlambdas);
154 | Ddiag=zeros(ndata,nlambdas);
155 | i_conv=zeros(nlambdas,1);
156 | 
157 | % Compute invsqrtcovd once here since it's expensive to compute (for matrices):
158 | if size(covd,2)==1 % covd is either scalar or vector
159 | 
160 | if size(covd,1)==1 % it's a scalar variance
161 | if verbose>1
162 | fprintf(fid,'Measurement noise variance specified as scalar Cd...\n');
163 | if usingOctave, fflush(fid); end
164 | end
165 | if covd<0
166 | invsqrtcovd=-covd*ones(ndata,1);
167 | else
168 | invsqrtcovd=covd^(-1/2)*ones(ndata,1);
169 | end
170 | 
171 | else % it's a column vector = diag of cov matrix
172 | if verbose>1
173 | fprintf(fid,'Measurement noise variances specified = diag(covmatrix)...\n');
174 | if usingOctave, fflush(fid); end
175 | end
176 | if covd(1,1)<0 % minus sign (in arglist as "-covd") flags that it's really
177 | % invsqrtcovd already. (this works since diagonal of cov matrix must
178 | % be non-neg.) Useful e.g. when implementing a data mask via
179 | % zeroes in the covd diagonal.
180 | invsqrtcovd=-covd; % already inverted and sqrt'd, just removing "-" flag
181 | else
182 | invsqrtcovd=covd.^(-1/2); % otherwise it really is the variances; convert to invstdvs
183 | end
184 | end
185 | 
186 | else % then covd is full covariance matrix
187 | 
188 | if verbose>1
189 | fprintf(fid,'Full measurement noise cov matrix specified; computing invsqrtcovd...\n');
190 | if usingOctave, fflush(fid); end
191 | end;
192 | if covd(1,1)<0 % minus sign (in arglist as "-covd") flags that it's really invsqrtcovd already.
193 | % (this works since diagonal of cov matrix must be non-neg.)
194 | % useful e.g. when implementing a data mask via zeroes in the covd diagonal.
195 | invsqrtcovd=-covd; % it's already inverted and sqrt'd, just removing the "-" flag...
196 | else
197 | invsqrtcovd=covd^(-1/2);
198 | end
199 | end
200 | 
201 | % By default, do 2nd-order Tikh regularization, ie smoothing based regu.
202 | % (IDEA: if I allowed for Lin to be a vector - must be done for finiteDiffOp
203 | % too - I could specify where discontinuities are that way... better yet would
204 | % be an option to include discontinuity estimation with the rest, but I think
205 | % that'll significantly change the form of the dm solution expressions.)
206 | 
207 | % Auto-create "roughening" matrix L (2nd order finite diff operator matrix)
208 | % with no discontinuities or bdy conds:
209 | if length(Lin)==1 && size(m0,1)>=8 % the >=8 is because m is supposed to be
210 | % a discretized approximation to a
211 | % continuous function. Hard to imagine
212 | % that with length(m)<8 !
213 | % in this case, Lin is scalar and meant to be number of profile segments:
214 | dx=sqrt(epsr); % (assumes that model vector elements are of order unity)
215 | L=finiteDiffOp(2,nparams*ones(Lin,1),dx,0); %*sqrt(dx); % non-sqr L, ie no bdy conds.
216 | % finiteDiffOp() returns a findiff operator, eg L=1/deltax^2*[1 -2 1...].
217 | % The *sqrt(dx) in the L= line above accommodates the application of the
218 | % finitediffs in the continuous integral norm on m:
219 | % integ{(m'')^2}dx -> integ{(Lm)^2}dx -> (Lm)'*(Lm)*deltax
220 | % = m'*(L'*L*deltax)*m = m'*Lhat'*Lhat*m, where Lhat=L*sqrt(deltax).
221 | % Rather than create a new var Lhat, I just included sqrt(dx) above.
222 | %
223 | % FIXME: temporarily commented out the sqrt(dx) part for num stability concerns...
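% For a concrete (schematic) picture of the operator built above: with one
% segment (Lin=1) and, say, 5 parameters, L has the familiar interior-row form
%   L = 1/dx^2 * [ 1 -2  1  0  0
%                  0  1 -2  1  0
%                  0  0  1 -2  1 ],
% ie one second-difference row per interior point and no boundary rows (hence
% non-square). The row count shown is illustrative only; see finiteDiffOp.m
% for the exact construction.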
224 | elseif length(Lin)~=1 && size(m0,1)<8 && norm(Lin)~=0 && useMpref<2 225 | % catch the error case 226 | fprintf(fid,'invGN: error: if using default L matrix, must have length(m)>=8.\n'); 227 | if usingOctave, fflush(fid); end 228 | return; % errors out & dies. 229 | else 230 | % ie Lin is actually a user-supplied finite diff operator matrix L 231 | if norm(Lin)==0 && verbose>1 232 | fprintf(fid,'Noting L matrix of zeros to signal unregularized parameter estimation.\n'); 233 | if usingOctave, fflush(fid); end 234 | end; 235 | L=Lin; 236 | end 237 | 238 | 239 | if size(m0,2)==1 % ie a single m0 col vector independent of lambda 240 | % Put m0 into first slot of m results matrix for each lambda (repetitive but more intuitive) 241 | if verbose>1 242 | fprintf(fid,'Populating initial model guesses for each lambda from m0 since all same...\n'); 243 | if usingOctave, fflush(fid); end 244 | end; 245 | m(:,:,1)=m0*ones(1,nlambdas); % ie make nlambdas cols of repeated m0 col vectors, equiv to repmat(m0,1,nlambdas) 246 | else 247 | % In this case the initial model guess is diff't for each lambda, so m0 is a matrix. 248 | if verbose>1 249 | fprintf(fid,'Populating initial model guesses for each lambda from columns of m0...\n'); 250 | if usingOctave, fflush(fid); end 251 | end; 252 | m(:,:,1)=m0; 253 | end; 254 | 255 | 256 | for l=1:nlambdas % regularization tradeoff parameters lambda 257 | if verbose && l==1 && lambdaIn(1)<0 % lambdaIn(1)<0 tells us we must auto-find lambdas 258 | % won't know lambda values yet since based on G in 1st iteration: 259 | fprintf(fid,'Lambda(%d) [value determined below in 1st iteration]...\n',l); 260 | if usingOctave, fflush(fid); end 261 | elseif verbose 262 | % here we know lambda values either from 1st iteration or from input arg: 263 | fprintf(fid,'Lambda(%d)=%g...\n',l,lambda(l)); 264 | if usingOctave, fflush(fid); end 265 | end; 266 | converged=0; 267 | for iter=1:maxiters % gauss-newton iterations for a given lambda 268 | if verbose 269 | fprintf(fid,' GN iteration #%d...\n',iter); 270 | if usingOctave, fflush(fid); end 271 | end; 272 | 273 | 274 | if verbose>1, tic; end; 275 | 276 | if keepalliters, i=iter; else i=1; end; % for saving all vs. just one iteration 277 | 278 | % Compute local jacobian matrix of derivs G and local predicted data: 279 | if isempty(derivfunc) 280 | % Compute finite diffs: 281 | if l==1 || iter>1 || size(m0,2)>1 || ~reuse_first_G 282 | if length(m0)<=2 || use_dist_jaccalc==0 283 | if use_central_diffs 284 | if verbose>1 285 | fprintf(fid,' Computing finite diffs using jaccen() (single proc on local host)...\n'); 286 | if usingOctave, fflush(fid); end 287 | end; 288 | [G(:,:,l),dpred(:,l,i)] = ... 289 | jaccen(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:}); 290 | else 291 | if verbose>1 292 | fprintf(fid,' Computing finite diffs using jacfwd() (single proc on local host)...\n'); 293 | if usingOctave, fflush(fid); end 294 | end; 295 | [G(:,:,l),dpred(:,l,i)] = ... 296 | jacfwd(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:}); 297 | end 298 | elseif use_dist_jaccalc==1 299 | if use_central_diffs 300 | fprintf(fid,'ERROR: invGN: locally-computed central-diffs is specified for calculating Jacobian\n'); 301 | fprintf(fid,' but this is not yet implemented in the code. 
You could choose distributed-calc\n');
302 | fprintf(fid,' central diffs, or locally-computed forward diffs at top of invGN...\n');
303 | return;
304 | else
305 | if verbose>1
306 | fprintf(fid,' Computing finite diffs using jacfwdN() (distributed among procs of local host)...\n');
307 | if usingOctave, fflush(fid); end
308 | end;
309 | [G(:,:,l),dpred(:,l,i)] = ...
310 | jacfwdN(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:});
311 | end
312 | elseif use_dist_jaccalc==2
313 | if use_central_diffs
314 | if verbose>1, fprintf(fid,' Computing finite diffs using jaccendist() (network distributed calculation)...\n'); end;
315 | [G(:,:,l),dpred(:,l,i)] = ...
316 | jaccendist(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:});
317 | else
318 | if verbose>1, fprintf(fid,' Computing finite diffs using jacfwddist() (network distributed calculation)...\n'); end;
319 | [G(:,:,l),dpred(:,l,i)] = ...
320 | jacfwddist(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:});
321 | end
322 | end
323 | if saveIntermediate == 2
324 | % save expensive derivs immediately in case of crash before end of loop
325 | eval(['save -v7 invGN_l' num2str(l) '_i' num2str(i) '.mat']);
326 | if verbose>1
327 | fprintf(fid,' saving intermediate workspace to file invGN_%d.mat...\n',pid);
328 | if usingOctave, fflush(fid); end
329 | end;
330 | eval(['save -v7 /tmp/invGN_' num2str(pid) '.mat']);
331 | end
332 | if nlambdas>1 && l==1 && iter==1
333 | % since the first m vector for every lambda is the same (=m0),
334 | % save the first G matrix so we don't unnecessarily recompute it for
335 | % every new lambda:
336 | if verbose>1
337 | fprintf(fid,' Saving finite diffs from l=1,i=1 to avoid recomputing for l>1,i=1...\n');
338 | if usingOctave, fflush(fid); end
339 | end;
340 | G1=G(:,:,l);
341 | end;
342 | elseif l>1 && iter==1 && size(m0,2)==1 && reuse_first_G
343 | if verbose>1
344 | fprintf(fid,' Reusing equivalent finite diffs from l=1,i=1...\n');
345 | if usingOctave, fflush(fid); end
346 | end;
347 | G(:,:,l)=G1;
348 | end
349 | else
350 | % Compute directly from analytical derivative function:
351 | if verbose>1
352 | fprintf(fid,' Computing derivatives...\n');
353 | if usingOctave, fflush(fid); end
354 | end;
355 | G(:,:,l)=feval(derivfunc,m(:,l,i),varargin{:});
356 | dpred(:,l,i)=feval(fwdfunc,m(:,l,i),varargin{:});
357 | end
358 | 
359 | 
360 | % Prewhiten the local jacobian matrix and residuals:
361 | if verbose>1
362 | fprintf(fid,' Prewhitening local jacobian and residuals...\n');
363 | if usingOctave, fflush(fid); end
364 | end;
365 | if size(covd,2)==1
366 | Ghat=zeros(size(G(:,:,1)));
367 | for q=1:size(G,1);
368 | Ghat(q,:)=invsqrtcovd(q)*G(q,:,l); % weighting rows of G, equiv to diag*G
369 | end
370 | residshat(:,l,i)=invsqrtcovd.*(dmeas-dpred(:,l,i));
371 | else
372 | Ghat=invsqrtcovd*G(:,:,l);
373 | residshat(:,l,i)=invsqrtcovd*(dmeas-dpred(:,l,i));
374 | end
375 | 
376 | % On the first iteration of the first lambda, set the vector of lambdas based on G at m0:
377 | if l==1 && iter==1
378 | if lambdaIn(1)<0 % ie specifying auto-find lambdas
379 | if verbose>1
380 | fprintf(fid,' Computing lambda points for Lcurve...\n');
381 | if usingOctave, fflush(fid); end
382 | end;
383 | % Automatically computing the lambdas (regularization params) for L-curve:
384 | % These are computed using the first iteration's derivatives, which are
385 | % the same for all lambdas (note only true for the first iteration) since
386 | % the first m is the same for all lambdas, equaling m0.
387 | 388 | %Per Christian Hansen approach, goes WAY far out on the Lcurve legs but 389 | %has restrictions on form of L: 390 | %lambdamax=max(eig(Ghat'*Ghat))/max(min(eig(L'*L)),16*eps); 391 | %lambdamin=max(min(eig(Ghat'*Ghat))/max(eig(L'*L)),16*eps); 392 | %lambda=logspace(log10(lambdamax),log10(lambdamin),nlambdas); 393 | %disp(num2str(lambda,'%g\n')) 394 | 395 | %Ken Creager approach: 396 | lambda0=sqrt(max(eig(Ghat'*Ghat))/max(eig(L'*L))); 397 | % except Creager starts Lcurve (ie max misfit) at lambda0 and reduces from 398 | % there, whereas here we have lambda0 sortof roughly in the middle of 399 | % Lcurve somewhere: 400 | lambda=logspace(log10(lambda0)+maxlambdaexp,... 401 | log10(lambda0)+minlambdaexp, nlambdas); 402 | if verbose>1 403 | fprintf(fid,' Lambda(1)=%g\n',lambda(1)); 404 | if usingOctave, fflush(fid); end 405 | end 406 | else 407 | if verbose>1 408 | fprintf(fid,' Using lambda points for Lcurve specified by user...\n'); 409 | if usingOctave, fflush(fid); end 410 | end; 411 | end 412 | end 413 | 414 | 415 | % Compute residual and model norms, and objfn which is based on them, at current 416 | % lambda & iteration. In Bayesian case (useMpref==2) normrough doesn't have 417 | % same meaning and not very useful outside of the inversion (eg no Lcurve) but 418 | % it's still used in the objective fn. 419 | normmisfit(l,i)=norm(residshat(:,l,i)); 420 | normrough(l,i)=norm(L*(m(:,l,i))); 421 | objfn(l,i)=normmisfit(l,i)+lambda(l)^2*normrough(l,i); 422 | 423 | 424 | 425 | % Check condition number to warn about stability of the inv() in dm()=... below: 426 | if verbose>1 427 | fprintf(fid,' Computing model perturbation...\n'); 428 | if usingOctave, fflush(fid); end 429 | end; 430 | c=cond(Ghat'*Ghat+lambda(l)^2*L'*L); 431 | if c>1e15 432 | if ~warningsoff 433 | fprintf(fid,' Warning: cond number of (Ghat''*Ghat+lambda^2*L''*L) is large (%g)\n',c); 434 | if usingOctave, fflush(fid); end 435 | end 436 | end 437 | % Compute maximum likelihood model perturbation: 438 | if useMpref 439 | % Caution! This formulation only valid when: 440 | % a.) using 0th-order-Tikh (ridge regr), ie if L==eye(size(L)), 441 | % or b.) using Bayesian formulation, in which case m0 is the prior model mean 442 | % and lambda=1 and L=chol(inv(Cprior)). 443 | if size(m0,2)>1 % ie diff't m0 for each lambda 444 | dm(:,l,i)=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*(Ghat'*residshat(:,l,i)-... 445 | lambda(l)^2*L'*L*(m(:,l,i)-m0(:,l))); 446 | else % if same m0 for each lambda (and this is the expected case when Bayesian) 447 | dm(:,l,i)=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*(Ghat'*residshat(:,l,i)-... 448 | lambda(l)^2*L'*L*(m(:,l,i)-m0)); 449 | end 450 | else 451 | % This option is for frequentist framework with higher-order Tikh regularization. 452 | % Here regu doesn't care about distance from preferred (initial) model, and 453 | % there is no restriction on L (whereas above required L=I for useMpref case). 454 | dm(:,l,i)=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*(Ghat'*residshat(:,l,i)-... 
455 | lambda(l)^2*L'*L*m(:,l,i));
456 | end
457 | 
458 | 
459 | % Check dm for passing model bounds, ie that step is not too far
460 | if check_bnds
461 | if verbose>1
462 | fprintf(fid,' Checking bounds...\n');
463 | if usingOctave, fflush(fid); end
464 | end;
465 | mtilde=m(:,l,i)+dm(:,l,i); % candidate model based on current dm() value
466 | outofbnds=sum(mtilde<lb)+sum(mtilde>ub);
467 | if outofbnds>0
468 | % find out of bounds components
469 | outlow=mtilde<lb;
470 | outhigh=mtilde>ub;
471 | if outbnds_method==0
472 | % if a component out of bnds, revert to half-step in same direction
473 | j=0;
474 | while outofbnds>0 && j<maxstephalves
475 | j=j+1;
476 | dm(:,l,i)=dm(:,l,i)/2;
477 | mtilde=m(:,l,i)+dm(:,l,i);
478 | outlow=mtilde<lb;
479 | outhigh=mtilde>ub;
480 | outofbnds=sum(mtilde<lb)+sum(mtilde>ub);
481 | end
482 | if j>1
483 | action=['passed bounds so stepsize halved^' num2str(j)];
484 | else
485 | action='passed bounds so stepsize halved';
486 | end
487 | elseif outbnds_method==1
488 | % if a component out of bnds, reflect at that bnd
489 | mtilde(outlow)=lb(outlow)+( lb(outlow)-mtilde(outlow) );
490 | mtilde(outhigh)=ub(outhigh)-( mtilde(outhigh)-ub(outhigh) );
491 | action='reflected from bounds';
492 | elseif outbnds_method==2
493 | % if a component out of bnds, set it equal to that bnd
494 | mtilde(outlow)=lb(outlow); % strange code notation here but it works
495 | mtilde(outhigh)=ub(outhigh); % (in both Matlab and Octave)
496 | action='set to bounds';
497 | end
498 | if outbnds_method>0 % update dm given bounds behavior when mtilde altered
499 | dm(:,l,i)=mtilde-m(:,l,i);
500 | end
501 | if verbose>1
502 | fprintf(fid,' %d/%d model params %s in lambda(%d)...\n',outofbnds,nparams,action,l);
503 | if usingOctave, fflush(fid); end
504 | end;
505 | else
506 | if verbose>1
507 | fprintf(fid,' All parameter updates within bounds.\n');
508 | if usingOctave, fflush(fid); end
509 | end;
510 | end
511 | end
512 | 
513 | 
514 | % Check dm for decreasing obj fn, ie once again that step is not too far, after
515 | % having hit "convergence" (presently based on dm)
516 | if check_objfn && converged
517 | if verbose>1
518 | fprintf(fid,' Checking for decreasing objfn...\n');
519 | fprintf(fid,' Calculating fwdprob for preliminary candidate step...\n');
520 | if usingOctave, fflush(fid); end
521 | end;
522 | mtilde=m(:,l,i)+dm(:,l,i); % candidate model based on current dm() value
523 | dpred_tilde=feval(fwdfunc,mtilde,varargin{:});
524 | if size(covd,2)==1
525 | residshat_tilde=invsqrtcovd.*(dmeas-dpred_tilde);
526 | else
527 | residshat_tilde=invsqrtcovd*(dmeas-dpred_tilde);
528 | end
529 | normmisfit_tilde=norm(residshat_tilde);
530 | normrough_tilde=norm(L*mtilde);
531 | objfn_tilde=normmisfit_tilde+lambda(l)^2*normrough_tilde;
532 | decreasing_objfn=objfn_tilde<=objfn(l,i);
533 | fprintf(fid,' objfn(l,i)=%g, objfn_tilde=%g\n',objfn(l,i),objfn_tilde);
534 | if ~decreasing_objfn
535 | j=0;
536 | while ~decreasing_objfn && j<maxstephalves
537 | j=j+1;
538 | dm(:,l,i)=dm(:,l,i)/2;
539 | mtilde2=m(:,l,i)+dm(:,l,i);
540 | if verbose>1
541 | fprintf(fid,' Calculating fwdprob for halved candidate step...\n');
542 | if usingOctave, fflush(fid); end
543 | end;
544 | dpred_tilde2=feval(fwdfunc,mtilde2,varargin{:});
545 | if size(covd,2)==1
546 | residshat_tilde2=invsqrtcovd.*(dmeas-dpred_tilde2);
547 | else
548 | residshat_tilde2=invsqrtcovd*(dmeas-dpred_tilde2);
549 | end
550 | normmisfit_tilde2=norm(residshat_tilde2);
551 | normrough_tilde2=norm(L*mtilde2);
552 | objfn_tilde2=normmisfit_tilde2+lambda(l)^2*normrough_tilde2;
553 | fprintf(fid,' objfn(l,i)=%g, objfn_tilde2=%g, objfn_tilde=%g, j=%d \n',...
554 | objfn(l,i),objfn_tilde2,objfn_tilde,j); 555 | if objfn_tilde2 > objfn_tilde 556 | fprintf(fid,' WARNING: hump just found between last step and candidate step,\n'); 557 | fprintf(fid,' implying a region not satisfying monotonicity...\n'); 558 | return; 559 | else % objfn_tilde2 <= objfn_tilde 560 | decreasing_objfn=objfn_tilde2<=objfn(l,i); 561 | end 562 | objfn_tilde=objfn_tilde2; 563 | end 564 | if verbose>1 565 | if j>1 566 | fprintf(fid,' Non-decreasing objfn initially, so stepsize halved^%d; objfn now decreasing.\n',j); 567 | else 568 | fprintf(fid,' Non-decreasing objfn initially, so stepsize halved; objfn now decreasing.\n'); 569 | end 570 | if usingOctave, fflush(fid); end 571 | end; 572 | else 573 | if verbose>1 574 | fprintf(fid,' Parameter update satisfies decreasing objfn.\n'); 575 | if usingOctave, fflush(fid); end 576 | end; 577 | end 578 | end 579 | 580 | 581 | % Update the model vector with the new perturbation: 582 | if keepalliters 583 | m(:,l,i+1)=dm(:,l,i)+m(:,l,i); 584 | normdm(l,i)=norm(dm(:,l,i)); 585 | else 586 | % It's a bit of a hack, but note that i in here always =1 if keepalliters=0: 587 | m(:,l,1)=dm(:,l,i)+m(:,l,i); 588 | normdm(l,iter)=norm(dm(:,l,i)); % i=1 since keepalliters=0, so use iter here. 589 | % FIXME: hm, the above means that at final iteration, the m(:,l,1) won't 590 | % technically correspond to the resids & derivs for that i,l. But if it's 591 | % really the final iteration such that the solution converged, this shouldn't 592 | % matter. By comparison, up in the "keepalliters" part of the if block, 593 | % at end the m() will have one more element in iters dimension than normdm 594 | % since the first iter for m() holds the initial m vector. 595 | end 596 | 597 | 598 | if i>1 && keepalliters 599 | % (since requires previous iteration's results for comparison in order 600 | % check for convergence) 601 | % Note goal is to go one step past convergence, ie obtain m_{i+1}, 602 | % so that can have derivs & resids at soln point m_i, so that's why the 603 | % resids & Ghat here are from before the model vector update that just 604 | % happened. 605 | % Anyway, model perturbation after convergence should be so small that 606 | % additional update is negligible. 607 | if verbose>1 608 | fprintf(fid,' Checking convergence criteria for grad(Fobj), dm, & df...\n'); 609 | if usingOctave, fflush(fid); end 610 | end; 611 | % for now, "convergence" is based only on dm getting really small: 612 | % (still analyzing the full convergence test) FIXME 613 | %[converged,gradtest,mtest,ftest]=... 614 | [dummy,gradtest,converged,ftest]=... 615 | convinfo(fwdfunc,epsr,Ghat,residshat(:,l,i-1),residshat(:,l,i),... 
616 | m(:,l,i-1),m(:,l,i),verbose,fid);
617 | end
618 | 
619 | 
620 | if verbose>1
621 | fprintf(fid,' Norm(deltam)=%g\n',normdm(l,i));
622 | fprintf(fid,' Norm(datamisfit)=%g\n',normmisfit(l,i));
623 | fprintf(fid,' Norm(roughness)=%g\n',normrough(l,i));
624 | fprintf(fid,' Norm(m)=%g\n',norm(m(:,l,i)));
625 | if verbose==4
626 | fprintf(fid,' dm={');
627 | for q=1:length(m0)
628 | fprintf(fid,' %g',dm(q,l,i));
629 | end
630 | fprintf(fid,' }\n');
631 | fprintf(fid,' m={'); for q=1:length(m0)
632 | fprintf(fid,' %g',m(q,l,i));
633 | end
634 | fprintf(fid,' }\n');
635 | fprintf(fid,' 1/m={');
636 | for q=1:length(m0)
637 | fprintf(fid,' %g',1/m(q,l,i));
638 | end
639 | fprintf(fid,' }\n');
640 | end
641 | fprintf(fid,' Objfn=%g\n',objfn(l,i));
642 | if usingOctave, fflush(fid); end
643 | end
644 | 
645 | 
646 | if verbose>1
647 | % output compute time:
648 | mytime=toc;
649 | fprintf(fid,' This iteration took %f seconds of wall-clock time.\n',mytime);
650 | % output convergence info:
651 | if converged
652 | % note mechanism to go check_objfn_iters iters past "convergence" at i_conv(l) iters:
653 | if i_conv(l)==0, i_conv(l)=iter; end
654 | fprintf(fid,' CONVERGED at i=%d based on dm criteria.\n',i_conv(l));
655 | if iter>=i_conv(l)+check_objfn_iters, break; end % break out of iters loop at end
656 | if iter>=i_conv(l)
657 | fprintf(fid,' continuing with %d more iters imposing decreasing objfn...\n',...
658 | check_objfn_iters-(iter-i_conv(l)));
659 | end
660 | else
661 | if i_conv(l)>0
662 | fprintf(fid,' Resetting convergence flag (set at iter %d) based on suddenly increasing dm.\n',i_conv(l));
663 | i_conv(l)=0; % reset; ie must have 3 converged iters to consider it good
664 | end
665 | end
666 | if usingOctave, fflush(fid); end
667 | end
668 | 
669 | 
670 | end; % end of iteration loop over i
671 | 
672 | if verbose>1
673 | if ~converged && iter==maxiters
674 | fprintf(fid,' Max iterations reached without convergence.\n');
675 | if usingOctave, fflush(fid); end
676 | end
677 | end
678 | 
679 | 
680 | % Compute model covariance and resolution matrices for current lambda:
681 | if verbose>1
682 | fprintf(fid,' Computing cov and res matrices for Lambda(%d)...\n',l);
683 | if usingOctave, fflush(fid); end
684 | end;
685 | 
686 | % Note Ghat corresponds to final model solution m(:,l,i), not m(:,l,i)+dm(:,l,i).
687 | if useMpref<2 % frequentist
688 | Gginv=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*Ghat';
689 | C(:,:,l)=Gginv*Gginv'; % model cov matrix for current lambda
690 | R(:,:,l)=Gginv*Ghat; % model resolution matrix for current lambda
691 | % data res matrix D is too big to save whole thing, so just saving its diag:
692 | Ddiag(:,l)=proddiag(Ghat,Gginv);
693 | else % Bayesian when useMpref==2
694 | C(:,:,l)=inv(Ghat'*Ghat+lambda(l)^2*L'*L); % posterior model covariance
695 | % note lambda(l) should always be 1 in this case,
696 | % and L=chol(inv(Cprior)) but note notation confusion
697 | % in that the "L" does not refer to "left triangular"
698 | % but the frequentist findiff operator from earlier!
699 | % Sorry but what to do. "L" here is right triangular.
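% To spell out the identity being relied on here (a sketch, under the stated
% assumptions that lambda=1 and L=chol(inv(Cprior)) so that L'*L=inv(Cprior),
% and with Ghat=Cd^(-1/2)*G so that Ghat'*Ghat=G'*inv(Cd)*G):
%   Ghat'*Ghat + L'*L = G'*inv(Cd)*G + inv(Cprior),
% whose inverse, taken above, is the usual linearized-Gaussian posterior
% model covariance about the solution point.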
700 | % set these empty as they are not relevant for Bayesian formulation: 701 | R(:,:,l)=[]; 702 | Ddiag(:,l)=[]; 703 | end 704 | 705 | 706 | if saveIntermediate 707 | if verbose>1 708 | fprintf(fid,' saving intermediate workspace to file invGN_%d.mat...\n',pid); 709 | if usingOctave, fflush(fid); end 710 | end; 711 | eval(['save -v7 /tmp/invGN_' num2str(pid) '.mat']); 712 | end 713 | 714 | 715 | end; % end of loop over lambdas index l 716 | 717 | 718 | if verbose 719 | fprintf(fid,'Finished all iterations for all lambdas.\n'); 720 | if usingOctave, fflush(fid); end 721 | end 722 | 723 | --------------------------------------------------------------------------------