├── bigfonts.m ├── proddiag.m ├── LICENSE.txt ├── testFindiffStepsize.m ├── getLcurveCorner.m ├── jaccen.m ├── convinfo.m ├── jacfwd.m ├── regress2lines.m ├── finiteDiffOp.m ├── manpage.txt ├── readme.md └── invGN.m /bigfonts.m: -------------------------------------------------------------------------------- 1 | % BIGFONTS - Script to increase plot fonts sizes and line widths 2 | % 3 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle. 4 | % Contact at andy@ganse.org , http://research.ganse.org 5 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 6 | % See LICENSE.txt for full license statement. 7 | % 8 | % Usage summary: 9 | % bigfonts; 10 | 11 | % These first two work in Matlab only (Octave understands these commands but 12 | % the result isn't pretty on the plot) 13 | %set(0,'defaultaxesfontsize',14); 14 | %set(0,'defaulttextfontsize',14); 15 | set(0,'DefaultAxesFontName','Arial'); 16 | set(0,'DefaultLineLineWidth',2); 17 | set(0,'DefaultAxesFontSize',14); 18 | set(0,'DefaultTextFontName','Arial'); 19 | set(0,'DefaultTextFontSize',14); 20 | set(0,'DefaultLineMarkerSize',5); 21 | -------------------------------------------------------------------------------- /proddiag.m: -------------------------------------------------------------------------------- 1 | function Cdiag = proddiag(A,B) 2 | % PRODDIAG - Compute only diagonal elements of matrix product solution 3 | % Ie, in cases or large outer dimensions of matrices A & B, it may be far too 4 | % slow (or not even feasible) to multiply them completely to obtain the product 5 | % matrix. If we just want the diagonal elements of their product, we can 6 | % compute that more directly, returning a vector with the diagonal. 7 | % 8 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 9 | % Contact at andy@ganse.org , http://research.ganse.org 10 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 11 | % See LICENSE.txt for full license statement. 12 | % 13 | % Usage: 14 | % Cdiag = proddiag(A,B) 15 | 16 | [Ma,Na]=size(A); 17 | [Mb,Nb]=size(B); 18 | if Na~=Mb 19 | disp('proddiag: error: must have equal inner dimensions of matrices for mult'); 20 | return; 21 | end 22 | 23 | M=min(Ma,Nb); % ie the diag of the product matrix will only be as long as the 24 | % smaller of the two outer dimensions of A & B 25 | Cdiag=zeros(M,1); 26 | 27 | for i=1:M 28 | Cdiag(i)=A(i,:)*B(:,i); 29 | end 30 | 31 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, University of Washington 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the University of Washington nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /testFindiffStepsize.m: -------------------------------------------------------------------------------- 1 | function testFindiffStepsize(fwdprob,mtest,mplot,varargin) 2 | % TESTFINDIFFSTEPSIZE - Analyze step size choice for 1st order finite differences 3 | % 4 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 5 | % Contact at andy@ganse.org , http://research.ganse.org 6 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 7 | % See LICENSE.txt for full license statement. 8 | % 9 | % Usage: 10 | % testFindiffStepsize(fwdprob,mtest,mplot,varargin); 11 | 12 | 13 | ind=0:1:12; % indices defining the step sizes (0:1:20) 14 | 15 | for i=ind 16 | epsf=10^(-i); 17 | dx=sqrt(epsf); % true for fwd diffs when vars scaled to O(1) 18 | if i<0, ii=['m',num2str(-i)]; else ii=num2str(i); end; 19 | 20 | disp([' computing Dfwd(h) for epsr=10^(-' ii ')...']); 21 | eval(['J' ii '=jacfwd(fwdprob,mtest,-dx,0,1,varargin{:});']); 22 | eval(['J' ii '_2=jacfwd(fwdprob,mtest,-2*dx,0,1,varargin{:});']); 23 | eval(['D' ii '=J' ii '_2-J' ii ';']); 24 | end 25 | clear J*; 26 | 27 | % plotting: 28 | for m=mplot 29 | figure; 30 | h=sqrt(10.^(-ind)); 31 | d1=1; d2=6; dincr=1; % use all 6 data pts 32 | c=ones(1,ceil((d2-d1+1)/dincr)); % just making as many ones as data pts 33 | 34 | % first form the vectors to plot, in the following format: 35 | % x=[h(1)*c;h(2)*c;...] 36 | % y=[D0(d1:d2,m)';D1(d1:d2,m)';...] 37 | x=[]; y=[]; 38 | for j=1:length(ind) 39 | x=[x;h(j)*c]; 40 | eval([ 'y=[y;abs(D' num2str(ind(j)) '(d1:dincr:d2,m))''];']); 41 | %eval([ 'y=[y;D' num2str(ind(j)) '(d1:dincr:d2,m)''];']); 42 | end 43 | 44 | loglog(x,y,'*-'); 45 | axis tight 46 | grid on 47 | t=title(['F=' fwdprob ', param#' num2str(m)]); 48 | set(t,'Interpreter','none'); % prevent "_" characters in title causing subscript 49 | xlabel('finite difference stepsize h'); 50 | ylabel('\partial{F}_{(2h)} - \partial{F}_{(h)}'); 51 | end 52 | -------------------------------------------------------------------------------- /getLcurveCorner.m: -------------------------------------------------------------------------------- 1 | function corner=getLcurveCorner(rough,misfit,method) 2 | % GETLCURVECORNER - Give index of Lcurve corner based on ROUGH and MISFIT values 3 | % 4 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 5 | % Contact at andy@ganse.org , http://research.ganse.org 6 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 7 | % See LICENSE.txt for full license statement. 
8 | % 9 | % Usage: corner=getLcurveCorner(rough,misfit) 10 | % 11 | % ROUGH and MISFIT are the equal-length vectors with the Lcurve axes values. 12 | % CORNER is the index of of the corner. 13 | % METHOD (optional - its default is "derivative-based") can be one of: 14 | % 0 = derivative-based 15 | % 1 = two-phase linear regression to find intersection point of two lines 16 | % 17 | % Uses two-phase linear regression in regress2lines() to find the knee; 18 | % suggested that ROUGH and MISFIT axes are squares of the respective two-norms. 19 | 20 | if nargin<3, method=0; end 21 | 22 | % Scale Lcurve so axes same magnitude: 23 | % (and the (:) ensures they are column vectors...) 24 | x=rough(:)/max(rough); y=misfit(:)/max(misfit); 25 | % Rotate Lcurve by 45deg to stabilize: 26 | theta=pi/180*45; 27 | rr=[cos(theta) -sin(theta); cos(theta) sin(theta)]; 28 | xxyy=rr*[x';y']; 29 | xx=xxyy(1,:)'; yy=xxyy(2,:)'; 30 | 31 | 32 | if method==0 33 | % use derivatives: 34 | [dummy,corner]=max(diff(diff(yy)./diff(xx))); 35 | elseif method==1 36 | % use two-phase linear regression: 37 | addpath('~/APLUW/src/matlab/AGmultiphase_linreg/'); 38 | [dummy,dummy,corner] = regress2lines(xx,yy); 39 | elseif method==2 40 | % just find min of rotated lcurve: 41 | [dummy,corner]=min(yy); 42 | end 43 | 44 | %plot(rough.^2,misfit.^2,'*-'); 45 | %hold on; 46 | %plot(rough(corner).^2,misfit(corner).^2,'r*'); 47 | 48 | % example derivative-based method - works great sometimes... 49 | %e=log10(normrough(:,iters)); r=log10(normmisfit(:,iters)); 50 | %[dummy,corner]=max(diff(diff(r)./diff(e))); % 2nd deriv to find lcurve knee 51 | -------------------------------------------------------------------------------- /jaccen.m: -------------------------------------------------------------------------------- 1 | function [J,F] = jaccen(funcname,x,epsf,verbose,fid,varargin); 2 | % JACCEN - 1st centered difference approximation of Jacobian matrix 3 | % of partial derivs of function 'funcname' w.r.t. its arguments in vector 'x'. 4 | % 5 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2005. 6 | % Contact at andy@ganse.org , http://research.ganse.org 7 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 8 | % See LICENSE.txt for full license statement. 9 | % 10 | % Usage: 11 | % [J,F] = jaccen('funcname',x,dx,verbose,fid,...); 12 | % [where funcname.m is a file within path] 13 | % or 14 | % [J,F] = jaccen(@funcname,x,dx,verbose,fid,...); 15 | % [when funcname() is defined within the calling script] 16 | % 17 | % Inputs: 18 | % funcname = String or function handle for the function being differenced. 19 | % (See below for required form of this function.) 20 | % x = The column vector (length M) of arguments to the function funcname. 21 | % dx = Small change in x used to compute differences. 22 | % dx can be a scalar or vector of same length as x. 23 | % If scalar it is converted to a vector of same length of x in 24 | % which all elements are equal to the scalar dx. 25 | % For problems in which x is all the same type of quantity, such 26 | % as a profile of wavespeeds, it is recommended to use a scalar 27 | % dx = sqrt(epsilon), where epsilon is the accuracy of funcname. 28 | % verbose = 1: output progress text, 0: run quietly. 29 | % Progress text (percent done) is useful for very slow functions. 30 | % fid = File handle to which progress text is outputted in verbose option. 31 | % Note fid=1 outputs to stdout, easiest to do this is not verbose 32 | % since nothing will then be outputted. 
33 | % 34 | % Outputs: 35 | % J = the finite diffs approximation of NxM Jacobian matrix at x 36 | % F = the function output column vector (length N) funcname(x) at x 37 | % 38 | % Function 'funcname' must be of form: 39 | % F=funcname(x) 40 | % or 41 | % F=funcname(x,...) 42 | % where the '...' are other arguments required besides the variables 43 | % x with repsect to which the function is being differenced. 44 | 45 | 46 | checksize=size(x); 47 | if checksize(2)~=1 48 | error(' jac(): x vector argument must be a column vector...\n'); 49 | % fprintf(fid,'error: jac(): x vector argument must be a column vector...\n'); 50 | % return; 51 | end 52 | M=length(x); 53 | 54 | % The following choice of dx requires that x is scaled to order 1: 55 | % FIXME: should check for that 56 | dx=epsf^(1/3)*ones(M,1); % note epsf^(1/3) is for ctr diffs 57 | % Now make dx and exactly representable number vis a vis machine precision 58 | % so that all error in derivs are from numerator (see Num Rec 2nd ed, sec 5.7) 59 | temp = x+dx; 60 | dx = temp-x; % Believe it or not this is actually a slightly different dx now, 61 | % different by an error of order of machine precision. 62 | % This effect may or may not make it into fwdprob input which 63 | % could have limited input precision, but in any case will have 64 | % an effect via denominator at end of this script. 65 | mask=eye(M); 66 | 67 | if verbose>1 68 | fprintf(fid,' Computing 1st centered finite diffs for Jacobian matrix...\n'); 69 | end 70 | 71 | if nargout==2 72 | F=feval(funcname,x,varargin{:}); 73 | end 74 | 75 | % centered diffs 76 | for j=1:M 77 | F1=feval(funcname,x+dx.*mask(:,j),varargin{:}); 78 | F2=feval(funcname,x-dx.*mask(:,j),varargin{:}); 79 | J(:,j) = ( F1 - F2 ) ./ dx(j)/2; 80 | 81 | % percent-done markers to show at matlab cmdline while running: 82 | if verbose>0 83 | p=j/M; 84 | if round(mod(p*100,10))==0 85 | fprintf(fid,' '); 86 | fprintf(fid,'%2.0f%%..',p*100); 87 | if p==1 88 | fprintf(fid,'\n'); 89 | end; 90 | end; 91 | end; 92 | 93 | end 94 | 95 | -------------------------------------------------------------------------------- /convinfo.m: -------------------------------------------------------------------------------- 1 | function [statusall,gradtest,mtest,ftest]=... 2 | convinfo(fwdprob,epsr,Ghat1,rhat0,rhat1,m0,m1,verbose,fid) 3 | % CONVINFO - Display convergence info for previous linearization step 4 | % in locally linearized inversion of a nonlinear inverse problem (using the 5 | % "invGN.m" script). 6 | % 7 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle. 8 | % Contact at andy@ganse.org , http://research.ganse.org 9 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 10 | % See LICENSE.txt for full license statement. 11 | % 12 | % Based on material in: 13 | % RC Aster, B Borchers, and CH Thurber, "Parameter Estimation and Inverse 14 | % Problems," Elsevier Academic Press, 2004. 15 | % 16 | % Usage: 17 | % [statusall,gradtest,mtest,ftest]=... 18 | % convinfo(fwdprob,epsr,Ghat1,rhat0,rhat1,m0,m1,verbose,fid); 19 | % 20 | % Inputs: 21 | % fwdprob = Matlab function handle to forward problem function which 22 | % is of form outvector=myfunction(invector). 
(See example 1 23 | % of "help function_handle" in Matlab for how to implement) 24 | % epsr = relative precision of forward problem output (predicted data) 25 | % Ghat1 = normalized Jacobian matrix of partial derivs of fwdprob at m1, 26 | % ie Ghat1=C^{-1/2}*G(m1), where C is the cov matrix of meas noise 27 | % rhat0 = column vector of normalized residuals at m0, i.e. 28 | % C^{-1/2}*(dmeas-dpred0) 29 | % rhat1 = column vector of normalized residuals at m1, i.e. 30 | % C^{-1/2}*(dmeas-dpred1) 31 | % m0 = column vector containing previous model value. 32 | % m1 = updated model (column) vector from latest invOccam() iteration. 33 | % verbose = 0: no status/diagnostic output, 1: some output 34 | % fid = file identifier for verbose output (status, warnings, etc.) 35 | % 36 | % Output "statusall" is a logical AND of the boolean results of 3 conv tests: 37 | % * gradtest = obj gradient being close enough to zero per A/B/T def p182 38 | % * mtest = change in norm(m) being close enough to zero per A/B/T def p182 39 | % * ftest = change in objective value being close enough to zero per A/B/T def p182 40 | % Statusall=1 means all 3 tests passed. Statusall=0 means at least 1 didn't, 41 | % in which case you probably want to take another linearization step. 42 | % 43 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2007-2008. 44 | % andy@ganse.org 45 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 46 | % See LICENSE.txt for full license statement. 47 | 48 | if length(epsr)>1 49 | fprintf(fid,' convinfo: note skipping this for case with length(epsr)>1...\n'); 50 | gradtest=0; mtest=0; ftest=0; statusall=0; 51 | return; 52 | end 53 | 54 | 55 | f1=rhat1'*rhat1; % objective function value at m1 56 | f0=rhat0'*rhat0; % objective function value at m0 57 | 58 | 59 | % check if the gradient of f(m) is approximately 0: 60 | gradobj1=2*Ghat1'*rhat1; % objective gradient at m1: grad(f(m1)) = 2G(m1)^T F(m1) 61 | % where G(m1) is the normalized jacobian matrix of fwd problem at m1 62 | left1=norm(gradobj1); 63 | right1=sqrt(epsr)*(1+f1); 64 | gradtest=left1<right1; 65 | if verbose>1 66 | if gradtest 67 | ans='small enough'; 68 | op='<'; 69 | else 70 | ans='not small enough yet'; 71 | op='not <'; 72 | end; 73 | fprintf(fid,' grad(Fobj): %s (%g %s %g)\n', ans, left1, op, right1); 74 | end 75 | 76 | % check if successive values of m are close: 77 | left2=norm(m1-m0); 78 | right2=sqrt(epsr)*(1+norm(m1)); 79 | mtest=left2<right2; 80 | if verbose>1 81 | if mtest 82 | ans='close enough'; 83 | op='<'; 84 | else 85 | ans='not close enough yet'; 86 | op='not <'; 87 | end; 88 | fprintf(fid,' dm: %s (%g %s %g)\n', ans, left2, op, right2); 89 | end 90 | 91 | % check if the values of f(m) have stopped changing: 92 | left3=abs(f1-f0); 93 | right3=epsr*(1+f1); 94 | ftest=left3<right3; 95 | if verbose>1 96 | if ftest 97 | ans='close enough'; 98 | op='<'; 99 | else 100 | ans='not close enough yet'; 101 | op='not <'; 102 | end; 103 | fprintf(fid,' df: %s (%g %s %g)\n', ans, left3, op, right3); 104 | end 105 | 106 | 107 | statusall = gradtest && mtest && ftest; 108 | %if verbose>=1 109 | %if statusall 110 | % fprintf(fid,' CONVERGED according to grad(Fobj) and dm and df...\n'); 111 | %else 112 | % fprintf(fid,' Not converged yet according to grad(Fobj) and dm and df...\n'); 113 | %end 114 | %end 115 | -------------------------------------------------------------------------------- /jacfwd.m: -------------------------------------------------------------------------------- 1 | function [J,F] = jacfwd(funcname,x,epsr,verbose,fid,varargin); 2 | % JACFWD - 1st
forward difference approximation of Jacobian matrix 3 | % of partial derivs of function 'funcname' w.r.t. its arguments in vector 'x'. 4 | % 5 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2005. 6 | % Contact at andy@ganse.org , http://research.ganse.org 7 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 8 | % See LICENSE.txt for full license statement. 9 | % 10 | % Usage: 11 | % [J,F] = jacfwd('funcname',x,epsf,verbose,fid,...); 12 | % [where funcname.m is a file within path] 13 | % or 14 | % [J,F] = jacfwd(@funcname,x,epsf,verbose,fid,...); 15 | % [when funcname() is defined within the calling script] 16 | % 17 | % Inputs: 18 | % funcname = String or function handle for the function being differenced. 19 | % (See below for required form of this function.) 20 | % x = The column vector (length M) of arguments to the function funcname. 21 | % This function assumes that elements of x are scaled to order unity. 22 | % epsr = The relative accuracy of the forward problem computation - on 23 | % really simple fwd problem functions, epsr will just equal machine 24 | % precision. But for more complicated functions epsr will be larger 25 | % than machine precision, and determining it may take some analysis. 26 | % Try e.g. Gill, Murray, & Wright, "Practical Optimization". In any 27 | % case note that epsr is used to choose stepsize in the finite diff. 28 | % Can be scalar (ie same for all elements of model vector) or vector 29 | % of same length as model vector for 1-to-1 correspondence to difft 30 | % variable "types" (eg layer velocities vs layer thickness) 31 | % If EPSR is negative then that flags it as actually being the DX to 32 | % use. Note that use of EPSR rather than DX assumes that the values 33 | % in X have been all scaled to order 1. 34 | % verbose = 1: output progress text, 0: run quietly. 35 | % Progress text (percent done) is useful for very slow functions. 36 | % fid = File handle to which progress text is outputted in verbose option. 37 | % Note fid=1 outputs to stdout, easiest to do this is not verbose 38 | % since nothing will then be outputted. 39 | % 40 | % Outputs: 41 | % J = the finite diffs approximation of NxM Jacobian matrix at x. 42 | % F = the function output column vector (length N) funcname(x) at x. 43 | % 44 | % Function 'funcname' must be of form: 45 | % F=funcname(x) 46 | % or 47 | % F=funcname(x,...) 48 | % where the '...' are other arguments required besides the variables 49 | % x with repsect to which the function is being differenced. 
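% Example (illustrative only; the anonymous forward function below is a stand-in
% for a real forward problem whose output is accurate to machine precision):
%   fwd = @(x) [x(1)^2; x(1)*x(2)];   % analytic Jacobian is [2*x(1) 0; x(2) x(1)]
%   [J,F] = jacfwd(fwd, [1;2], eps, 0, 1);
%   % J comes back approximately [2 0; 2 1], and F = [1; 2]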
50 | 51 | 52 | checksize=size(x); 53 | if checksize(2)~=1 54 | error(' jacfwd(): x vector argument must be a column vector...\n'); 55 | % fprintf(fid,'error: jacfwd(): x vector argument must be a column vector...\n'); 56 | % return; 57 | end 58 | M=length(x); 59 | % The following choice of dx requires that x is scaled to order 1: 60 | % FIXME: should check for that 61 | if epsr(1)>0 62 | if length(epsr)==1 63 | dx=epsr^(1/2)*ones(M,1); % note epsr^(1/2) is for fwd diffs, cent diffs is different value 64 | elseif length(epsr)==length(x) 65 | dx=epsr.^(1/2); % note epsr^(1/2) is for fwd diffs, cent diffs is different value 66 | end 67 | elseif epsr(1)<0 68 | if length(epsr)==1 69 | dx=-epsr*ones(M,1); % note epsr^(1/2) is for fwd diffs, cent diffs is different value 70 | elseif length(epsr)==length(x) 71 | dx=-epsr; % note epsr^(1/2) is for fwd diffs, cent diffs is different value 72 | end 73 | else 74 | disp(' jacfwd: error: all elements of epsr and dx must be nonzero.\n'); 75 | return; 76 | end 77 | % Now make dx and exactly representable number vis a vis machine precision 78 | % so that all error in derivs are from numerator (see Num Rec 2nd ed, sec 5.7) 79 | temp = x+dx; % note temp is vector of same length as model vector 80 | dx = temp-x; % Believe it or not this is actually a slightly different dx now, 81 | % different by an error of order of machine precision. 82 | % This effect may or may not make it into fwdprob input which 83 | % could have limited input precision, but in any case will have 84 | % an effect via denominator at end of this script. 85 | mask=eye(M); 86 | 87 | F=feval(funcname,x,varargin{:}); 88 | 89 | % forward diffs 90 | for j=1:M 91 | J(:,j) = ( feval( funcname, x+dx.*mask(:,j), varargin{:} ) - F ) ./ dx(j); 92 | 93 | % percent-done markers to show at matlab cmdline while running: 94 | if verbose>1 95 | fprintf(fid,' '); 96 | p=j/M; 97 | if round(mod(p*100,10))==0 98 | fprintf(fid,' '); 99 | fprintf(fid,'%2.0f%%..',p*100); 100 | if p==1 101 | fprintf(fid,'\n'); 102 | end; 103 | end; 104 | end; 105 | 106 | end 107 | -------------------------------------------------------------------------------- /regress2lines.m: -------------------------------------------------------------------------------- 1 | function [mstar, Rstar, idiv, Gstar] = regress2lines(x,y) 2 | % REGRESS2LINES - fit two cojoined straight lines to data points. 3 | % This is a nonlinear estimation problem. The data will be divided into two 4 | % sets, each fitted with a line; the lines intersect between the data division. 5 | % 6 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 7 | % Contact at andy@ganse.org , http://research.ganse.org 8 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 9 | % See LICENSE.txt for full license statement. 10 | % 11 | % IMPORTANT - You should be able to justify such a restricted choice of 12 | % model as simply two straight lines. Perhaps it makes more sense for you 13 | % to instead fit a single, higher-degree polynomial (an easy linear problem) 14 | % or exponential curve to your data. To use this two-phase straight-line 15 | % regression on your data you should be able to cite some theory for why 16 | % you specifically want two intersecting straight lines as opposed to a 17 | % more general model. 18 | % 19 | % Usage: [m, R, idiv, G] = regress2lines(x,y); 20 | % 21 | % Input - data column vectors x & y of equal length. 
22 | % 23 | % Outputs - 24 | % m = column vector of the 5 solution paramters [a1,b1,a2,b2,x0] ala 25 | % lines y=a1*x+b1 and y=a2*x+b2, intersecting at x0. 26 | % R = sum of sqrs of residuals of the solution 27 | % idiv = the index of the data point after which the data was divided. 28 | % x0 is between x(idiv) and x(idiv+1). 29 | % G = the linear problem matrix associated with the solution x0. 30 | % This might be used to calculate *some* stats of the solution. 31 | % 32 | % Since the problem as a whole is nonlinear, the full picture of the 33 | % solution statistics is seen by Monte Carlo simulation, as demonstrated and 34 | % explored in my MultiRegressLines.matlab package, available at 35 | % https://www.github.com/aganse/MultiRegressLines.matlab 36 | 37 | 38 | % More specific technical description: 39 | % 40 | % The problem is to solve: 41 | % Given xdata(1:n),ydata(1:n) 42 | % Find m={a1,b1,a2,b2,x0} such that we 43 | % Minimize data misfit in least-squares sense 44 | % ie min sum( ydata(1:istar)-a1*xdata(1:istar)-b1 )^2 + 45 | % sum( ydata(istar+1:end)-a2*xdata(istar+1:end)-b2 )^2 46 | % With constraint: x(istar) <= x0=(b2-b1)/(a1-a2) <= x(istar+1) 47 | % (ie x0 is the intersection of the two fit lines and is at the break in 48 | % data) 49 | % 50 | % The problem as a whole is a nonlinear regression problem because of 51 | % the parameter x0. But it's really convenient that for a given constant 52 | % value of x0 the problem becomes linear, on top of which each x0 53 | % corresponds to a single division in the data points into two sets. 54 | % (For each set we compute a standard linear fit, and x0 is the unique 55 | % location at which the two lines intersect.) 56 | % Given a value for x0, which tells the data points between which to split 57 | % the data, we set up the linear problem as y=G*m, where m=[a1,b1,a2,b2]. 58 | % x0 is then computed from the elements of m as x0=(b2-b1)/(a1-a2). 59 | % 60 | % We loop over the data points: each time dividing them into two groups, 61 | % recording the sum-of-squares-of-residuals R and the x0 value. And after 62 | % going over all the data points the solution is ideally the x0 and its 63 | % associated a1,b1,a2,b2 which correspond to the minimum R value. We do 64 | % this "brute force" approach to the nonlinear problem rather than such 65 | % often-seen methods like locally approximating the nonlinear function as 66 | % linear and using derivatives, because this problem has discontinuities 67 | % that mess up the differentiability at the data points. And the search 68 | % over all x0 is very small because it is discretized - there's only one 69 | % x0 that corresponds to each data division, and so we only need to compute 70 | % N-2 values of R to find that minimum, where N is the number of data points. 71 | % Note there is a remote possibility that there will be two equal minima, 72 | % ie that there are two equally worthy places to intersect two fit lines, 73 | % so one should look for that when analyzing our R values and report both 74 | % cases. Please see a full lowdown at: 75 | % http://staff.washington.edu/aganse/mpregression/index.html 76 | % Especially note on that webpage the important caveats about when this 77 | % script will and will not work. 
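% Example (illustrative only; the data below are synthetic): fit two conjoined
% lines to noisy data whose slope changes at x=5:
%   x = (0:0.5:10)';
%   y = [2*x(x<=5); 10 + 4*(x(x>5)-5)] + 0.1*randn(size(x));
%   [m,R,idiv,G] = regress2lines(x,y);
%   % m = [a1;b1;a2;b2;x0], so the fitted intersection m(5) should land near 5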
78 | 79 | 80 | N=length(y); 81 | igood=zeros(N-2,1); x0vector=igood; Rvector=igood; % creating col vectors 82 | for i=2:N-1 83 | % set up the linear problem as y=G*m, where m=[a1,b1,a2,b2]: 84 | G(1:i,:)=[x(1:i),ones(i,1),zeros(i,2)]; 85 | G(i+1:N,:)=[zeros(N-i,2),x(i+1:N),ones(N-i,1)]; 86 | m=pinv(G'*G)*G'*y; % least-squares solution to linear problem 87 | x0=(m(4)-m(2))/(m(1)-m(3)); % where the two fit lines intersect 88 | R=sum( (y-G*m).^2 ); 89 | %fprintf(1,'x(1)=%f, x0=%f, x(N)=%f\n',x(1),x0,x(N)); 90 | if x0>=x(1) && x0<=x(N) 91 | igood(i-1)=1; 92 | x0vector(i-1)=x0; 93 | Rvector(i-1)=R; 94 | end 95 | end 96 | 97 | % Pick out the minimum R and its associated solution info: 98 | %disp(num2str([(2:N-1)',igood,x0vector,Rvector])); % (for debugging) 99 | [Rstar,imin]=min(Rvector(find(igood==1))); 100 | idiv=imin+1; 101 | Gstar(1:idiv,:)=[x(1:idiv),ones(idiv,1),zeros(idiv,2)]; 102 | Gstar(idiv+1:N,:)=[zeros(N-idiv,2),x(idiv+1:N),ones(N-idiv,1)]; 103 | m=pinv(Gstar'*Gstar)*Gstar'*y; % least-squares solution to linear problem 104 | mstar=[m;(m(4)-m(2))/(m(1)-m(3))]; 105 | 106 | 107 | 108 | 109 | % Test data, etc...: 110 | % N=40; 111 | % x=sort(N*rand(N,1)); 112 | % 113 | % noisy two lines: 114 | % G(1:i,:)=[x(1:i),ones(i,1),zeros(i,2)]; 115 | % G(i+1:N,:)=[zeros(N-i,2),x(i+1:N),ones(N-i,1)]; 116 | % y=G*[4,3,-1,73]'; 117 | % yd=y+4*randn(N,1); 118 | % 119 | % noisy parabola: 120 | % G=[x.^2/2, x, ones(N,1)]; 121 | % y=G*[4,4,4]'; 122 | % yd=y+10*randn(N,1); 123 | % 124 | % plots: 125 | % plot(x,y,'-',x,yd,'*') 126 | % plot(x,y,'-',x,yd,'*',[x(1);m(5);m(5);x(end)],... 127 | % [m(1)*x(1)+m(2), m(1)*m(5)+m(2), m(3)*m(5)+m(4), m(3)*x(end)+m(4)],'-+') 128 | -------------------------------------------------------------------------------- /finiteDiffOp.m: -------------------------------------------------------------------------------- 1 | function L=finiteDiffOp(order,segmentlengths,dx,BCoption) 2 | % FINITEDIFFOP - Return an ORDERth order finite difference operator matrix 3 | % with "kinks" in it to break it into segments of SEGMENTLENGTHS to accommodate 4 | % discontinuities in a profile model, or multiple profiles concatenated together 5 | % (e.g. to avoid regularizing across the profile boundaries). 6 | % Various boundary conditions for the operator may be specified, or none at all, 7 | % according to the codes in BCoption. (NOTE BCOPTION ONLY PARTIALLY IMPLEMENTED) 8 | % 9 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, 2010. 10 | % Contact at andy@ganse.org , http://research.ganse.org 11 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license. 12 | % See LICENSE.txt for full license statement. 13 | % 14 | % Usage: 15 | % L=finiteDiffOp(order,segmentlengths,dx,BCoption); 16 | % 17 | % Inputs: 18 | % order = order of fin diff operator (=0,1,2) 19 | % segmentlengths = vector containing number of model elements between each 20 | % discontinuity, or can be simply a scalar value with number 21 | % of total model elements if no discontinuities in model. 22 | % Note this could equivalently be used when model vector is 23 | % a concatenation of multiple profiles such that you don't 24 | % want to smooth boundary from one profile to next. 25 | % dx = delta x between model parameters in continuous profile. 26 | % BCoption: NOTE: OPTS 2-5 REQUIRE CODE FIX AT BTM OF SCRIPT, BUT OPTS 0-1 OK. 27 | % 0 = no additional bdy conds. 28 | % 1 = prefer 0 gradient between last microlayer and basement, 29 | % trying to keep vels equal so no reflection from basement. 
30 | % 2 = Constrain f'' to equal zero at the endpoints, so that there is 31 | % zero curvature there. L not full rank here. 32 | % 3 = Extends minimum curvature integral from -inf to +inf, forcing 33 | % f=0 at all nodes outside grid. f'' at boundary and first node 34 | % outside grid can have non-zero curvature. Note results using 35 | % this BC can be similar to #4 because of the nearby f=0. But L 36 | % is full rank here. 37 | % 4 = Constrain f' to equal zero at the endpoints, so that there is 38 | % zero gradient there. L not full rank here. 39 | % 5 = Constrain f itself to equal zero at the endpoints. L full rank. 40 | % 41 | % Outputs: 42 | % L = the finite diff operator matrix 43 | 44 | 45 | if order==0 46 | % "Zeroth order Tikhonov regularization", also called "ridge regression". 47 | % Note no segment bdys are handled here because 0th order regu doesn't care. 48 | L=eye(sum(segmentlengths)); 49 | 50 | elseif order==1 51 | % "first order Tikhonov regularization" for total variation regularization 52 | 53 | L=[]; 54 | for i=1:length(segmentlengths) 55 | M=segmentlengths(i); 56 | L1=-1*diag(ones(M,1)) + diag(ones(M-1,1),+1); % fwd diffs : O(dx) 57 | %L1=diag(ones(M-1,1),1) - diag(ones(M-1,1),-1); % cen diffs : O(dx^2) 58 | L1=L1(1:(end-1),:); % diag() by itself didn't give quite what we wanted - 59 | % it gave a square matrix with implicit bdy conds - 60 | % so we're just removing top and bottom rows with BCs. 61 | % Note nonsquare now. 62 | L=blkdiag(L,L1); 63 | end 64 | 65 | % NOTE dx presently not implemented but if reimplementing note dx is now a vector. 66 | 67 | % Following is dx normalization such that L*m = 1st deriv of m. 68 | % Only really makes sense to do this when not including other constraints 69 | % or segments in problem, otherwise easier to just leave off: 70 | %L=L/dx; % for forward diffs to O(dx) 71 | %L=L/dx/2; % for central diffs to O(dx^2) 72 | 73 | elseif order==2 74 | % "2nd order Tikhonov regularization" for Occam's (smoothing) regularization 75 | 76 | % central diffs : O(dx^2) : 77 | L=[]; 78 | for i=1:length(segmentlengths) 79 | M=segmentlengths(i); 80 | L1=-2*diag(ones(M,1)) + diag(ones(M-1,1),1) + diag(ones(M-1,1),-1); 81 | if BCoption==1 82 | L1(1,1)=-1; L1(end,end)=-1; 83 | elseif BCoption==0 84 | L1=L1(2:end-1,:); % diag() by itself didn't give quite what we wanted - 85 | % it gave a square matrix with implicit bdy conds - 86 | % so we're just removing top and bottom rows with BCs. 87 | % Note nonsquare now. 88 | end 89 | L=blkdiag(L,L1); 90 | end 91 | 92 | % NOTE dx presently not implemented but if reimplementing note dx is now a vector. 93 | 94 | % Following is dx normalization such that L*m = 2nd deriv of m. 95 | % Only really makes sense to do this when not including other constraints 96 | %or segments in problem, otherwise easier to just leave off: 97 | %L1=L1/dx^2; 98 | %L1=L1/sqrt(M-2); % or here we normalize by #pts-2 (ie subtracting endpts) 99 | % to address the finite evaluation domain. 100 | % Now the units of L1*vector are M/X^2, where M are 101 | % the units of model and X are the units of the axis 102 | % the model is discretized over. 103 | 104 | end 105 | 106 | 107 | 108 | if 0 109 | % Apply boundary conditions based on input options: 110 | % Note if BCoption==0 then all this is skipped... 
111 | if BCoption==1, L(end+1,end-1:end)=[-1,1]; end; % for my microlayers on top 112 | % of lower halfspace case - 113 | % prefer zero-gradient between 114 | % last microlayer and halfspace 115 | % to prevent reflections there 116 | if BCoption==2, topleftcorner=[0 0 0 0]; btmrightcorner=[0 0 0 0]; end; 117 | if BCoption==3, topleftcorner=[-2 1 0 0]; btmrightcorner=[0 0 1 -2]; end; 118 | if BCoption==4, topleftcorner=[-2 2 0 0]; btmrightcorner=[0 0 2 -2]; end; 119 | if BCoption==5, topleftcorner=[-2 0 0 0]; btmrightcorner=[0 0 0 -2]; end; 120 | 121 | if BCoption>1, 122 | % WARNING: FIXME: this part is not ready and must be correctly implemented: 123 | % need to first prepend a new row to top of L and append a new row to btm of L, then: 124 | %L(1,1:4)=topleftcorner; 125 | %L(M,M-3:M)=btmrightcorner; 126 | end 127 | 128 | end 129 | -------------------------------------------------------------------------------- /manpage.txt: -------------------------------------------------------------------------------- 1 | INVGN - Gauss-Newton inversion, version 1.0. 2 | by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle. 3 | Contact at andy@ganse.org , http://research.ganse.org 4 | Copyright (c) 2015, University of Washington, via 3-clause BSD license. 5 | See LICENSE.txt for full license statement. 6 | 7 | ------------------------------------------------------------------------------- 8 | 9 | INVGN: Calculate Tikhonov-regularized, Gauss-Newton nonlinear iterated inversion 10 | to solve the damped nonlinear least squares problem: 11 | minimize ||g(m)-d||^2_2 + lambda^2||Lm||^2_2 12 | For appropriate choices of regularization parameter lambda, this problem is 13 | equivalent to: 14 | minimize ||Lm||_2 subject to ||g(m)-d||_23), but to handle a few minor 38 | remaining differences between Matlab and Octave, do note the "usingOctave" 39 | flag among the options listed at top of the InvGN.m script. Perhaps in future 40 | versions INVGN will allow these options to be set via keyword/value pairs in the 41 | input argument list, but not implemented yet. 42 | 43 | 44 | USAGE: 45 | [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv] = ... 46 | invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,... 47 | verbose,fid,vargin); 48 | 49 | 50 | INPUTS: (note all vectors are always column vectors) 51 | fwdfunc = Matlab/Octave function handle to forward problem function which 52 | is of form outvector=myfunction(invector,...). (See example 1 53 | of "help function_handle" in Matlab for how to implement) 54 | derivfunc = Matlab/Octave function handle to derivatives function which 55 | is of form outvector=myfunction(invector,...). 56 | If derivfunc=[] then finitediffs are automatically used instead. 57 | epsr = relative accuracy of forward problem computation (not the 58 | accuracy of the modeling of the physical process, but of the 59 | computation itself, see e.g. Gill,Murray,Wright 1981). Affects 60 | convergence testing and is passed into finite diff function (if 61 | used), in both cases providing stepsize h (h is about sqrt(epsr) 62 | if the model params are all of order unity). Epsr has a minimum 63 | at the tradeoff between degrading Talor series expansion accuracy 64 | if h too large, and increasing roundoff error in the numerator 65 | G(m+h)-G(m) of finite diffs if h too small. 
66 | Epsr can be scalar (ie same for all elements of model vector) or 67 | vector of same length as model vector for 1-to-1 correspondence 68 | to different variable "types" (eg layer bulk properties vs layer 69 | thicknesses). 70 | dmeas = measured data vector (includes noise) 71 | covd = if sqr matrix, then this is measurement noise covariance matrix. 72 | if col vector, then this is diag of meas noise covariance matrix. 73 | if scalar, then this is measurement variance, equal for all meas. 74 | if neg, then = -covd^(-1/2), allowing e.g. use of zeros for a mask, 75 | so note in that case it's the inverse of the STDEVS (sqrt(cov)). 76 | lb = lower model bounds vector (hard bnds, no tolerance) 77 | ub = upper model bounds vector (hard bnds, no tolerance) 78 | These bounds are only implemented at the iteration steps after 79 | solving for the model perturbation without them; ie functions 80 | such as BVLS or LSQNONNEG are not used. If the bounds are seen 81 | to be passed in a candidate model perturbation, the stepsize of 82 | the perturbation is reduced (but retaining its direction) until 83 | within bounds. As long as we know the final solution must be 84 | within the bounds and not on them, this is merely like changing 85 | the starting point of inversion and is valid (perhaps somtimes 86 | not as efficient as one of those more complicated functions). 87 | But if the solution from this code ends up on a bound, note you 88 | should treat that solution with some caution and not consider it 89 | a true GN solution. In my own inverse problems using this code 90 | the bounds only ever engage at the tails of my Lcurve where I'm 91 | not too worried about being rigorous, so I haven't found it 92 | necessary to complicate things with those other functions. 93 | m0 = initial model vector estimate to start iterating from; if this 94 | is instead a matrix (NxM), then it's a collection of M initial 95 | model vectors of length N each, corresponding to M lambda values 96 | (Typically this is done to continue iterations on previous runs 97 | without starting over.) 98 | Alternate use: if L (next input arg) = I (identity matrix), then 99 | this is also the "preferred model" whose distance from the soln 100 | model is minimized. 101 | L = If this is a matrix, then this is the user-supplied finite 102 | difference operator for Tikhonov regularization (function 103 | finiteDiffOP.m can help construct it) in the frequentist 104 | framework. If in the Bayesian framework and lambda is set to 1, 105 | then L can be supplied as the Cholesky decomposition of the 106 | inverse model prior covariance matrix. 107 | If L is scalar then it is the number of profile segments (for 108 | example if the model vector represents concatenated P & S wave 109 | speed profiles then Lin=2) and 2nd order Tikhonov (smoothness) 110 | frequentist regularization will be used. 111 | lambda = Used for the frequentist inversion framework (if using Bayesian 112 | framework then set lambda=1 and see Bayesian note for L above). 113 | If lambda is a negative scalar, auto calculate Lcurve with num 114 | pts equal to magnitude of the negative scalar. The auto-calc'd 115 | curve starts at lambda_mid = sqrt(max(eig(G'*G))/max(eig(L'*L))) 116 | and decreases logarithmically from there to 3 orders of mag 117 | less and more than lambda_mid by default. If lambda >=0, use 118 | its value(s) directly as frequentist Tikhonov tradeoff param(s), 119 | in which case lambda can be scalar or vector. 
120 | If lambda is a 3-element vector, lambda(1) is the pos or neg scalar 121 | just described; and lambda(2:3) are how many orders of magnitude 122 | greater (lambda(2)) or less (lambda(3)) than lambda_mid to sweep 123 | in the auto-calc'd curve. (lambda(2:3) defaults are 3,-3) 124 | maxiters = Number of GN iterations to perform at each lambda on the Lcurve. 125 | So total number of derivative estimations will be 126 | numlambdas*maxiters. 127 | useMpref = flag for one of three problem frameworks: 128 | useMpref=0 is for a frequentist formulation with higher-order 129 | Tikhonov regularization. Here the regularization doesn't 130 | handle distance from preferred (initial) model m0, but no 131 | restriction on L (unlike useMpref=1). 132 | useMpref=1 is for using 0th-order-Tikh (ridge regr), ie 133 | L==eye(size(L)), in a frequentist formulation in which m0 is 134 | the "preferred model" the inversion minimizes distance to. 135 | useMpref=2 is for using Bayesian formulation, in which case: 136 | m0 is the prior model mean, lambda must =1, L=chol(inv(Cprior)) 137 | (user must set that L externally, note ironically L there must 138 | be RIGHT triangular, the "L" notation is from frequentist case), 139 | output C is now the Bayesian posterior covariance matrix (which 140 | note is different from the frequentist C), outputs R and Ddiag 141 | are empty, and normrough won't have literal meaning anymore. 142 | verbose = 0-3, with 0 = run quietly, and 3 = lots of progress text 143 | Progress text (percent done) in findiffs is useful for slow 144 | forward problems and/or debugging - but note percent done text 145 | is unavailable in jacfwddist and jaccendist. 146 | fid = File handle to which progress text is output in verbose option. 147 | Note fid=1 outputs to stdout, but an output file is useful for 148 | long batch runs when the forward problem is slow. 149 | vargin = the extra arguments (if any) that must be passed to the fwdprob 150 | function are tacked onto the end here. You may have none, in 151 | which case your last arg is the fid.) 152 | 153 | OUTPUTS: (note all vectors are always column vectors) 154 | m = matrix of maximum likelihood model solution vectors for each 155 | lambda. So m is MxP for M model params and P points on Lcurve, 156 | ie each column of m is a model vector for a different lambda. 157 | normdm = vectors of L2 norms of model perturbation steps "dm" for each 158 | lambda. (so normdm is imax x P, with i steps to convergence 159 | for l'th lambda, imax=max(i(l)), and P points on Lcurve). Unused 160 | elements in normdm are set to -1, since norms can't be negative. 161 | normmisfit = vector of L2 norms of data misfit between measured and 162 | predicted data at solution point for each lambda. 163 | normrough = vector of L2 norms of roughness (for which units are 2nd deriv) 164 | at solution points for each lambda. "Roughness" refers to case 165 | of 2nd order Tikhonov (smoothing constraint) but note really here 166 | this norm's meaning depends on what the supplied L matrix is 167 | (eg might be other model norms than roughness). And in Bayesian 168 | case (useMpref=2) this doesn't have same meaning anymore although 169 | it's still part of the objective function as it relates to Cprior. 170 | lambda = vector of tradeoff parameters used for Tikhonov regularization. 171 | (you might already have this if you passed in vector of lambdas 172 | as one of the input arguments, but this is useful in the case of 173 | specifying to auto-calculate N Lcurve points). 
174 | dpred = predicted data at solution models (so there are as many vectors 175 | in dpred as there are points on the Lcurve, so dpred is NxP, for 176 | N data pts and P points on Lcurve). 177 | G = Jacobian matrices of partial derivatives at solution models 178 | (so there are as many jacobian matrices in G as there are points 179 | on the Lcurve, so G is NxMxP, for N data pts, M model params, 180 | and P points on Lcurve). 181 | C = if useMpref<2, C contains the frequentist covariance matrices of 182 | solution models (so there are as many cov matrices in C as there 183 | are points on the Lcurve, so C is MxMxP, for M model params and 184 | P points on Lcurve). If useMpref==2 (flagging Bayesian case), 185 | then C is instead the Bayesian posterior model covariance matrix. 186 | R = resolution matrices of solution models (so there are as many cov 187 | matrices in C as there are points on the Lcurve, so C is MxMxP, 188 | for M model params and P points on Lcurve). 189 | (R is empty for Bayesian case when useMpref=2.) 190 | Ddiag = column vector of diagonal of data res matrix (length Ndata) 191 | (Ddiag is empty for Bayesian case when useMpref=2.) 192 | iconv = column vector of number of iterations before convergence at each 193 | lambda. if didn't converge, the iconv value for that lambda 194 | is 0. (length Ndata). 195 | 196 | For more clarity in the code, this function computes its model perturbations via 197 | (regularized) normal equations, rather than generalized SVD or subiterations of 198 | gradient descent, which may be more efficient and/or scalable. The assumption 199 | in this choice was that forward function code takes much longer to compute than 200 | the model perturbation solution itself (which can otherwise be computed faster 201 | via GSVD). If that assumption is not true, then much improvement in compute 202 | time of this function would result from solving for the perturbation using GSVD 203 | methods such as those provided in Per Christian Hansen's toolkit (however note 204 | that toolkit would take a lot of tweaking/paring to make it Octave compatible): 205 | http://www2.imm.dtu.dk/~pch/Regutools 206 | Aside from computational efficiency/speed, another important consequence of 207 | solving normal equations in this code is that it constrains the size of the 208 | problem (number of model parameters) relative to amount of computer memory due 209 | to calculation of a matrix inverse. If useful to note I have successfully run 210 | this code for over 10,000 model parameters on a computer with 16GB of RAM, and 211 | for over 2500 model parameters on a computer with 8GB RAM (in both those cases 212 | the matrix inverse was the memory constraint rather than forward problem). 213 | 214 | 215 | EXAMPLES: 216 | (leaving off output part "[var1,var2,etc]=" which is same format for all): 217 | 218 | Regularized frequentist inversion with auto-chosen lambdas (numlambdas of them): 219 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,-numlambdas,0,maxiters,3,1); 220 | 221 | Regularized frequentist inversion with prechosen lambdas: 222 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,... 223 | [1.04193e+07; 10419.3; 10.4193; 0.0104193;],0,maxiters,3,1); 224 | 225 | Regu inv, picking up after previous run (using lambda vector from prev run): 226 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,lambda,0,maxiters,3,1); 227 | 228 | Parameter estimation (no regularization): 229 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,zeros(size(L)),1,0,... 
230 | maxiters,3,1); 231 | 232 | Frequentist linear problem (should converge after 1st iter), w/ smoothing regu: 233 | deriv=@(x) A; fwdp=@(x) A*x; % define funcs using the A matrix inline 234 | invGN(fwdp,deriv,epsr,datameas,covd,lb,ub,zeros(M,1),L,-numlambdas,0,2,0,1); 235 | (Note maxiters=2 there: actually the first "iter" in saved results is for the 236 | initial value, which admittedly isn't relevant for purely linear problems, but 237 | that's how this nonlinear script fits purely linear problems. Yes definitely 238 | an inefficient kludge.) 239 | 240 | Frequentist nonlinear problem with preferred model: 241 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mpref,eye(M),-numlambdas,1,maxiters,0,1); 242 | 243 | Bayesian nonlinear problem: 244 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mprior,chol(inv(Cprior)),1,2,... 245 | maxiters,0,1); 246 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | _(here's a Markdown-enabled version of manpage.txt which describes InvGN...)_ 2 | ##INVGN - Gauss-Newton inversion - version 1.0. 3 | _by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle._ 4 | _[andy@ganse.org](mailto:andy@ganse.org)_ 5 | _Copyright (c) 2015, University of Washington, via 3-clause BSD license._ 6 | _See LICENSE.txt for full license statement._ 7 | 8 | ------------------------------------------------------------------------------- 9 | 10 | INVGN calculates Tikhonov-regularized, Gauss-Newton nonlinear iterated inversion 11 | to solve the following damped nonlinear least squares problem: 12 | 13 | minimize ||g(m)-d||^2_2 + lambda^2||Lm||^2_2 14 | 15 | For appropriate choices of regularization parameter lambda, this problem is 16 | equivalent to: 17 | 18 | minimize ||Lm||_2 subject to ||g(m)-d||_23), but to handle a few minor 44 | remaining differences between Matlab and Octave, do note the "usingOctave" 45 | flag among the options listed at top of the InvGN.m script. Perhaps in future 46 | versions INVGN will allow these options to be set via keyword/value pairs in the 47 | input argument list, but not implemented yet. 48 | 49 |
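To make the damped least-squares objective stated above concrete, the quantity minimized over the model vector m at a fixed lambda can be written in Matlab/Octave notation as below (phi is just a label for the objective value; g, d, L, and lambda are as in the formula near the top of this page):

```
phi = norm(g(m) - d)^2 + lambda^2 * norm(L*m)^2;
```

INVGN drives this quantity down with Gauss-Newton iterations, repeating the minimization for each lambda to trace out the Lcurve.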
50 | ###USAGE: 51 | 52 | ``` 53 | [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv] = ... 54 | invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,... 55 | verbose,fid,vargin); 56 | ``` 57 | 58 |
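The first two arguments are ordinary Matlab/Octave function handles. As a brief illustration (the function name myForwardModel and its extra arguments freqs and depths are hypothetical), a forward model needing additional fixed inputs would be hooked up as follows, with those extras appended after fid:

```
fwdfunc   = @myForwardModel;   % myForwardModel.m must be on the path
derivfunc = [];                % empty => finite differences are used instead
% extra arguments of myForwardModel(m,freqs,depths) are appended after fid:
% invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,...
%       verbose,fid,freqs,depths);
```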
59 | ###INPUTS: (all vectors are always column vectors) 60 | 61 | **`fwdfunc`** = Matlab/Octave function handle to forward problem function which 62 | is of form outvector=myfunction(invector,...). (See example 1 63 | of "help function_handle" in Matlab for how to implement) 64 | **`derivfunc`** = Matlab/Octave function handle to derivatives function which 65 | is of form outvector=myfunction(invector,...). 66 | If derivfunc=[] then finitediffs are automatically used instead. 67 | **`epsr`** = relative accuracy of forward problem computation (not the 68 | accuracy of the modeling of the physical process, but of the 69 | computation itself, see e.g. Gill,Murray,Wright 1981). Affects 70 | convergence testing and is passed into finite diff function (if 71 | used), in both cases providing stepsize h (h is about sqrt(epsr) 72 | if the model params are all of order unity). Epsr has a minimum 73 | at the tradeoff between degrading Talor series expansion accuracy 74 | if h too large, and increasing roundoff error in the numerator 75 | G(m+h)-G(m) of finite diffs if h too small. 76 | Epsr can be scalar (ie same for all elements of model vector) or 77 | vector of same length as model vector for 1-to-1 correspondence 78 | to different variable "types" (eg layer bulk properties vs layer 79 | thicknesses). 80 | **`dmeas`** = measured data vector (includes noise) 81 | **`covd`** = if sqr matrix, then this is measurement noise covariance matrix. 82 | if col vector, then this is diag of meas noise covariance matrix. 83 | if neg, then = -covd^(-1/2), allowing e.g. use of zeros for a mask, 84 | so note in that case it's the inverse of the STDEVS (sqrt(cov)). 85 | **`lb`** = lower model bounds vector (hard bnds, no tolerance) 86 | **`ub`** = upper model bounds vector (hard bnds, no tolerance) 87 | These bounds are only implemented at the iteration steps after 88 | solving for the model perturbation without them; ie functions 89 | such as BVLS or LSQNONNEG are not used. If the bounds are seen 90 | to be passed in a candidate model perturbation, the stepsize of 91 | the perturbation is reduced (but retaining its direction) until 92 | within bounds. As long as we know the final solution must be 93 | within the bounds and not on them, this is merely like changing 94 | the starting point of inversion and is valid (perhaps somtimes 95 | not as efficient as one of those more complicated functions). 96 | But if the solution from this code ends up on a bound, note you 97 | should treat that solution with some caution and not consider it 98 | a true GN solution. In my own inverse problems using this code 99 | the bounds only ever engage at the tails of my Lcurve where I'm 100 | not too worried about being rigorous, so I haven't found it 101 | necessary to complicate things with those other functions. 102 | **`m0`** = initial model vector estimate to start iterating from; if this 103 | is instead a matrix (NxM), then it's a collection of M initial 104 | model vectors of length N each, corresponding to M lambda values 105 | (Typically this is done to continue iterations on previous runs 106 | without starting over.) 107 | Alternate use: if L (next input arg) = I (identity matrix), then 108 | this is also the "preferred model" whose distance from the soln 109 | model is minimized. 110 | **`L`** = If this is a matrix, then this is the user-supplied finite 111 | difference operator for Tikhonov regularization (function 112 | finiteDiffOP.m can help construct it) in the frequentist 113 | framework. 
If in the Bayesian framework and lambda is set to 1, 114 | then L can be supplied as the Cholesky decomposition of the 115 | inverse model prior covariance matrix. 116 | If L is scalar then it is the number of profile segments (for 117 | example if the model vector represents concatenated P & S wave 118 | speed profiles then Lin=2) and 2nd order Tikhonov (smoothness) 119 | frequentist regularization will be used. 120 | **`lambda`** = Used for the frequentist inversion framework (if using Bayesian 121 | framework then set lambda=1 and see Bayesian note for L above). 122 | If lambda is a negative scalar, auto calculate Lcurve with num 123 | pts equal to magnitude of the negative scalar. The auto-calc'd 124 | curve starts at lambda_mid = sqrt(max(eig(G'*G))/max(eig(L'*L))) 125 | and decreases logarithmically from there to 3 orders of mag 126 | less and more than lambda_mid by default. If lambda >=0, use 127 | its value(s) directly as frequentist Tikhonov tradeoff param(s), 128 | in which case lambda can be scalar or vector. 129 | If lambda is a 3-element vector, lambda(1) is the pos or neg scalar 130 | just described; and lambda(2:3) are how many orders of magnitude 131 | greater (lambda(2)) or less (lambda(3)) than lambda_mid to sweep 132 | in the auto-calc'd curve. (lambda(2:3) defaults are 3,-3) 133 | **`useMpref`** = flag for one of three problem frameworks: 134 | useMpref=0 is for a frequentist formulation with higher-order 135 | Tikhonov regularization. Here the regularization doesn't 136 | handle distance from preferred (initial) model m0, but no 137 | restriction on L (unlike useMpref=1). 138 | useMpref=1 is for using 0th-order-Tikh (ridge regr), ie 139 | L==eye(size(L)), in a frequentist formulation in which m0 is 140 | the "preferred model" the inversion minimizes distance to. 141 | useMpref=2 is for using Bayesian formulation, in which case: 142 | m0 is the prior model mean, lambda must =1, L=chol(inv(Cprior)) 143 | (user must set that L externally, note ironically L there must 144 | be RIGHT triangular, the "L" notation is from frequentist case), 145 | output C is now the Bayesian posterior covariance matrix (which 146 | note is different from the frequentist C), outputs R and Ddiag 147 | are empty, and normrough won't have literal meaning anymore. 148 | **`maxiters`** = Number of GN iterations to perform at each lambda on the Lcurve. 149 | So total number of derivative estimations will be 150 | numlambdas*maxiters. 151 | **`verbose`** = 0-3, with 0 = run quietly, and 3 = lots of progress text 152 | Progress text (percent done) in findiffs is useful for slow 153 | forward problems and/or debugging - but note percent done text 154 | is unavailable in jacfwddist and jaccendist. 155 | **`fid`** = File handle to which progress text is output in verbose option. 156 | Note fid=1 outputs to stdout, but an output file is useful for 157 | long batch runs when the forward problem is slow. 158 | **`vargin`** = the extra arguments (if any) that must be passed to the fwdprob 159 | function are tacked onto the end here. You may have none, in 160 | which case your last arg is the fid.) 161 |
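The sketch below shows one way the inputs above might be assembled for a frequentist run with 2nd-order (smoothing) Tikhonov regularization; all names and sizes are hypothetical, dmeas is assumed to already hold the measured data, and myForwardModel again stands in for the user's forward code:

```
M = 50;  N = 200;                        % model and data sizes
m0   = ones(M,1);                        % starting model, scaled to order 1
lb   = zeros(M,1);  ub = 10*ones(M,1);   % hard bounds
covd = 0.05^2*ones(N,1);                 % diagonal of the measurement noise covariance
L    = finiteDiffOp(2, M, 1, 0);         % 2nd-order Tikhonov operator, no extra BCs
epsr = 1e-12;                            % relative accuracy of the forward computation
[m,normdm,normmisfit,normrough,lambda] = ...
    invGN(@myForwardModel,[],epsr,dmeas,covd,lb,ub,m0,L,-20,0,10,1,1);
```

Here lambda=-20 requests an auto-calculated Lcurve with 20 points, useMpref=0 selects the plain frequentist framework, and maxiters=10, verbose=1, fid=1 (stdout) fill out the remaining arguments.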
162 | ###OUTPUTS: (all vectors are always column vectors) 163 | 164 | **`m`** = matrix of maximum likelihood model solution vectors for each 165 | lambda. So m is MxP for M model params and P points on Lcurve, 166 | ie each column of m is a model vector for a different lambda. 167 | **`normdm`** = vectors of L2 norms of model perturbation steps "dm" for each 168 | lambda. (so normdm is imax x P, with i steps to convergence 169 | for l'th lambda, imax=max(i(l)), and P points on Lcurve). Unused 170 | elements in normdm are set to -1, since norms can't be negative. 171 | **`normmisfit`** = vector of L2 norms of data misfit between measured and 172 | predicted data at solution point for each lambda. 173 | **`normrough`** = vector of L2 norms of roughness (for which units are 2nd deriv) 174 | at solution points for each lambda. "Roughness" refers to case 175 | of 2nd order Tikhonov (smoothing constraint) but note really here 176 | this norm's meaning depends on what the supplied L matrix is 177 | (eg might be other model norms than roughness). And in Bayesian 178 | case (useMpref=2) this doesn't have same meaning anymore although 179 | it's still part of the objective function as it relates to Cprior. 180 | **`lambda`** = vector of tradeoff parameters used for Tikhonov regularization. 181 | (you might already have this if you passed in vector of lambdas 182 | as one of the input arguments, but this is useful in the case of 183 | specifying to auto-calculate N Lcurve points). 184 | **`dpred`** = predicted data at solution models (so there are as many vectors 185 | in dpred as there are points on the Lcurve, so dpred is NxP, for 186 | N data pts and P points on Lcurve). 187 | **`G`** = Jacobian matrices of partial derivatives at solution models 188 | (so there are as many jacobian matrices in G as there are points 189 | on the Lcurve, so G is NxMxP, for N data pts, M model params, 190 | and P points on Lcurve). 191 | **`C`** = if useMpref<2, C contains the frequentist covariance matrices of 192 | solution models (so there are as many cov matrices in C as there 193 | are points on the Lcurve, so C is MxMxP, for M model params and 194 | P points on Lcurve). If useMpref==2 (flagging Bayesian case), 195 | then C is instead the Bayesian posterior model covariance matrix. 196 | **`R`** = resolution matrices of solution models (so there are as many cov 197 | matrices in C as there are points on the Lcurve, so C is MxMxP, 198 | for M model params and P points on Lcurve). 199 | (R is empty for Bayesian case when useMpref=2.) 200 | **`Ddiag`** = column vector of diagonal of data res matrix (length Ndata) 201 | (Ddiag is empty for Bayesian case when useMpref=2.) 202 | **`iconv`** = column vector of number of iterations before convergence at each 203 | lambda. if didn't converge, the iconv value for that lambda 204 | is 0. (length Ndata). 205 | 206 | For more clarity in the code, this function computes its model perturbations via 207 | (regularized) normal equations, rather than generalized SVD or subiterations of 208 | gradient descent, which may be more efficient and/or scalable. The assumption 209 | in this choice was that forward function code takes much longer to compute than 210 | the model perturbation solution itself (which can otherwise be computed faster 211 | via GSVD). 
212 | time of this function would result from solving for the perturbation using GSVD
213 | methods such as those provided in Per Christian Hansen's toolkit (however note
214 | that toolkit would take a lot of tweaking/paring to make it Octave compatible):
215 | http://www2.imm.dtu.dk/~pch/Regutools
216 | Aside from computational efficiency/speed, another important consequence of
217 | solving the normal equations in this code is that it constrains the size of the
218 | problem (number of model parameters) relative to the amount of computer memory, due
219 | to the calculation of a dense matrix inverse. For reference, I have successfully run
220 | this code for over 10,000 model parameters on a computer with 16GB of RAM, and
221 | for over 2500 model parameters on a computer with 8GB RAM (in both those cases
222 | the matrix inverse was the memory constraint rather than the forward problem).
223 | 
224 | 
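As a rough back-of-the-envelope check on that memory constraint (a sketch only; the figures below are illustrative estimates, not taken from the toolbox), the dominant cost is holding a handful of dense MxM double-precision matrices at once:

    % Sketch: memory footprint of the dense normal-equations step.
    M = 10000;                    % number of model parameters
    one_MxM_GB = 8*M^2 / 2^30;    % one dense double MxM matrix, about 0.75 GB
    % Several MxM arrays coexist (G'*G, L'*L, their sum, its inverse, plus the
    % C and R outputs per lambda), so the working set reaches several GB at
    % M = 10,000 - consistent with the 16GB figure quoted above.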
225 | ###EXAMPLES:
226 | (Leaving off the output part `[var1,var2,etc]=`, which has the same format for all of these...)
227 | 
228 | **Calling invGN with forward problem function `fwdp` and derivatives function `derivp`:**
229 | 
230 | invGN(fwdp,derivp,epsr,datameas,...
231 | 
232 | **Calling invGN to calculate first finite differences of `fwdp` internally:**
233 | 
234 | invGN(fwdp,[],epsr,datameas,...
235 | 
236 | **Regularized frequentist inversion with auto-chosen lambdas (numlambdas of them):**
237 | 
238 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,-numlambdas,0,maxiters,3,1);
239 | 
240 | **Regularized frequentist inversion with prechosen lambdas:**
241 | 
242 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,...
243 | [1.04193e+07; 10419.3; 10.4193; 0.0104193;],0,maxiters,3,1);
244 | 
245 | **Regularized inversion, picking up after a previous run (using the lambda vector from that run):**
246 | 
247 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,L,lambda,0,maxiters,3,1);
248 | 
249 | **Parameter estimation (no regularization):**
250 | 
251 | invGN(fwdp,[],epsr,datameas,-invcovd,lb,ub,minit,zeros(size(L)),1,0,maxiters,3,1);
252 | 
253 | **Frequentist nonlinear problem with preferred model:**
254 | 
255 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mpref,eye(M),-numlambdas,1,maxiters,0,1);
256 | 
257 | **Bayesian nonlinear problem:**
258 | 
259 | invGN(fwdp,[],epsr,datameas,covd,lb,ub,mprior,chol(inv(Cprior)),1,2,maxiters,0,1);
260 | 
261 | **Frequentist linear problem (should converge after 1st iter), w/ smoothing regu:**
262 | 
263 | deriv=@(x) A; fwdp=@(x) A*x; % define funcs using the A matrix inline
264 | invGN(fwdp,deriv,epsr,datameas,covd,lb,ub,zeros(M,1),L,-numlambdas,0,2,0,1);
265 | 
266 | (Note `maxiters=2` there: the first "iter" in the saved results holds the
267 | initial value, which admittedly isn't relevant for purely linear problems, but
268 | that's how this nonlinear script handles purely linear problems - yes,
269 | an inefficient kludge for the linear case.)
270 | 
271 | 
--------------------------------------------------------------------------------
/invGN.m:
--------------------------------------------------------------------------------
1 | function [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv]=...
2 | invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,Lin,lambdaIn,useMpref,...
3 | maxiters,verbose,fid,varargin)
4 | 
5 | % INVGN - Compute Gauss-Newton nonlinear inversion, version 1.0
6 | % Calculate Tikhonov-regularized, Gauss-Newton nonlinear iterated inversion
7 | % to solve the damped nonlinear least squares problem.
8 | %
9 | % by Andrew Ganse, Applied Physics Laboratory, University of Washington, Seattle.
10 | % Contact at andy@ganse.org , http://research.ganse.org
11 | % Copyright (c) 2015, University of Washington, via 3-clause BSD license.
12 | % See LICENSE.txt for full license statement.
13 | % See manpage.txt for full description and usage instructions.
14 | %
15 | % Usage summary:
16 | % [m,normdm,normmisfit,normrough,lambda,dpred,G,C,R,Ddiag,i_conv] = ...
17 | % invGN(fwdfunc,derivfunc,epsr,dmeas,covd,lb,ub,m0,L,lambda,useMpref,maxiters,...
18 | % verbose,fid,vargin);
19 | 
20 | 
21 | % Some more programmatic run options (so left off the argument list):
22 | usingOctave=0; % 1=octave, 0=matlab (affects form of just a few commands).
23 | use_central_diffs=0; % 0=use fwd finite diffs, 1=use central finite diffs.
24 | % central diffs are more accurate but twice the calc time.
25 | % In practice this often comes down to a tradeoff between
26 | % calc time for one jacobian matrix vs number of iterations
27 | % (more accurate derivs may make for fewer iterations).
28 | use_dist_jaccalc=0; % 0=compute all columns of Jacobian finite diffs one at
29 | % a time in sequence. (only recommended for few params.)
30 | % 1=compute all columns of Jacobian finite diffs in nproc
31 | % simultaneous instances of octave/matlab on same host.
32 | % (nproc is set in jacfwdN.m and jaccenN.m, typically 4 or 8)
33 | % 2=distribute computation of Jacobian finite diff columns
34 | % over all nodes of bluewater cluster.
35 | saveIntermediate=0; % save intermediate results into a file mid-script in case
36 | % of crash, for slow-running fwd problems and/or debugging.
37 | % Filename is invGN_<pid>.mat.
38 | % 0=none, 1=at end of each lambda loop, 2=at end of each
39 | % iteration too.
40 | keepalliters=0; % 1=add a dimension to the output matrices to save all
41 | % iters while debugging/studying a new inverse problem (but
42 | % watch out - this can potentially eat up a LOT of memory).
43 | % 0=only keep the info for the last iteration, ie solution
44 | % point, but still keep normdm iterations (default).
45 | reuse_first_G=1; % save compute time by not recomputing the first (same)
46 | % jacobian matrix for each lambda (they're same because
47 | % same minit for all lambdas)
48 | check_objfn=0; % Ensure the objective function is decreasing with each
49 | % iteration; if not then halve the step and check again
50 | % till satisfied. Presently this is set up to only kick in
51 | % after "convergence" (based on dm); see next option...
52 | check_objfn_iters=0; % If using check_objfn=1, then after hitting "convergence"
53 | % (based on dm), continue this many iterations with the
54 | % decreasing objective function constraint. (This option
55 | % is ignored if check_objfn=0.)
56 | check_bnds=1; % Ensure the iteration does not step out of the bounds;
57 | % if so then halve the step and check again till satisfied.
58 | outbnds_method=0; % If using check_bnds=1, use one of the following methods:
59 | % if 0 revert to half^n step in same direction for nth step.
60 | % if 1 use reflection approach to handle overstepping bnds,
61 | % if 2 set param to bounds if it oversteps bnds.
62 | maxstephalves=10; % max # times to halve step when using either:
63 | % (check_bnds AND outbnds_method=0) OR check_objfn.
64 | warningsoff=1; % Regardless of verbosity level, turn off calculational
65 | % warnings; currently this is only the high cond# warning
66 | % and is done so that invGN can give a more specific warning.
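% Example (hypothetical values, not the defaults above): when debugging a new,
% slow-running forward problem one might instead set, near the top of this file,
%   keepalliters=1; saveIntermediate=2;   % keep every iteration and checkpoint often
%   check_objfn=1;  check_objfn_iters=2;  % insist on a decreasing objective function
%   outbnds_method=1;                     % handle bound overshoots by reflection
% at the cost of extra memory and extra forward-problem evaluations.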
67 | 68 | 69 | warning('off','MATLAB:nearlySingularMatrix'); % instead caught manually and put in 70 | % better format for this script's output 71 | 72 | 73 | % Dimensions and preliminaries: 74 | ndata=length(dmeas); 75 | nparams=size(m0,1); 76 | if lambdaIn(1)<0 77 | nlambdas=-lambdaIn(1); 78 | if length(lambdaIn)==3 79 | maxlambdaexp=lambdaIn(2); 80 | minlambdaexp=lambdaIn(3); 81 | else 82 | % otherwise use these default values: 83 | maxlambdaexp=3; 84 | minlambdaexp=-3; 85 | end 86 | if verbose>1 87 | fprintf(fid,'Max/min exponent values for Lcurve: %d, %d...\n',maxlambdaexp,minlambdaexp); 88 | fprintf(fid,'Lambda values for Lcurve will be determined during 1st iteration...\n'); 89 | if usingOctave, fflush(fid); end 90 | end 91 | else 92 | nlambdas=length(lambdaIn); 93 | lambda=lambdaIn; 94 | if verbose>1 95 | fprintf(fid,'Using lambda values for Lcurve specified by user...\n'); 96 | if usingOctave, fflush(fid); end 97 | end 98 | % FIXME: add a check that all lambdas are positive... 99 | end; 100 | if verbose>1 101 | fprintf(fid,'Number of lambdas (Lcurve points): %d\n',nlambdas); 102 | fprintf(fid,'Max number of GN iterations at each lambda: %d\n',maxiters); 103 | fprintf(fid,'Number of model parameters: %d\n',nparams); 104 | fprintf(fid,'Number of data points: %d\n',ndata); 105 | if outbnds_method==0 106 | fprintf(fid,'Out-of-bounds steps will revert to half-step in same direction...\n'); 107 | elseif outbnds_method==1 108 | fprintf(fid,'Out-of-bounds steps will reflect from bounds...\n'); 109 | elseif outbnds_method==2 110 | fprintf(fid,'Out-of-bounds steps will stop at bounds...\n'); 111 | end 112 | if usingOctave, fflush(fid); end 113 | end 114 | if size(m0,2)>1 && nlambdas~=size(m0,2) 115 | fprintf(fid,'invGN: error: numcols of m0 not equal to numlambdas.\n'); 116 | if usingOctave, fflush(fid); end 117 | return; 118 | end; 119 | if usingOctave 120 | pid=getpid(); % this is a built-in function in Octave; for Matlab will need external getpid.m script 121 | else 122 | pid=12345; 123 | end 124 | 125 | 126 | % Preallocate main (large) storage matrices: 127 | if keepalliters 128 | m=zeros(nparams,nlambdas,maxiters+1); % maxiters+1 because first iter holds init model 129 | dm=zeros(nparams,nlambdas,maxiters); 130 | normdm=zeros(nlambdas,maxiters); 131 | normmisfit=zeros(nlambdas,maxiters); 132 | normrough=zeros(nlambdas,maxiters); 133 | dpred=zeros(ndata,nlambdas,maxiters); 134 | residshat=zeros(ndata,nlambdas,maxiters); 135 | else 136 | m=zeros(nparams,nlambdas); 137 | dm=zeros(nparams,nlambdas); 138 | %normdm=zeros(nlambdas,1); 139 | normdm=zeros(nlambdas,maxiters); 140 | normmisfit=zeros(nlambdas,1); 141 | normrough=zeros(nlambdas,1); 142 | dpred=zeros(ndata,nlambdas); 143 | residshat=zeros(ndata,nlambdas); 144 | end 145 | % An extra dimension of nlambdas on each of the following matrix variables will 146 | % allow us to keep the matrix from the last iteration (hopefully equaling the 147 | % soln point) for each lambda, so we can study solution sensitivities. If 148 | % that's taking too much mem then we can remove that dim. Note before soln 149 | % point for given lambda is reached, these matrices simply hold the value for 150 | % the current iteration. 
151 | G=zeros(ndata,nparams,nlambdas);
152 | C=zeros(nparams,nparams,nlambdas);
153 | R=zeros(nparams,nparams,nlambdas);
154 | Ddiag=zeros(ndata,nlambdas);
155 | i_conv=zeros(nlambdas,1);
156 | 
157 | % Compute invsqrtcovd once here since it's expensive to compute (for matrices):
158 | if size(covd,2)==1 % covd is either scalar or vector
159 | 
160 | if size(covd,1)==1 % it's a scalar variance
161 | if verbose>1
162 | fprintf(fid,'Measurement noise variance specified as scalar Cd...\n');
163 | if usingOctave, fflush(fid); end
164 | end
165 | if covd<0
166 | invsqrtcovd=-covd*ones(ndata,1);
167 | else
168 | invsqrtcovd=covd^(-1/2)*ones(ndata,1);
169 | end
170 | 
171 | else % it's a column vector = diag of cov matrix
172 | if verbose>1
173 | fprintf(fid,'Measurement noise variances specified = diag(covmatrix)...\n');
174 | if usingOctave, fflush(fid); end
175 | end
176 | if covd(1,1)<0 % minus sign (in arglist as "-covd") flags that it's really
177 | % invsqrtcovd already. (this works since diagonal of cov matrix must
178 | % be non-neg.) Useful e.g. when implementing a data mask via
179 | % zeroes in the covd diagonal.
180 | invsqrtcovd=-covd; % already inverted and sqrt'd, just removing "-" flag
181 | else
182 | invsqrtcovd=covd.^(-1/2); % otherwise it really is the variances; convert to invstdvs
183 | end
184 | end
185 | 
186 | else % then covd is full covariance matrix
187 | 
188 | if verbose>1
189 | fprintf(fid,'Full measurement noise cov matrix specified; computing invsqrtcovd...\n');
190 | if usingOctave, fflush(fid); end
191 | end;
192 | if covd(1,1)<0 % minus sign (in arglist as "-covd") flags that it's really invsqrtcovd already.
193 | % (this works since diagonal of cov matrix must be non-neg.)
194 | % useful e.g. when implementing a data mask via zeroes in the covd diagonal.
195 | invsqrtcovd=-covd; % it's already inverted and sqrt'd, just removing the "-" flag...
196 | else
197 | invsqrtcovd=covd^(-1/2);
198 | end
199 | end
200 | 
201 | % By default, do 2nd-order Tikh regularization, ie smoothing based regu.
202 | % (IDEA: if I allowed for Lin to be a vector - must be done for finiteDiffOp
203 | % too - I could specify where discontinuities are that way... better yet would
204 | % be an option to include discontinuity estimation with the rest, but I think
205 | % that'll significantly change the form of the dm solution expressions.)
206 | 
207 | % Auto-create "roughening" matrix L (2nd order finite diff operator matrix)
208 | % with no discontinuities or bdy conds:
209 | if length(Lin)==1 && size(m0,1)>=8 % the >=8 is because m is supposed to be
210 | % a discretized approximation to a
211 | % continuous function. Hard to imagine
212 | % that with length(m)<8 !
213 | % in this case, Lin is scalar and meant to be number of profile segments:
214 | dx=sqrt(epsr); % (assumes that model vector elements are of order unity)
215 | L=finiteDiffOp(2,nparams*ones(Lin,1),dx,0); %*sqrt(dx); % non-sqr L, ie no bdy conds.
216 | % finiteDiffOp() returns a findiff operator, eg L=1/deltax^2*[1 -2 1...].
217 | % The *sqrt(dx) in the L= line above accommodates the application of the
218 | % finitediffs in the continuous integral norm on m:
219 | % integ{(m'')^2}dx -> integ{(Lm)^2}dx -> (Lm)'*(Lm)*deltax
220 | % = m'*(L'*L*deltax)*m = m'*Lhat'*Lhat*m, where Lhat=L*sqrt(deltax).
221 | % Rather than create a new var Lhat, I just included sqrt(dx) above.
222 | %
223 | % FIXME: temporarily commented out the sqrt(dx) part for num stability concerns...
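% For a concrete (schematic) picture of the operator built above: with one
% segment (Lin=1) and, say, 5 parameters, L has the familiar interior-row form
%   L = 1/dx^2 * [ 1 -2  1  0  0
%                  0  1 -2  1  0
%                  0  0  1 -2  1 ],
% ie one second-difference row per interior point and no boundary rows (hence
% non-square). The row count shown is illustrative only; see finiteDiffOp.m
% for the exact construction.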
224 | elseif length(Lin)~=1 && size(m0,1)<8 && norm(Lin)~=0 && useMpref<2 225 | % catch the error case 226 | fprintf(fid,'invGN: error: if using default L matrix, must have length(m)>=8.\n'); 227 | if usingOctave, fflush(fid); end 228 | return; % errors out & dies. 229 | else 230 | % ie Lin is actually a user-supplied finite diff operator matrix L 231 | if norm(Lin)==0 && verbose>1 232 | fprintf(fid,'Noting L matrix of zeros to signal unregularized parameter estimation.\n'); 233 | if usingOctave, fflush(fid); end 234 | end; 235 | L=Lin; 236 | end 237 | 238 | 239 | if size(m0,2)==1 % ie a single m0 col vector independent of lambda 240 | % Put m0 into first slot of m results matrix for each lambda (repetitive but more intuitive) 241 | if verbose>1 242 | fprintf(fid,'Populating initial model guesses for each lambda from m0 since all same...\n'); 243 | if usingOctave, fflush(fid); end 244 | end; 245 | m(:,:,1)=m0*ones(1,nlambdas); % ie make nlambdas cols of repeated m0 col vectors, equiv to repmat(m0,1,nlambdas) 246 | else 247 | % In this case the initial model guess is diff't for each lambda, so m0 is a matrix. 248 | if verbose>1 249 | fprintf(fid,'Populating initial model guesses for each lambda from columns of m0...\n'); 250 | if usingOctave, fflush(fid); end 251 | end; 252 | m(:,:,1)=m0; 253 | end; 254 | 255 | 256 | for l=1:nlambdas % regularization tradeoff parameters lambda 257 | if verbose && l==1 && lambdaIn(1)<0 % lambdaIn(1)<0 tells us we must auto-find lambdas 258 | % won't know lambda values yet since based on G in 1st iteration: 259 | fprintf(fid,'Lambda(%d) [value determined below in 1st iteration]...\n',l); 260 | if usingOctave, fflush(fid); end 261 | elseif verbose 262 | % here we know lambda values either from 1st iteration or from input arg: 263 | fprintf(fid,'Lambda(%d)=%g...\n',l,lambda(l)); 264 | if usingOctave, fflush(fid); end 265 | end; 266 | converged=0; 267 | for iter=1:maxiters % gauss-newton iterations for a given lambda 268 | if verbose 269 | fprintf(fid,' GN iteration #%d...\n',iter); 270 | if usingOctave, fflush(fid); end 271 | end; 272 | 273 | 274 | if verbose>1, tic; end; 275 | 276 | if keepalliters, i=iter; else i=1; end; % for saving all vs. just one iteration 277 | 278 | % Compute local jacobian matrix of derivs G and local predicted data: 279 | if isempty(derivfunc) 280 | % Compute finite diffs: 281 | if l==1 || iter>1 || size(m0,2)>1 || ~reuse_first_G 282 | if length(m0)<=2 || use_dist_jaccalc==0 283 | if use_central_diffs 284 | if verbose>1 285 | fprintf(fid,' Computing finite diffs using jaccen() (single proc on local host)...\n'); 286 | if usingOctave, fflush(fid); end 287 | end; 288 | [G(:,:,l),dpred(:,l,i)] = ... 289 | jaccen(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:}); 290 | else 291 | if verbose>1 292 | fprintf(fid,' Computing finite diffs using jacfwd() (single proc on local host)...\n'); 293 | if usingOctave, fflush(fid); end 294 | end; 295 | [G(:,:,l),dpred(:,l,i)] = ... 296 | jacfwd(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:}); 297 | end 298 | elseif use_dist_jaccalc==1 299 | if use_central_diffs 300 | fprintf(fid,'ERROR: invGN: locally-computed central-diffs is specified for calculating Jacobian\n'); 301 | fprintf(fid,' but this is not yet implemented in the code. 
You could choose distributed-calc\n');
302 | fprintf(fid,' central diffs, or locally-computed forward diffs at top of invGN...\n');
303 | return;
304 | else
305 | if verbose>1
306 | fprintf(fid,' Computing finite diffs using jacfwdN() (distributed among procs of local host)...\n');
307 | if usingOctave, fflush(fid); end
308 | end;
309 | [G(:,:,l),dpred(:,l,i)] = ...
310 | jacfwdN(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:});
311 | end
312 | elseif use_dist_jaccalc==2
313 | if use_central_diffs
314 | if verbose>1, fprintf(fid,' Computing finite diffs using jaccendist() (network distributed calculation)...\n'); end;
315 | [G(:,:,l),dpred(:,l,i)] = ...
316 | jaccendist(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:});
317 | else
318 | if verbose>1, fprintf(fid,' Computing finite diffs using jacfwddist() (network distributed calculation)...\n'); end;
319 | [G(:,:,l),dpred(:,l,i)] = ...
320 | jacfwddist(fwdfunc,m(:,l,i),epsr,verbose,fid,varargin{:});
321 | end
322 | end
323 | if saveIntermediate == 2
324 | % save expensive derivs immediately in case of crash before end of loop
325 | eval(['save -v7 invGN_l' num2str(l) '_i' num2str(i) '.mat']);
326 | if verbose>1
327 | fprintf(fid,' saving intermediate workspace to file invGN_%d.mat...\n',pid);
328 | if usingOctave, fflush(fid); end
329 | end;
330 | eval(['save -v7 /tmp/invGN_' num2str(pid) '.mat']);
331 | end
332 | if nlambdas>1 && l==1 && iter==1
333 | % since the first m vector for every lambda is the same (=m0),
334 | % save the first G matrix so we don't unnecessarily recompute it for
335 | % every new lambda:
336 | if verbose>1
337 | fprintf(fid,' Saving finite diffs from l=1,i=1 to avoid recomputing for l>1,i=1...\n');
338 | if usingOctave, fflush(fid); end
339 | end;
340 | G1=G(:,:,l);
341 | end;
342 | elseif l>1 && iter==1 && size(m0,2)==1 && reuse_first_G
343 | if verbose>1
344 | fprintf(fid,' Reusing equivalent finite diffs from l=1,i=1...\n');
345 | if usingOctave, fflush(fid); end
346 | end;
347 | G(:,:,l)=G1;
348 | end
349 | else
350 | % Compute directly from analytical derivative function:
351 | if verbose>1
352 | fprintf(fid,' Computing derivatives...\n');
353 | if usingOctave, fflush(fid); end
354 | end;
355 | G(:,:,l)=feval(derivfunc,m(:,l,i),varargin{:});
356 | dpred(:,l,i)=feval(fwdfunc,m(:,l,i),varargin{:});
357 | end
358 | 
359 | 
360 | % Prewhiten the local jacobian matrix and residuals:
361 | if verbose>1
362 | fprintf(fid,' Prewhitening local jacobian and residuals...\n');
363 | if usingOctave, fflush(fid); end
364 | end;
365 | if size(covd,2)==1
366 | Ghat=zeros(size(G(:,:,1)));
367 | for q=1:size(G,1);
368 | Ghat(q,:)=invsqrtcovd(q)*G(q,:,l); % weighting rows of G, equiv to diag*G
369 | end
370 | residshat(:,l,i)=invsqrtcovd.*(dmeas-dpred(:,l,i));
371 | else
372 | Ghat=invsqrtcovd*G(:,:,l);
373 | residshat(:,l,i)=invsqrtcovd*(dmeas-dpred(:,l,i));
374 | end
375 | 
376 | % On the first iteration of the first lambda, set the vector of lambdas based on G at m0:
377 | if l==1 && iter==1
378 | if lambdaIn(1)<0 % ie specifying auto-find lambdas
379 | if verbose>1
380 | fprintf(fid,' Computing lambda points for Lcurve...\n');
381 | if usingOctave, fflush(fid); end
382 | end;
383 | % Automatically computing the lambdas (regularization params) for L-curve:
384 | % These are computed using the first iteration's derivatives, which are
385 | % the same for all lambdas (note only true for the first iteration) since
386 | % the first m is the same for all lambdas, equaling m0.
387 | 388 | %Per Christian Hansen approach, goes WAY far out on the Lcurve legs but 389 | %has restrictions on form of L: 390 | %lambdamax=max(eig(Ghat'*Ghat))/max(min(eig(L'*L)),16*eps); 391 | %lambdamin=max(min(eig(Ghat'*Ghat))/max(eig(L'*L)),16*eps); 392 | %lambda=logspace(log10(lambdamax),log10(lambdamin),nlambdas); 393 | %disp(num2str(lambda,'%g\n')) 394 | 395 | %Ken Creager approach: 396 | lambda0=sqrt(max(eig(Ghat'*Ghat))/max(eig(L'*L))); 397 | % except Creager starts Lcurve (ie max misfit) at lambda0 and reduces from 398 | % there, whereas here we have lambda0 sortof roughly in the middle of 399 | % Lcurve somewhere: 400 | lambda=logspace(log10(lambda0)+maxlambdaexp,... 401 | log10(lambda0)+minlambdaexp, nlambdas); 402 | if verbose>1 403 | fprintf(fid,' Lambda(1)=%g\n',lambda(1)); 404 | if usingOctave, fflush(fid); end 405 | end 406 | else 407 | if verbose>1 408 | fprintf(fid,' Using lambda points for Lcurve specified by user...\n'); 409 | if usingOctave, fflush(fid); end 410 | end; 411 | end 412 | end 413 | 414 | 415 | % Compute residual and model norms, and objfn which is based on them, at current 416 | % lambda & iteration. In Bayesian case (useMpref==2) normrough doesn't have 417 | % same meaning and not very useful outside of the inversion (eg no Lcurve) but 418 | % it's still used in the objective fn. 419 | normmisfit(l,i)=norm(residshat(:,l,i)); 420 | normrough(l,i)=norm(L*(m(:,l,i))); 421 | objfn(l,i)=normmisfit(l,i)+lambda(l)^2*normrough(l,i); 422 | 423 | 424 | 425 | % Check condition number to warn about stability of the inv() in dm()=... below: 426 | if verbose>1 427 | fprintf(fid,' Computing model perturbation...\n'); 428 | if usingOctave, fflush(fid); end 429 | end; 430 | c=cond(Ghat'*Ghat+lambda(l)^2*L'*L); 431 | if c>1e15 432 | if ~warningsoff 433 | fprintf(fid,' Warning: cond number of (Ghat''*Ghat+lambda^2*L''*L) is large (%g)\n',c); 434 | if usingOctave, fflush(fid); end 435 | end 436 | end 437 | % Compute maximum likelihood model perturbation: 438 | if useMpref 439 | % Caution! This formulation only valid when: 440 | % a.) using 0th-order-Tikh (ridge regr), ie if L==eye(size(L)), 441 | % or b.) using Bayesian formulation, in which case m0 is the prior model mean 442 | % and lambda=1 and L=chol(inv(Cprior)). 443 | if size(m0,2)>1 % ie diff't m0 for each lambda 444 | dm(:,l,i)=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*(Ghat'*residshat(:,l,i)-... 445 | lambda(l)^2*L'*L*(m(:,l,i)-m0(:,l))); 446 | else % if same m0 for each lambda (and this is the expected case when Bayesian) 447 | dm(:,l,i)=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*(Ghat'*residshat(:,l,i)-... 448 | lambda(l)^2*L'*L*(m(:,l,i)-m0)); 449 | end 450 | else 451 | % This option is for frequentist framework with higher-order Tikh regularization. 452 | % Here regu doesn't care about distance from preferred (initial) model, and 453 | % there is no restriction on L (whereas above required L=I for useMpref case). 454 | dm(:,l,i)=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*(Ghat'*residshat(:,l,i)-... 
455 | lambda(l)^2*L'*L*m(:,l,i));
456 | end
457 | 
458 | 
459 | % Check dm for passing model bounds, ie that step is not too far
460 | if check_bnds
461 | if verbose>1
462 | fprintf(fid,' Checking bounds...\n');
463 | if usingOctave, fflush(fid); end
464 | end;
465 | mtilde=m(:,l,i)+dm(:,l,i); % candidate model based on current dm() value
466 | outofbnds=sum(mtilde<lb)+sum(mtilde>ub);
467 | if outofbnds>0
468 | % find out of bounds components
469 | outlow=mtilde<lb;
470 | outhigh=mtilde>ub;
471 | if outbnds_method==0
472 | % if a component out of bnds, revert to half-step in same direction
473 | j=0;
474 | while outofbnds>0 && j<maxstephalves
475 | j=j+1;
476 | dm(:,l,i)=dm(:,l,i)/2;
477 | mtilde=m(:,l,i)+dm(:,l,i);
478 | outlow=mtilde<lb;
479 | outhigh=mtilde>ub;
480 | outofbnds=sum(mtilde<lb)+sum(mtilde>ub);
481 | end
482 | if j>1
483 | action=['passed bounds so stepsize halved^' num2str(j)];
484 | else
485 | action='passed bounds so stepsize halved';
486 | end
487 | elseif outbnds_method==1
488 | % if a component out of bnds, reflect at that bnd
489 | mtilde(outlow)=lb(outlow)+( lb(outlow)-mtilde(outlow) );
490 | mtilde(outhigh)=ub(outhigh)-( mtilde(outhigh)-ub(outhigh) );
491 | action='reflected from bounds';
492 | elseif outbnds_method==2
493 | % if a component out of bnds, set it equal to that bnd
494 | mtilde(outlow)=lb(outlow); % strange code notation here but it works
495 | mtilde(outhigh)=ub(outhigh); % (in both Matlab and Octave)
496 | action='set to bounds';
497 | end
498 | if outbnds_method>0 % update dm given bounds behavior when mtilde altered
499 | dm(:,l,i)=mtilde-m(:,l,i);
500 | end
501 | if verbose>1
502 | fprintf(fid,' %d/%d model params %s in lambda(%d)...\n',outofbnds,nparams,action,l);
503 | if usingOctave, fflush(fid); end
504 | end;
505 | else
506 | if verbose>1
507 | fprintf(fid,' All parameter updates within bounds.\n');
508 | if usingOctave, fflush(fid); end
509 | end;
510 | end
511 | end
512 | 
513 | 
514 | % Check dm for decreasing obj fn, ie once again that step is not too far, after
515 | % having hit "convergence" (presently based on dm)
516 | if check_objfn && converged
517 | if verbose>1
518 | fprintf(fid,' Checking for decreasing objfn...\n');
519 | fprintf(fid,' Calculating fwdprob for preliminary candidate step...\n');
520 | if usingOctave, fflush(fid); end
521 | end;
522 | mtilde=m(:,l,i)+dm(:,l,i); % candidate model based on current dm() value
523 | dpred_tilde=feval(fwdfunc,mtilde,varargin{:});
524 | if size(covd,2)==1
525 | residshat_tilde=invsqrtcovd.*(dmeas-dpred_tilde);
526 | else
527 | residshat_tilde=invsqrtcovd*(dmeas-dpred_tilde);
528 | end
529 | normmisfit_tilde=norm(residshat_tilde);
530 | normrough_tilde=norm(L*mtilde);
531 | objfn_tilde=normmisfit_tilde+lambda(l)^2*normrough_tilde;
532 | decreasing_objfn=objfn_tilde<=objfn(l,i);
533 | fprintf(fid,' objfn(l,i)=%g, objfn_tilde=%g\n',objfn(l,i),objfn_tilde);
534 | if ~decreasing_objfn
535 | j=0;
536 | while ~decreasing_objfn && j<maxstephalves
537 | j=j+1;
538 | dm(:,l,i)=dm(:,l,i)/2;
539 | mtilde2=m(:,l,i)+dm(:,l,i);
540 | if verbose>1
541 | fprintf(fid,' Calculating fwdprob for halved candidate step...\n');
542 | if usingOctave, fflush(fid); end
543 | end;
544 | dpred_tilde2=feval(fwdfunc,mtilde2,varargin{:});
545 | if size(covd,2)==1
546 | residshat_tilde2=invsqrtcovd.*(dmeas-dpred_tilde2);
547 | else
548 | residshat_tilde2=invsqrtcovd*(dmeas-dpred_tilde2);
549 | end
550 | normmisfit_tilde2=norm(residshat_tilde2);
551 | normrough_tilde2=norm(L*mtilde2);
552 | objfn_tilde2=normmisfit_tilde2+lambda(l)^2*normrough_tilde2;
553 | fprintf(fid,' objfn(l,i)=%g, objfn_tilde2=%g, objfn_tilde=%g, j=%d \n',...
554 | objfn(l,i),objfn_tilde2,objfn_tilde,j); 555 | if objfn_tilde2 > objfn_tilde 556 | fprintf(fid,' WARNING: hump just found between last step and candidate step,\n'); 557 | fprintf(fid,' implying a region not satisfying monotonicity...\n'); 558 | return; 559 | else % objfn_tilde2 <= objfn_tilde 560 | decreasing_objfn=objfn_tilde2<=objfn(l,i); 561 | end 562 | objfn_tilde=objfn_tilde2; 563 | end 564 | if verbose>1 565 | if j>1 566 | fprintf(fid,' Non-decreasing objfn initially, so stepsize halved^%d; objfn now decreasing.\n',j); 567 | else 568 | fprintf(fid,' Non-decreasing objfn initially, so stepsize halved; objfn now decreasing.\n'); 569 | end 570 | if usingOctave, fflush(fid); end 571 | end; 572 | else 573 | if verbose>1 574 | fprintf(fid,' Parameter update satisfies decreasing objfn.\n'); 575 | if usingOctave, fflush(fid); end 576 | end; 577 | end 578 | end 579 | 580 | 581 | % Update the model vector with the new perturbation: 582 | if keepalliters 583 | m(:,l,i+1)=dm(:,l,i)+m(:,l,i); 584 | normdm(l,i)=norm(dm(:,l,i)); 585 | else 586 | % It's a bit of a hack, but note that i in here always =1 if keepalliters=0: 587 | m(:,l,1)=dm(:,l,i)+m(:,l,i); 588 | normdm(l,iter)=norm(dm(:,l,i)); % i=1 since keepalliters=0, so use iter here. 589 | % FIXME: hm, the above means that at final iteration, the m(:,l,1) won't 590 | % technically correspond to the resids & derivs for that i,l. But if it's 591 | % really the final iteration such that the solution converged, this shouldn't 592 | % matter. By comparison, up in the "keepalliters" part of the if block, 593 | % at end the m() will have one more element in iters dimension than normdm 594 | % since the first iter for m() holds the initial m vector. 595 | end 596 | 597 | 598 | if i>1 && keepalliters 599 | % (since requires previous iteration's results for comparison in order 600 | % check for convergence) 601 | % Note goal is to go one step past convergence, ie obtain m_{i+1}, 602 | % so that can have derivs & resids at soln point m_i, so that's why the 603 | % resids & Ghat here are from before the model vector update that just 604 | % happened. 605 | % Anyway, model perturbation after convergence should be so small that 606 | % additional update is negligible. 607 | if verbose>1 608 | fprintf(fid,' Checking convergence criteria for grad(Fobj), dm, & df...\n'); 609 | if usingOctave, fflush(fid); end 610 | end; 611 | % for now, "convergence" is based only on dm getting really small: 612 | % (still analyzing the full convergence test) FIXME 613 | %[converged,gradtest,mtest,ftest]=... 614 | [dummy,gradtest,converged,ftest]=... 615 | convinfo(fwdfunc,epsr,Ghat,residshat(:,l,i-1),residshat(:,l,i),... 
616 | m(:,l,i-1),m(:,l,i),verbose,fid);
617 | end
618 | 
619 | 
620 | if verbose>1
621 | fprintf(fid,' Norm(deltam)=%g\n',normdm(l,i));
622 | fprintf(fid,' Norm(datamisfit)=%g\n',normmisfit(l,i));
623 | fprintf(fid,' Norm(roughness)=%g\n',normrough(l,i));
624 | fprintf(fid,' Norm(m)=%g\n',norm(m(:,l,i)));
625 | if verbose==4
626 | fprintf(fid,' dm={');
627 | for q=1:length(m0)
628 | fprintf(fid,' %g',dm(q,l,i));
629 | end
630 | fprintf(fid,' }\n');
631 | fprintf(fid,' m={'); for q=1:length(m0)
632 | fprintf(fid,' %g',m(q,l,i));
633 | end
634 | fprintf(fid,' }\n');
635 | fprintf(fid,' 1/m={');
636 | for q=1:length(m0)
637 | fprintf(fid,' %g',1/m(q,l,i));
638 | end
639 | fprintf(fid,' }\n');
640 | end
641 | fprintf(fid,' Objfn=%g\n',objfn(l,i));
642 | if usingOctave, fflush(fid); end
643 | end
644 | 
645 | 
646 | if verbose>1
647 | % output compute time:
648 | mytime=toc;
649 | fprintf(fid,' This iteration took %f seconds of wall-clock time.\n',mytime);
650 | % output convergence info:
651 | if converged
652 | % note mechanism to go check_objfn_iters iters past "convergence" at i_conv(l) iters:
653 | if i_conv(l)==0, i_conv(l)=iter; end
654 | fprintf(fid,' CONVERGED at i=%d based on dm criteria.\n',i_conv(l));
655 | if iter>=i_conv(l)+check_objfn_iters, break; end % break out of iters loop at end
656 | if iter>=i_conv(l)
657 | fprintf(fid,' continuing with %d more iters imposing decreasing objfn...\n',...
658 | check_objfn_iters-(iter-i_conv(l)));
659 | end
660 | else
661 | if i_conv(l)>0
662 | fprintf(fid,' Resetting convergence flag (set at iter %d) based on suddenly increasing dm.\n',i_conv(l));
663 | i_conv(l)=0; % reset; ie must have 3 converged iters to consider it good
664 | end
665 | end
666 | if usingOctave, fflush(fid); end
667 | end
668 | 
669 | 
670 | end; % end of iteration loop over i
671 | 
672 | if verbose>1
673 | if ~converged && iter==maxiters
674 | fprintf(fid,' Max iterations reached without convergence.\n');
675 | if usingOctave, fflush(fid); end
676 | end
677 | end
678 | 
679 | 
680 | % Compute model covariance and resolution matrices for current lambda:
681 | if verbose>1
682 | fprintf(fid,' Computing cov and res matrices for Lambda(%d)...\n',l);
683 | if usingOctave, fflush(fid); end
684 | end;
685 | 
686 | % Note Ghat corresponds to final model solution m(:,l,i), not m(:,l,i)+dm(:,l,i).
687 | if useMpref<2 % frequentist
688 | Gginv=inv(Ghat'*Ghat+lambda(l)^2*L'*L)*Ghat';
689 | C(:,:,l)=Gginv*Gginv'; % model cov matrix for current lambda
690 | R(:,:,l)=Gginv*Ghat; % model resolution matrix for current lambda
691 | % data res matrix D is too big to save whole thing, so just saving its diag:
692 | Ddiag(:,l)=proddiag(Ghat,Gginv);
693 | else % Bayesian when useMpref==2
694 | C(:,:,l)=inv(Ghat'*Ghat+lambda(l)^2*L'*L); % posterior model covariance
695 | % note lambda(l) should always be 1 in this case,
696 | % and L=chol(inv(Cprior)) but note notation confusion
697 | % in that the "L" does not refer to "left triangular"
698 | % but the frequentist findiff operator from earlier!
699 | % Sorry but what to do. "L" here is right triangular.
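% To spell out the identity being relied on here (a sketch, under the stated
% assumptions that lambda=1 and L=chol(inv(Cprior)) so that L'*L=inv(Cprior),
% and with Ghat=Cd^(-1/2)*G so that Ghat'*Ghat=G'*inv(Cd)*G):
%   Ghat'*Ghat + L'*L = G'*inv(Cd)*G + inv(Cprior),
% whose inverse, taken above, is the usual linearized-Gaussian posterior
% model covariance about the solution point.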
700 | % set these empty as they are not relevant for Bayesian formulation: 701 | R(:,:,l)=[]; 702 | Ddiag(:,l)=[]; 703 | end 704 | 705 | 706 | if saveIntermediate 707 | if verbose>1 708 | fprintf(fid,' saving intermediate workspace to file invGN_%d.mat...\n',pid); 709 | if usingOctave, fflush(fid); end 710 | end; 711 | eval(['save -v7 /tmp/invGN_' num2str(pid) '.mat']); 712 | end 713 | 714 | 715 | end; % end of loop over lambdas index l 716 | 717 | 718 | if verbose 719 | fprintf(fid,'Finished all iterations for all lambdas.\n'); 720 | if usingOctave, fflush(fid); end 721 | end 722 | 723 | --------------------------------------------------------------------------------