├── optimc-doc.pdf
├── src
    ├── brent.h
    ├── neldermead.h
    ├── conjgrad.h
    ├── lls.h
    ├── nls.h
    ├── secant.h
    ├── optimc.h
    ├── newtonmin.h
    ├── lnsrchmp.h
    ├── matrix.h
    ├── brent.c
    ├── conjgrad.c
    ├── neldermead.c
    ├── lnsrchmp.c
    ├── optimc.c
    ├── secant.c
    ├── lls.c
    └── nls.c
├── test
    ├── ex2_fminbnd.c
    ├── ex5_levmar.c
    ├── ex0_optimize.c
    ├── ex3_fminunc.c
    ├── ex6_nls.c
    ├── ex4_fminnewt.c
    ├── testfunctions.h
    ├── ex1_fminsearch.c
    └── testfunctions.c
├── README.md
├── COPYRIGHT
├── MINPACK_COPYRIGHT
└── header
    └── optimc.h


/optimc-doc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafat/optimc/HEAD/optimc-doc.pdf


--------------------------------------------------------------------------------
/src/brent.h:
--------------------------------------------------------------------------------
 1 | #ifndef BRENT_H_
 2 | #define BRENT_H_
 3 | 
 4 | #include "secant.h"
 5 | 
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | 
12 | double brent_zero(custom_funcuni *funcuni,double a, double b, double tol, double eps);
13 | 
14 | double brent_local_min(custom_funcuni *funcuni, double a, double b, double t, double eps, double *x);
15 | 
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 | 
20 | #endif /* SECANT_H_ */
21 | 


--------------------------------------------------------------------------------
/src/neldermead.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * neldormead.h
 3 |  *
 4 |  *  Created on: Jan 5, 2014
 5 |  *      Author: HOME
 6 |  */
 7 | 
 8 | #ifndef NELDERMEAD_H_
 9 | #define NELDERMEAD_H_
10 | 
11 | #include "brent.h"
12 | 
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 | 
17 | int nel_min(custom_function *funcpt,double *xc,int N,double *dx,double fsval,int MAXITER,int *niter,
18 | 		double eps,double *xf);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | 
25 | #endif /* NELDORMEAD_H_ */
26 | 


--------------------------------------------------------------------------------
/test/ex2_fminbnd.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ============================================================================
 3 |  Name        : optim.c
 4 |  Author      : Rafat
 5 |  ============================================================================
 6 |  */
 7 | 
 8 | #include <stdio.h>
 9 | #include <stdlib.h>
10 | #include "../header/optimc.h"
11 | #include "testfunctions.h"
12 | 
13 | 
14 | int main(void) {
15 |         double a,b,oup;
16 | 
17 |         a = 0.3;
18 |         b = 1;
19 | 
20 |         custom_funcuni humps_min = {humps,0};
21 | 
22 |         oup = fminbnd(&humps_min,a,b);
23 |         printf("OUP %g \n",oup);
24 | 
25 |         return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | OptimC - Optimization / Unconstrained Minimization/ Nonlinear Least Squares Library in ANSI C
2 | =============================================================================================
3 | 
4 | OptimC is a C software package to minimize any unconstrained multivariable function. The algorithms implemented are Nelder-Mead,Newton Methods (Line Search and Trust Region methods), Conjugate Gradient and BFGS (regular and Limited Memory). Brent method is also available for single variable functions if the bounds are known.
5 | 
6 | Update 06/09/2014 - Nonlinear Squares Implementation [Levenberg-Marquardt Method] Added. 
7 | 
8 | Documentation - PDF file included in the GIT repository. Wiki to be added shortly.
9 | 


--------------------------------------------------------------------------------
/test/ex5_levmar.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include "../header/optimc.h"
 4 | #include "testfunctions.h"
 5 | 
 6 | int main(void) {
 7 | 	int M, N, i, ret;
 8 | 	custom_funcmult fpowell_min = { fpowell, 0 };
 9 | 
10 | 	M = N = 3;
11 | 
12 | 	double xi[3] = { -1, 0, 0 };
13 | 	double xf[3] = { 0, 0, 0 };
14 | 
15 | 	//levmar simply returns the end point and the termination code
16 | 	// For more options use the object-oriented approach employed by nls and nls_scale
17 | 	ret = levmar(&fpowell_min, NULL, xi, M, N, xf);
18 | 
19 | 	printf("Return Value : %d \n", ret);
20 | 	printf("\n");
21 | 	printf("Termination At : ");
22 | 	for (i = 0; i < N; ++i) {
23 | 		printf("%g ", xf[i]);
24 | 	}
25 | 
26 | 	return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/src/conjgrad.h:
--------------------------------------------------------------------------------
 1 | #ifndef CONJGRAD_H_
 2 | #define CONJGRAD_H_
 3 | 
 4 | #include "newtonmin.h"
 5 | 
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | int ichol(double *A, int N);
12 | 
13 | int stopcheck2(double fx,int N,double *xc,double *xf,double *jac,double *dx,double fsval,double gtol,double stol) ;
14 | 
15 | int cgpr_mt(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx,double maxstep, int MAXITER, int *niter,
16 | 		double eps,double gtol,double ftol,double xtol,double *xf); //Polak Ribiere + (More Thuentes Line Search)
17 | 
18 | int conjgrad_min_lin(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double maxstep, int MAXITER, int *niter,
19 | 		double eps,double gtol,double ftol,double xtol,double *xf);
20 | 
21 | 
22 | #ifdef __cplusplus
23 | }
24 | #endif
25 | 
26 | #endif /* CONJGRAD_H_ */
27 | 


--------------------------------------------------------------------------------
/test/ex0_optimize.c:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | /*
 4 | 
 5 | */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "../header/optimc.h"
10 | #include "testfunctions.h"
11 | 
12 | int main(void) {
13 | 	int N, j, i;
14 | 	double *xi, *xf;
15 | 	opt_object optim;
16 | 	custom_function froth_min = { froth, 0 }; // The second argument is used to pass additional parameters. It is NULL (0) in this case.
17 | 
18 | 	N = 2;
19 | 
20 | 	xi = (double*)malloc(sizeof(double)* N);
21 | 	xf = (double*)malloc(sizeof(double)* N);
22 | 
23 | 	for (i = 0; i < N; ++i) {
24 | 		xi[i] = 2;
25 | 	}
26 | 	xi[0] = 0.5;
27 | 
28 | 	optim = opt_init(N);
29 | 
30 | 	for (j = 0; j < 8; ++j) {
31 | 		optimize(optim, &froth_min, NULL, N, xi, j);
32 | 		optsummary(optim);
33 | 		for (i = 0; i < N; ++i) {
34 | 			xi[i] = 2;
35 | 		}
36 | 		xi[0] = 0.5;
37 | 	}
38 | 
39 | 	free(xi);
40 | 	free(xf);
41 | 	free_opt(optim);
42 | 
43 | 	return 0;
44 | }
45 | 


--------------------------------------------------------------------------------
/src/lls.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * lls.h
 3 |  *
 4 |  *  Created on: Apr 14, 2014
 5 |  *      Author: HOME
 6 |  */
 7 | 
 8 | #ifndef LLS_H_
 9 | #define LLS_H_
10 | 
11 | #include "neldermead.h"
12 | 
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 | 
17 | int lls_normal(double *A,double *b,int M,int N,double *x);
18 | 
19 | int lls_qr(double *A,double *b,int M,int N,double *xo);
20 | 
21 | void bidiag(double *A, int M, int N);
22 | 
23 | void bidiag_orth(double *A, int M, int N,double *U,double *V);
24 | 
25 | int svd_gr(double *A,int M,int N,double *U,double *V,double *q);
26 | 
27 | int svd_gr2(double *A,int M,int N,double *U,double *V,double *q);
28 | 
29 | int minfit(double *AB,int M,int N,int P,double *q);
30 | 
31 | int lls_svd(double *Ai,double *bi,int M,int N,double *xo);
32 | 
33 | int lls_svd2(double *Ai,double *bi,int M,int N,double *xo);
34 | 
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | 
39 | #endif /* LLS_H_ */
40 | 


--------------------------------------------------------------------------------
/test/ex3_fminunc.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include "../header/optimc.h"
 4 | #include "testfunctions.h"
 5 | 
 6 | int main(void) {
 7 | 	int N, j, i, retval;
 8 | 	double *xi, *xf;
 9 | 	double maxstep;
10 | 
11 | 	custom_function myvalue_min = { myvalue, 0 };
12 | 	custom_gradient myvaluegrad_min = { myvaluegrad, 0 };
13 | 
14 | 	N = 4;
15 | 
16 | 	xi = (double*)malloc(sizeof(double)* N);
17 | 	xf = (double*)malloc(sizeof(double)* N);
18 | 
19 | 	for (i = 0; i < N; ++i) {
20 | 		xi[i] = 1;
21 | 	}
22 | 	printf("\n\n%-25s%-20s%-20s \n", "Method", "Return Value", "Objective Function");
23 | 	maxstep = 1000.0; // Set the value accordingly. Maximum Step size 
24 | 
25 | 
26 | 
27 | 	for (j = 0; j < 8; ++j) {
28 | 		retval = fminunc(&myvalue_min, &myvaluegrad_min, N, xi,maxstep, j, xf);
29 | 		printf("\n\n%-25d%-20d%-20g \n", j, retval, myvalue(xf, N,0));
30 | 		printf("Function Minimized At : ");
31 | 		for (i = 0; i < N; ++i) {
32 | 			printf("%g ", xf[i]);
33 | 		}
34 | 
35 | 		for (i = 0; i < N; ++i) {
36 | 			xi[i] = 1;
37 | 		}
38 | 
39 | 	}
40 | 
41 | 
42 | 	free(xi);
43 | 	free(xf);
44 | 
45 | 	return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/test/ex6_nls.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include "../header/optimc.h"
 4 | #include "testfunctions.h"
 5 | 
 6 | int main(void) {
 7 | 
 8 | 	int M, N, i;
 9 | 
10 | 	nls_object obj;
11 | 	custom_funcmult fk10_min = { fk10, 0 };
12 | 	custom_jacobian fk10jac_min = { fk10jac, 0 };
13 | 
14 | 	//Matlab Example http://www.mathworks.in/help/optim/ug/lsqnonlin.html See Examples
15 | 
16 | 	M = 10; N = 2;
17 | 	obj = nls_init(M, N);// M >= N
18 | 	double xi[2] = { 0.3, 0.4 };
19 | 	double diag[2] = { 1.0, 1.0 };
20 | 
21 | 	// Using Custom Scaling nls_scale function that allows diag values to be set by the user.
22 | 	// The program will only accept strictly positive diagonal values. diag has the size N
23 | 
24 | 	nls_scale(obj, &fk10_min, &fk10jac_min, diag, xi);
25 | 	for (i = 0; i < obj->N; ++i) {
26 | 		printf("%g ", obj->xopt[i]);
27 | 	}
28 | 	nlssummary(obj);
29 | 
30 | 
31 | 	free_nls(obj);
32 | 
33 | 	// Using Internal Scaling. Function nls.
34 | 
35 | 	obj = nls_init(M, N);
36 | 
37 | 	nls(obj, &fk10_min, &fk10jac_min, xi);
38 | 	for (i = 0; i < obj->N; ++i) {
39 | 		printf("%g ", obj->xopt[i]);
40 | 	}
41 | 	nlssummary(obj);
42 | 
43 | 	free_nls(obj);
44 | 
45 | 	return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/test/ex4_fminnewt.c:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include "../header/optimc.h"
 6 | #include "testfunctions.h"
 7 | 
 8 | int main(void) {
 9 | 	int N, j, i, retval;
10 | 	double fsval, delta,maxstep;
11 | 	double *xi, *xf, *dx;
12 | 
13 | 	custom_function brown_min = { brown, 0 };
14 | 	custom_gradient browngrad_min = { browngrad, 0 };
15 | 
16 | 	N = 2;
17 | 
18 | 	xi = (double*)malloc(sizeof(double)* N);
19 | 	xf = (double*)malloc(sizeof(double)* N);
20 | 	dx = (double*)malloc(sizeof(double)* N);
21 | 
22 | 	for (i = 0; i < N; ++i) {
23 | 		xi[i] = 1;
24 | 		//dx[i] = 1;
25 | 	}
26 | 	fsval = 1;// Default Value
27 | 	delta = -1.0;//Default Value
28 | 	dx[0] = 1.0e05; dx[1] = 1.0e-05;
29 | 	maxstep = 1000.0;//Default Value
30 | 	printf("\n\n%-25s%-20s%-20s \n", "Method", "Return Value", "Objective Function");
31 | 
32 | 
33 | 
34 | 	for (j = 1; j < 4; ++j) {
35 | 		retval = fminnewt(&brown_min, &browngrad_min, N, xi, delta, dx, fsval,maxstep, j, xf);
36 | 		printf("\n\n%-25d%-20d%-20g \n", j, retval, brown(xf, N,0));
37 | 		printf("Function Minimized At : ");
38 | 		for (i = 0; i < N; ++i) {
39 | 			printf("%g ", xf[i]);
40 | 		}
41 | 
42 | 		for (i = 0; i < N; ++i) {
43 | 			xi[i] = 1;
44 | 		}
45 | 
46 | 	}
47 | 
48 | 	free(xi);
49 | 	free(xf);
50 | 	free(dx);
51 | 
52 | 	return 0;
53 | }
54 | 


--------------------------------------------------------------------------------
/src/nls.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * nls.h
 3 |  *
 4 |  *  Created on: May 21, 2014
 5 |  *      Author: HOME
 6 |  */
 7 | 
 8 | #ifndef NLS_H_
 9 | #define NLS_H_
10 | 
11 | #include "lls.h"
12 | 
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 | 
17 | double enorm(double *x, int N);
18 | 
19 | void qrfac(double *A, int M, int N, int lda, int pivot, int *ipvt, int lipvt,double *rdiag, double *acnorm,double eps);
20 | 
21 | void qrsolv(double *r,int ldr,int N,int *ipvt,double *diag,double *qtb,double *x,double *sdiag);
22 | 
23 | void fdjac2(custom_funcmult *funcmult, double *x, int M, int N, double *fvec, double *fjac, int ldfjac,
24 | 		double epsfcn,double eps);
25 | 
26 | void lmpar(double *r,int ldr,int N,int *ipvt,double *diag,double *qtb,double delta,double *par,double *x,double *sdiag);
27 | 
28 | int lmder(custom_funcmult *funcmult,custom_jacobian *jacobian,double *xi,int M, int N,
29 | 		double *fvec,double *fjac,int ldfjac,int maxfev,double *diag,int mode,double factor,int nprint,
30 | 		double eps,double ftol,double gtol,double xtol,int *nfev,int *njev,int *ipvt, double *qtf);
31 | 
32 | int lmdif(custom_funcmult *funcmult, double *x, int M, int N, double *fvec, double *fjac, int ldfjac,
33 | 		int maxfev,double *diag,int mode,double factor,int nprint,double eps,double epsfcn,double ftol,double gtol,
34 | 		double xtol,int *nfev,int *njev,int *ipvt, double *qtf);
35 | 
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | 
41 | 
42 | #endif /* NLS_H_ */
43 | 


--------------------------------------------------------------------------------
/src/secant.h:
--------------------------------------------------------------------------------
 1 | #ifndef SECANT_H_
 2 | #define SECANT_H_
 3 | 
 4 | #include "conjgrad.h"
 5 | 
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | void bfgs_naive(double *H,int N,double eps,double *xi,double *xf,double *jac,double *jacf);
12 | 
13 | int bfgs_min_naive(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double fsval,double maxstep, int MAXITER,
14 | 		double eps, double *xf);
15 | 
16 | void bfgs_factored(double *H,int N,double eps,double *xi,double *xf,double *jac,double *jacf);
17 | 
18 | int bfgs_min(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double fsval, double maxstep, int MAXITER, int *niter,
19 | 		double eps,double gtol,double stol,double *xf);
20 | 
21 | int bfgs_min2(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, int m, double *dx, double fsval, double maxstep, int MAXITER, int *niter,
22 | 	double eps, double gtol, double ftol, double xtol, double *xf);
23 | 
24 | void inithess_l(double *H, int N, int k, double *tsk, double *tyk, double *dx);
25 | 
26 | void bfgs_rec(double *H, int N, int iter, int m, double *jac, double *sk, double *yk, double *r);
27 | 
28 | int bfgs_l_min(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, int m, double *dx, double fsval,double maxstep, int MAXITER, int *niter,
29 | 		double eps,double gtol,double ftol,double xtol,double *xf);
30 | 
31 | #ifdef __cplusplus
32 | }
33 | #endif
34 | 
35 | #endif /* SECANT_H_ */
36 | 


--------------------------------------------------------------------------------
/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Rafat Hussain
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 5 | 
 6 |  1.  Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 7 | 
 8 |  2.  Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer 
 9 |  	in the documentation and/or other materials provided with the distribution.
10 | 
11 |  3.  The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
12 | 
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
14 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
15 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
16 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
17 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
18 |  OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/test/testfunctions.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * testfunctions.h
 3 |  *
 4 |  *  Created on: Mar 16, 2014
 5 |  *      Author: HOME
 6 |  */
 7 | 
 8 | #ifndef TESTFUNCTIONS_H_
 9 | #define TESTFUNCTIONS_H_
10 | 
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | 
16 | 	void fk10(double *x, int M, int N, double *f, void *params);
17 | 
18 | 	void fk10jac(double *x, int M, int N, double *f, void *params);
19 | 
20 | 	void fpowell(double *x, int M, int N, double *f, void *params);
21 | 
22 | double myvalue
23 | (
24 |     double   *x,
25 | 	int       n, 
26 | 	void *params
27 | );
28 | 
29 | void myvaluegrad(
30 | 		double *x,
31 | 		int n,
32 | 		double *jac,
33 | 		void *params
34 | );
35 | 
36 | double quartic(double *x,int N,void *params);
37 | 
38 | void quarticgrad(double *x, int N, double *jac, void *params);
39 | 
40 | double func4(double *x, int N, void *params);
41 | 
42 | double rosenbrock(double *x,int N,void *params);
43 | 
44 | void rosenbrockgrad(double *x, int N, double *g, void *params);
45 | 
46 | double brown(double *x, int N, void *params);
47 | 
48 | void browngrad(double *x, int N, double *g, void *params);
49 | 
50 | double powell(double *x, int N, void *params);
51 | 
52 | void powellgrad(double *x, int N, double *g, void *params);
53 | 
54 | double beale(double *x, int N, void *params);
55 | 
56 | double froth(double *x, int N, void *params);
57 | 
58 | double humps(double x,void *params);
59 | 
60 | double func1(double *x, int N, void *params);
61 | 
62 | double tf6(double *x,int N,void *params);
63 | 
64 | #ifdef __cplusplus
65 | }
66 | #endif
67 | 
68 | #endif /* TESTFUNCTIONS_H_ */
69 | 


--------------------------------------------------------------------------------
/test/ex1_fminsearch.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | ex1_fminsearch : demonstrates usage of fminsearch. fminsearch is a simple implementation of
 3 | Nelder-Mead method and it requires only the knowledge of function values and an initial
 4 | point.
 5 | 
 6 | retval = fminsearch(custom_function *funcpt,int N,double *xi,double *xf);
 7 | 
 8 | funcpt - function to be minimized. See below and check testfunctions.c for function examples.
 9 | N - Problem size. Number of Variables. fminsearch works best for N <= 15
10 | xi - Initial point (N-dimensional)
11 | xf - Minimized point (N-dimensional)
12 | retval - Return Value ( 1 for success. 0 and 4 for failure. 0 - Input error. 4 - Maximum
13 | Iterations exceeded)
14 | 
15 | */
16 | 
17 | #include <stdio.h>
18 | #include <stdlib.h>
19 | #include "../header/optimc.h"
20 | #include "testfunctions.h"
21 | 
22 | int main(void) {
23 | 	int N, i, retval;
24 | 	double *xi, *xf;
25 | 
26 | 	custom_function quartic_min = {quartic, 0};
27 | 	custom_function rosenbrock_min = {rosenbrock, 0};
28 | 
29 | 	N = 4;
30 | 
31 | 	xi = (double*)malloc(sizeof(double)* N);
32 | 	xf = (double*)malloc(sizeof(double)* N);
33 | 
34 | 	xi[0] = 3; xi[1] = -1; xi[2] = 0; xi[3] = -1;
35 | 
36 | 
37 | 	retval = fminsearch(&quartic_min, N, xi, xf);
38 | 
39 | 	printf("Powell's Quartic Function \n");
40 | 	printf("Return Value %d \nObjective Function %g \n", retval, quartic(xf, N,0));
41 | 
42 | 	printf("Function minimized at : ");
43 | 	for (i = 0; i < N; ++i) {
44 | 		printf("%g ", xf[i]);
45 | 	}
46 | 	printf("\n \n");
47 | 	free(xi);
48 | 	free(xf);
49 | 	printf("\nRosenbrock Function \n");
50 | 	N = 2;
51 | 	xi = (double*)malloc(sizeof(double)* N);
52 | 	xf = (double*)malloc(sizeof(double)* N);
53 | 
54 | 	xi[0] = -1.2; xi[1] = 1;
55 | 	retval = fminsearch(&rosenbrock_min, N, xi, xf);
56 | 
57 | 	printf("Return Value %d \nObjective Function %g \n", retval, rosenbrock(xf, N,0));
58 | 
59 | 	printf("Function minimized at : ");
60 | 	for (i = 0; i < N; ++i) {
61 | 		printf("%g ", xf[i]);
62 | 	}
63 | 
64 | 
65 | 	return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/MINPACK_COPYRIGHT:
--------------------------------------------------------------------------------
 1 | Minpack Copyright Notice (1999) University of Chicago.  All rights reserved
 2 | 
 3 | Redistribution and use in source and binary forms, with or
 4 | without modification, are permitted provided that the
 5 | following conditions are met:
 6 | 
 7 | 1. Redistributions of source code must retain the above
 8 | copyright notice, this list of conditions and the following
 9 | disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials
14 | provided with the distribution.
15 | 
16 | 3. The end-user documentation included with the
17 | redistribution, if any, must include the following
18 | acknowledgment:
19 | 
20 |    "This product includes software developed by the
21 |    University of Chicago, as Operator of Argonne National
22 |    Laboratory.
23 | 
24 | Alternately, this acknowledgment may appear in the software
25 | itself, if and wherever such third-party acknowledgments
26 | normally appear.
27 | 
28 | 4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS"
29 | WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE
30 | UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND
31 | THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR
32 | IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES
33 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE
34 | OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY
35 | OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR
36 | USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF
37 | THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4)
38 | DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION
39 | UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL
40 | BE CORRECTED.
41 | 
42 | 5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT
43 | HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF
44 | ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT,
45 | INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF
46 | ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF
47 | PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER
48 | SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT
49 | (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE,
50 | EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE
51 | POSSIBILITY OF SUCH LOSS OR DAMAGES.


--------------------------------------------------------------------------------
/src/optimc.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * optimc.h
  3 |  *
  4 |  *  Created on: Mar 16, 2014
  5 |  *      Author: HOME
  6 |  */
  7 | 
  8 | #ifndef OPTIMC_H_
  9 | #define OPTIMC_H_
 10 | 
 11 | #include "nls.h"
 12 | 
 13 | #ifdef __cplusplus
 14 | extern "C" {
 15 | #endif
 16 | 
 17 | typedef struct opt_set* opt_object;
 18 | 
 19 | opt_object opt_init(int N);
 20 | 
 21 | struct opt_set{
 22 | 	int N;
 23 | 	double objfunc;
 24 | 	double eps;
 25 | 	double gtol;
 26 | 	double stol;
 27 | 	double ftol;
 28 | 	double xtol;
 29 | 	double maxstep;
 30 | 	int MaxIter;
 31 | 	int Iter;
 32 | 	int Method;
 33 | 	int retval;
 34 | 	char MethodName[50];
 35 | 	double xopt[1];
 36 | };
 37 | 
 38 | typedef struct nls_set* nls_object;
 39 | 
 40 | nls_object nls_init(int M,int N);
 41 | 
 42 | struct nls_set{
 43 | 	int M;
 44 | 	int N;
 45 | 	double eps;
 46 | 	double epsfcn;
 47 | 	double factor;
 48 | 	double gtol;
 49 | 	double ftol;
 50 | 	double xtol;
 51 | 	int MaxIter;
 52 | 	int Maxfev;
 53 | 	int Iter;
 54 | 	int nfev;
 55 | 	int njev;
 56 | 	int ldfjac;
 57 | 	int mode;
 58 | 	int retval;
 59 | 	double xopt[1];
 60 | };
 61 | 
 62 | void setnlsTOL(nls_object obj,double gtol,double ftol,double xtol);
 63 | 
 64 | void optsummary(opt_object obj);
 65 | 
 66 | void setMaxIter(opt_object obj,int MaxIter);
 67 | 
 68 | void setMaxStep(opt_object obj, double maxstep);
 69 | 
 70 | void setTOL(opt_object obj,double gtol,double stol,double ftol,double xtol);
 71 | 
 72 | int fminsearch(custom_function *funcpt,int N,double *xi,double *xf);
 73 | 
 74 | double fminbnd(custom_funcuni *funcuni,double a, double b);
 75 | 
 76 | int fminunc(custom_function *funcpt,custom_gradient *funcgrad,int N,double *xi,double maxstep, int method,double *xf);
 77 | 
 78 | int fminnewt(custom_function *funcpt, custom_gradient *funcgrad, int N, double *xi,
 79 | 	     double delta,double *dx,double fsval,double maxstep,int method,double *xf);
 80 | 
 81 | double brent_local_min(custom_funcuni *funcuni,double a, double b, double t, double eps, double *x);
 82 | 
 83 | void optimize(opt_object obj, custom_function *funcpt, custom_gradient *funcgrad, int N, double *xi,
 84 | 		int method);
 85 | 
 86 | void free_opt(opt_object object);
 87 | 
 88 | int levmar(custom_funcmult *funcmult, custom_jacobian *jacobian,
 89 | 		double *xi,int M, int N,double *xf);
 90 | 
 91 | void nls(nls_object obj, custom_funcmult *funcmult, custom_jacobian *jacobian,
 92 | 		double *xi);
 93 | 
 94 | void nls_scale(nls_object obj, custom_funcmult *funcmult, custom_jacobian *jacobian,
 95 | 		double *diag,double *xi);
 96 | 
 97 | void nlssummary(nls_object obj);
 98 | 
 99 | void free_nls(nls_object object);
100 | 
101 | #ifdef __cplusplus
102 | }
103 | #endif
104 | 
105 | #endif /* OPTIMC_H_ */
106 | 


--------------------------------------------------------------------------------
/src/newtonmin.h:
--------------------------------------------------------------------------------
 1 | #ifndef NEWTONMIN_H_
 2 | #define NEWTONMIN_H_
 3 | 
 4 | #include "lnsrchmp.h"
 5 | 
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | double signx(double x);
12 | 
13 | double cholmod(double *A, int N, double *L, double eps,double maxinp);
14 | 
15 | double modelhess(double *A,int N,double *dx,double eps,double *L);
16 | 
17 | void linsolve_lower(double *L,int N,double *b,double *x);
18 | 
19 | int hessian_fd(custom_function *funcpt, double *x, int N, double *dx, double eps, double *f);
20 | 
21 | int hessian_fd2(custom_function *funcpt,double *x,int N,double *dx,double eps,double *f);
22 | 
23 | void fdjac(custom_gradient *funcgrad, double *x, int N, double *jac, double *dx, double eps2, double *J);
24 | 
25 | void hessian_fdg(custom_gradient *funcgrad, double *x, int N, double *jac, double *dx, double eps2, double *H);
26 | 
27 | int hessian_opt(custom_function *funcpt, custom_gradient *funcgrad, double *x, int N, double *jac,
28 | 		double *dx,double eps,double eps2,double *H);
29 | 
30 | int lnsrch(custom_function *funcpt, double *xi, double *jac, double *p, int N, double * dx, double maxstep, double stol, double *x);
31 | 
32 | int lnsrchmod(custom_function *funcpt, custom_gradient *funcgrad, double *xi, double *jac, double *p, int N, double * dx, double maxstep,
33 | 	double eps2,double stol,double *x,double *jacf);
34 | 	
35 | int lnsrchcg(custom_function *funcpt, custom_gradient *funcgrad, double *xi, double *jac, double *p, int N, double * dx, double maxstep,
36 | 	double eps2,double stol,double *x,double *jacf);
37 | 	
38 | int stopcheck(double fx,int N,double *xc,double *xf,double *jac,double *dx,double fsval,double gtol,double stol,int retval);
39 | 
40 | int newton_min_func(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double fsval, double maxstep, int MAXITER,
41 | 	int *niter, double eps, double gtol, double stol, double *xf);
42 | 
43 | int trsrch(custom_function *funcpt, double *xi, double *jac, double *sN, int N, double * dx, double maxstep,
44 | 		int iter,double *L,double *hess,double stol,double *ioval,double eps,double *x);
45 | 		
46 | void trstep(double *jac,double *sN,int N,double * dx,double *L,double *hess,double nlen,double *ioval,double eps,
47 | 		double *step);
48 | 
49 | int trupdate(custom_function *funcpt, double *xi, double *jac, double *step, int N, double * dx, double maxstep,
50 | 		int retcode,double *L,double *hess,double stol,int method,double *ioval,double *xprev,double *funcprev,double *x);		
51 | 
52 | void trstep_ddl(double *jac,double *sN,int N,double * dx,double maxstep,double *L,double *hess,double nlen,double *ioval,
53 | 		double *ssd,double *v,double *step);		
54 | 		
55 | int trsrch_ddl(custom_function *funcpt, double *xi, double *jac, double *sN, int N, double * dx, double maxstep,
56 | 		int iter,double *L,double *hess,double stol,double *ioval,double *x);		
57 | 
58 | int newton_min_trust(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double fsval, double delta,
59 | 		int method,int MAXITER,int *niter,double eps,double gtol,double stol,double *xf);
60 | 
61 | #ifdef __cplusplus
62 | }
63 | #endif
64 | 
65 | #endif /* NEWTONMIN_H_ */
66 | 


--------------------------------------------------------------------------------
/src/lnsrchmp.h:
--------------------------------------------------------------------------------
 1 | #ifndef LNSRCHMP_H_
 2 | #define LNSRCHMP_H_
 3 | 
 4 | #include "matrix.h"
 5 | 
 6 | #define EPSILON 2.7182818284590452353602874713526624977572
 7 | 
 8 | #ifdef __cplusplus
 9 | extern "C" {
10 | #endif
11 | 
12 | typedef struct custom_function_set custom_function;
13 | 
14 | struct custom_function_set{
15 | 	double(*funcpt) (double *x,int N,void *params);// Function in N variables
16 | 	void *params;
17 | };
18 | 
19 | typedef struct custom_gradient_set custom_gradient;
20 | 
21 | struct custom_gradient_set{
22 | 	void(*funcgrad) (double *x, int N,double *g, void *params);
23 | 	void *params;
24 | };
25 | 
26 | typedef struct custom_funcuni_set custom_funcuni;
27 | 
28 | struct custom_funcuni_set{
29 | 	double(*funcuni) (double x, void *params);// Function in one variable
30 | 	void *params;
31 | };
32 | 
33 | typedef struct custom_funcmult_set custom_funcmult;
34 | 
35 | struct custom_funcmult_set{
36 | 	void(*funcmult) (double *x,int M, int N, double *f, void *params);// M Functions in N variables
37 | 	void *params;
38 | };
39 | 
40 | typedef struct custom_jacobian_set custom_jacobian;
41 | 
42 | struct custom_jacobian_set{
43 | 	void(*jacobian) (double *x, int M, int N, double *jac, void *params);
44 | 	void *params;
45 | };
46 | 
47 | #define FUNCPT_EVAL(F,x,N) (*((F)->funcpt))(x,N,(F)->params)
48 | 
49 | #define FUNCGRAD_EVAL(F,x,N,g) (*((F)->funcgrad))(x,N,(g),(F)->params)
50 | 
51 | #define FUNCUNI_EVAL(F,x) (*((F)->funcuni))(x,(F)->params)
52 | 
53 | #define FUNCMULT_EVAL(F,x,M,N,f) (*((F)->funcmult))(x,M,N,(f),(F)->params)
54 | 
55 | #define JACOBIAN_EVAL(F,x,M,N,jac) (*((F)->jacobian))(x,M,N,(jac),(F)->params)
56 | 
57 | int stopcheck_mt(double fx, int N, double *xc, double *xf, double *jac, double *dx, double fsval, double gtol, double stol, int retval);
58 | 
59 | int stopcheck2_mt(double fx, int N, double fo, double *jac, double *dx, double eps,double stoptol, double functol, int retval);
60 | 
61 | int stopcheck3_mt(double *xi,double *xf,double fx, int N, double fo, double *jac, double *dx, double eps,
62 | 		double stoptol, double functol, int retval);
63 | 
64 | int grad_fd(custom_function *funcpt,custom_gradient *funcgrad, double *x, int N, double *dx, double eps2, double *f);
65 | 
66 | int grad_cd(custom_function *funcpt, custom_gradient *funcgrad, double *x, int N, double *dx,
67 | 		double eps3, double *f);
68 | 
69 | int grad_calc2(custom_function *funcpt, double *x, int N, double *dx, double eps3, double *f);
70 | 
71 | int grad_calc(custom_function *funcpt, double *x, int N, double *dx, double eps2, double *f);
72 | 
73 | int cstep(double *stx, double *fx, double *dx, double *sty, double *fy, double *dy, double *stp, double *fp, double *dp, int *brackt,
74 | 	double  stpmin, double stpmax);
75 | 
76 | int cvsrch(custom_function *funcpt, custom_gradient *funcgrad, double *x, double *f, double *g, double *stp, double *s, int N, double *dx, double maxstep,
77 | 	int MAXITER,double eps2,double ftol, double gtol, double xtol);
78 | 
79 | int lnsrchmt(custom_function *funcpt, custom_gradient *funcgrad, double *xi, double *f, double *jac, double *alpha, double *p, int N, double *dx, double maxstep, int MAXITER,
80 | 		double eps2,double ftol, double gtol, double xtol, double *x);
81 | 
82 | #ifdef __cplusplus
83 | }
84 | #endif
85 | 
86 | #endif /* LNSRCHMP_H_ */
87 | 


--------------------------------------------------------------------------------
/header/optimc.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * optimc.h
  3 |  *
  4 |  *  Created on: Mar 16, 2014
  5 |  *      Author: HOME
  6 |  */
  7 | 
  8 | #ifndef OPTIMC_H_
  9 | #define OPTIMC_H_
 10 | 
 11 | #ifdef __cplusplus
 12 | extern "C" {
 13 | #endif
 14 | 
 15 | typedef struct opt_set* opt_object;
 16 | 
 17 | opt_object opt_init(int N);
 18 | 
 19 | struct opt_set{
 20 | 	int N;
 21 | 	double objfunc;
 22 | 	double eps;
 23 | 	double gtol;
 24 | 	double stol;
 25 | 	double ftol;
 26 | 	double xtol;
 27 | 	double maxstep;
 28 | 	int MaxIter;
 29 | 	int Iter;
 30 | 	int Method;
 31 | 	int retval;
 32 | 	char MethodName[50];
 33 | 	double xopt[1];
 34 | };
 35 | 
 36 | typedef struct nls_set* nls_object;
 37 | 
 38 | nls_object nls_init(int M,int N);
 39 | 
 40 | struct nls_set{
 41 | 	int M;
 42 | 	int N;
 43 | 	double eps;
 44 | 	double epsfcn;
 45 | 	double factor;
 46 | 	double gtol;
 47 | 	double ftol;
 48 | 	double xtol;
 49 | 	int MaxIter;
 50 | 	int Maxfev;
 51 | 	int Iter;
 52 | 	int nfev;
 53 | 	int njev;
 54 | 	int ldfjac;
 55 | 	int mode;
 56 | 	int retval;
 57 | 	double xopt[1];
 58 | };
 59 | 
 60 | typedef struct custom_function_set custom_function;
 61 | 
 62 | struct custom_function_set{
 63 | 	double(*funcpt) (double *x,int N,void *params);// Function in N variables
 64 | 	void *params;
 65 | };
 66 | 
 67 | typedef struct custom_gradient_set custom_gradient;
 68 | 
 69 | struct custom_gradient_set{
 70 | 	void(*funcgrad) (double *x, int N,double *g, void *params);
 71 | 	void *params;
 72 | };
 73 | 
 74 | typedef struct custom_funcuni_set custom_funcuni;
 75 | 
 76 | struct custom_funcuni_set{
 77 | 	double(*funcuni) (double x, void *params);// Function in one variable
 78 | 	void *params;
 79 | };
 80 | 
 81 | typedef struct custom_funcmult_set custom_funcmult;
 82 | 
 83 | struct custom_funcmult_set{
 84 | 	void(*funcmult) (double *x,int M, int N, double *f, void *params);// M Functions in N variables
 85 | 	void *params;
 86 | };
 87 | 
 88 | typedef struct custom_jacobian_set custom_jacobian;
 89 | 
 90 | struct custom_jacobian_set{
 91 | 	void(*jacobian) (double *x, int M, int N, double *jac, void *params);
 92 | 	void *params;
 93 | };
 94 | 
 95 | #define FUNCPT_EVAL(F,x,N) (*((F)->funcpt))(x,N,(F)->params)
 96 | 
 97 | #define FUNCGRAD_EVAL(F,x,N,g) (*((F)->funcgrad))(x,N,(g),(F)->params)
 98 | 
 99 | #define FUNCUNI_EVAL(F,x) (*((F)->funcuni))(x,(F)->params)
100 | 
101 | #define FUNCMULT_EVAL(F,x,M,N,f) (*((F)->funcmult))(x,M,N,(f),(F)->params)
102 | 
103 | #define JACOBIAN_EVAL(F,x,M,N,jac) (*((F)->jacobian))(x,M,N,(jac),(F)->params)
104 | 
105 | void setnlsTOL(nls_object obj,double gtol,double ftol,double xtol);
106 | 
107 | void optsummary(opt_object obj);
108 | 
109 | void setMaxIter(opt_object obj,int MaxIter);
110 | 
111 | void setMaxStep(opt_object obj, double maxstep);
112 | 
113 | void setTOL(opt_object obj,double gtol,double stol,double ftol,double xtol);
114 | 
115 | int fminsearch(custom_function *funcpt,int N,double *xi,double *xf);
116 | 
117 | double fminbnd(custom_funcuni *funcuni,double a, double b);
118 | 
119 | int fminunc(custom_function *funcpt,custom_gradient *funcgrad,int N,double *xi,double maxstep, int method,double *xf);
120 | 
121 | int fminnewt(custom_function *funcpt, custom_gradient *funcgrad, int N, double *xi,
122 | 	     double delta,double *dx,double fsval,double maxstep,int method,double *xf);
123 | 
124 | double brent_local_min(custom_funcuni *funcuni,double a, double b, double t, double eps, double *x);
125 | 
126 | void optimize(opt_object obj, custom_function *funcpt, custom_gradient *funcgrad, int N, double *xi,
127 | 		int method);
128 | 
129 | void free_opt(opt_object object);
130 | 
131 | int levmar(custom_funcmult *funcmult, custom_jacobian *jacobian,
132 | 		double *xi,int M, int N,double *xf);
133 | 
134 | void nls(nls_object obj, custom_funcmult *funcmult, custom_jacobian *jacobian,
135 | 		double *xi);
136 | 
137 | void nls_scale(nls_object obj, custom_funcmult *funcmult, custom_jacobian *jacobian,
138 | 		double *diag,double *xi);
139 | 
140 | void nlssummary(nls_object obj);
141 | 
142 | void free_nls(nls_object object);
143 | 
144 | #ifdef __cplusplus
145 | }
146 | #endif
147 | 
148 | #endif /* OPTIMC_H_ */
149 | 


--------------------------------------------------------------------------------
/src/matrix.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * matrix.h
  3 |  *
  4 |  *  Created on: Jul 1, 2013
  5 |  *      Author: USER
  6 |  */
  7 | 
  8 | #ifndef MATRIX_H_
  9 | #define MATRIX_H_
 10 | 
 11 | #include <omp.h>
 12 | #include <stdio.h>
 13 | #include <stdlib.h>
 14 | #include <stddef.h>
 15 | #include <string.h>
 16 | #include <math.h>
 17 | #include <float.h>
 18 | 
 19 | #define CUTOFF 192
 20 | #define TOL 1e-12
 21 | #define BLOCKSIZE 64
 22 | #define SVDMAXITER 50
 23 | 
 24 | 
 25 | #ifdef __cplusplus
 26 | extern "C" {
 27 | #endif
 28 | 
 29 | double macheps();
 30 | 
 31 | double pmax(double a, double b);
 32 | 
 33 | double pmin(double a, double b);
 34 | 
 35 | int imax(int a, int b);
 36 | 
 37 | int imin(int a, int b);
 38 | 
 39 | double signx(double x);
 40 | 
 41 | double l2norm(double *vec, int N);
 42 | 
 43 | int compare (const void* ind1, const void* ind2);
 44 | 
 45 | void sort1d(double* v,int N, int* pos);
 46 | 
 47 | //Array Parallel Implementation may have a lot of overhead
 48 | 
 49 | double array_max_abs(double *array,int N);
 50 | 
 51 | double array_max(double *array,int N);
 52 | 
 53 | double array_min(double *array,int N);
 54 | 
 55 | //void mmult(double* A, double *B, double *C,int ra,int ca, int rb, int cb);
 56 | 
 57 | void dtranspose(double *sig, int rows, int cols,double *col);
 58 | 
 59 | void stranspose(double *sig, int rows, int cols,double *col);
 60 | 
 61 | void rtranspose(double *m, int rows, int cols,double *n, int r, int c);
 62 | 
 63 | void ctranspose(double *sig, int rows, int cols,double *col);
 64 | 
 65 | void mtranspose(double *sig, int rows, int cols,double *col);
 66 | 
 67 | //int minverse(double *xxt, int p);
 68 | 
 69 | void mdisplay(double *A, int row, int col);
 70 | 
 71 | void madd(double* A, double* B, double* C,int rows,int cols);
 72 | 
 73 | void msub(double* A, double* B, double* C,int rows,int cols);
 74 | 
 75 | void scale(double *A, int rows, int cols, double alpha);
 76 | 
 77 | void nmult(double* A, double* B, double* C,int m,int n, int p);
 78 | 
 79 | void tmult(double* A, double* B, double* C,int m,int n, int p);
 80 | 
 81 | void recmult(double* A, double* B, double* C,int m,int n, int p,int sA,int sB, int sC);
 82 | 
 83 | void rmult(double* A, double* B, double* C,int m,int n, int p);
 84 | 
 85 | int findrec(int *a, int *b, int *c);
 86 | 
 87 | double house_2(double*x,int N,double *v);
 88 | 
 89 | void add_zero_pad(double *X, int rows, int cols, int zrow, int zcol,double *Y);
 90 | 
 91 | void remove_zero_pad(double *X, int rows, int cols, int zrow, int zcol,double *Y);
 92 | 
 93 | void madd_stride(double* A, double* B, double* C,int rows,int cols,int sA,int sB,int sC);
 94 | 
 95 | void msub_stride(double* A, double* B, double* C,int rows,int cols,int sA,int sB,int sC);
 96 | 
 97 | void rmadd_stride(double* A, double* B, double* C,int rows,int cols,int p,int sA,int sB,int sC);
 98 | 
 99 | void rmsub_stride(double* A, double* B, double* C,int rows,int cols,int p,int sA,int sB,int sC);
100 | 
101 | void srecmult(double* A, double* B, double* C,int m,int n, int p,int sA,int sB,int sC);
102 | 
103 | void smult(double* A, double* B, double* C,int m,int n, int p);
104 | 
105 | void mmult(double* A, double* B, double* C,int m,int n, int p);
106 | 
107 | void ludecomp(double *A,int N,int *ipiv);
108 | 
109 | void linsolve(double *A,int N,double *b,int *ipiv,double *x);
110 | 
111 | void minverse(double *A,int M,int *ipiv,double *inv);
112 | 
113 | void eye(double *mat,int N);
114 | 
115 | double house(double*x,int N,double *v);
116 | 
117 | void housemat(double *v, int N,double beta,double *mat);
118 | 
119 | void qrdecomp(double *A, int M, int N,double *bvec);
120 | 
121 | void getQR(double *A,int M,int N,double *bvec,double *Q, double *R);
122 | 
123 | void hessenberg(double *A,int N);
124 | 
125 | void francisQR(double *A,int N);
126 | 
127 | void eig22(double *A, int stride,double *eigre,double *eigim);
128 | 
129 | int francis_iter(double *A, int N, double *H);
130 | 
131 | void eig(double *A,int N,double *eigre,double *eigim);
132 | 
133 | int cholu(double *A, int N);
134 | 
135 | int bcholu(double *A, int N);
136 | 
137 | int chol(double *A, int N);
138 | 
139 | void chold(double *A, int N);
140 | 
141 | void svd_sort(double *U,int M,int N,double *V,double *q);
142 | 
143 | int svd(double *A,int M,int N,double *U,double *V,double *q);
144 | 
145 | int rank(double *A, int M,int N);
146 | 
147 | #ifdef __cplusplus
148 | }
149 | #endif
150 | 
151 | #endif /* MATRIX_H_ */
152 | 


--------------------------------------------------------------------------------
/src/brent.c:
--------------------------------------------------------------------------------
  1 | #include "brent.h"
  2 | 
  3 | /*
  4 |  * brent.c
  5 |  *
  6 |  *  Copyright (c) 2014, Rafat Hussain
  7 |  *	License : BSD 3-Clause
  8 |  *	See COPYRIGHT for more details
  9 |  */
 10 | 
 11 | /*
 12 |  * Algorithms are C translation of Richard Brent's Algol 60 procedure as published in
 13 |  * Algoritms For Minimization Without Derivatives, Richard P. Brent
 14 |  * Chapters 3-5
 15 |  */ 
 16 | double brent_zero(custom_funcuni *funcuni, double ai, double bi, double tol, double eps) {
 17 | 	double bz;
 18 | 	double a,b,c,d,e,fa,fb,fc;
 19 | 	double m,s,etol,fd,p,q,r;
 20 | 	int iter;
 21 | 	
 22 | 	fd = eps;
 23 | 	
 24 | 	a = ai;
 25 | 	b = bi;
 26 | 	c = a;
 27 | 	fa = FUNCUNI_EVAL(funcuni,a);
 28 | 	fb = FUNCUNI_EVAL(funcuni,b);
 29 | 	fc = fa;
 30 | 	e = b - a;
 31 | 	d = e;
 32 | 	
 33 | 	if (fabs(fc) < fabs(fb)) {
 34 | 		a = b;
 35 | 		b = c;
 36 | 		c = a;
 37 | 		fa = fb;
 38 | 		fb = fc;
 39 | 		fc = fa;
 40 | 	}
 41 | 		
 42 | 	etol = 2 * fd * fabs(b) + tol;
 43 | 	m = 0.5 * (c - b);
 44 | 	
 45 | 	bz = b;
 46 | 	iter = 0;
 47 | 	while (fabs ( m ) >= etol && fb != 0.0) {
 48 | 
 49 | 		iter++;
 50 | 		
 51 | 		if (fabs(e) < etol || fabs(fa) <= fabs(fb)) {
 52 | 			e = m;
 53 | 			d = e;
 54 | 		} else {
 55 | 			
 56 | 			s = fb/fa;
 57 | 			if (a == c) {
 58 | 				// linear interpolation
 59 | 				p = 2.0 * m * s;
 60 | 				q = 1.0 - s;
 61 | 			} else {
 62 | 				// inverse quadratic interpolation
 63 | 				q = fa/fc;
 64 | 				r = fb/fc;
 65 | 				p = s * (2 * m * q * (q - r) - (b - a) * (r - 1.0));
 66 | 				q = (q - 1.0) * (r - 1.0) * (s - 1.0);
 67 | 			}
 68 | 			
 69 | 			if (p > 0.0) {
 70 | 				q = - q;
 71 | 			} else {
 72 | 				p = - p;
 73 | 			}
 74 | 
 75 | 			s = e;
 76 | 			e = d;
 77 | 			
 78 | 			if ( 2.0 * p < 3.0 * m * q - fabs( etol * q ) && p < fabs( 0.5 * s * q ) ) {
 79 | 				d = p /q;
 80 | 			} else {
 81 | 				e = m;
 82 | 				d = e;
 83 | 			}
 84 | 			
 85 | 		}
 86 | 		a = b;
 87 | 		fa = fb;
 88 | 		
 89 | 		if ( fabs(d) > etol ) {
 90 | 			b += d;
 91 | 		} else if ( 0.0 < m ) {
 92 | 			b += etol;
 93 | 		} else {
 94 | 			b -= etol;
 95 | 		}
 96 | 		
 97 | 		fb = FUNCUNI_EVAL(funcuni,b);
 98 | 		
 99 | 		bz = b;
100 | 		if ( ( 0.0 < fb && 0.0 < fc ) || ( fb <= 0.0 && fc <= 0.0 ) ) {
101 | 			c = a;
102 | 			fc = fa;
103 | 			e = b - a;
104 | 			d = e;
105 | 		}
106 |       
107 | 		// To the end of the loop
108 | 		
109 | 		if (fabs(fc) < fabs(fb)) {
110 | 			a = b;
111 | 			b = c;
112 | 			c = a;
113 | 			fa = fb;
114 | 			fb = fc;
115 | 			fc = fa;
116 | 		}
117 | 		
118 | 		etol = 2 * fd * fabs(b) + tol;
119 | 		m = 0.5 * (c - b);
120 | 		
121 | 	}
122 | 	
123 | 	return bz;
124 | }
125 | 
126 | double brent_local_min(custom_funcuni *funcuni, double a, double b, double t, double eps, double *x) {
127 | 	double c,d,e,m,p,q,r,tol,t2;
128 | 	double u,v,w,fu,fv,fw,fx;
129 | 	double fd;
130 | 	
131 | 	fd = eps;
132 | 	
133 | 	c = (3.0 - sqrt(5.0)) / 2.0;
134 | 	*x = a + c * (b - a);
135 | 	w = *x; v = w;
136 | 	e = 0;
137 | 	fx = FUNCUNI_EVAL(funcuni,*x);
138 | 	fw = fx; fv = fw;
139 | 	
140 | 	m = 0.5 * (a + b);
141 | 	tol = fd * fabs(*x) + t;
142 | 	t2 = 2.0 * tol;
143 | 	
144 | 	while (fabs(*x - m) > t2 - 0.5 * (b - a)) {
145 | 		p = 0; q = 0; r = 0;
146 | 		
147 | 		if (fabs(e) > tol) {
148 | 			r = (*x - w) * (fx - fv);
149 | 			q = (*x - v) * (fx - fw);
150 | 			p = (*x - v) * q - (*x - w) * r;
151 | 			q = 2.0 * (q - r);
152 | 			if (q > 0.0) {
153 | 				p = -p;
154 | 			} else {
155 | 				q = -q;
156 | 			}
157 | 			r = e;
158 | 			e = d;
159 | 		}
160 | 		
161 | 		if (fabs(p) < fabs(0.5 * q * r) && p < q * (a - *x) && p < q * (b - *x)) {
162 | 			d = p / q;
163 | 			u = *x + d;
164 | 			
165 | 			if ( (u - a) < t2 || (b - u) < t2) {
166 | 				if (*x < m) {
167 | 					d = tol;
168 | 				} else {
169 | 					d = -tol;
170 | 				}
171 | 			}
172 | 		} else {
173 | 			if (*x < m) {
174 | 				e = b - *x;
175 | 			} else {
176 | 				e = a - *x;
177 | 			}
178 | 			d = c * e;
179 | 		}
180 | 		
181 | 		if (fabs(d) >= tol) {
182 | 			u = *x + d;
183 | 		} else if (d > 0) {
184 | 			u = *x + tol;
185 | 		} else {
186 | 			u = *x - tol;
187 | 		}
188 | 		
189 | 		fu = FUNCUNI_EVAL(funcuni,u);
190 | 		
191 | 		if (fu <= fx) {
192 | 			if (u < *x) {
193 | 				b = *x;
194 | 			} else {
195 | 				a = *x;
196 | 			}
197 | 			v = w;
198 | 			fv = fw;
199 | 			w = *x;
200 | 			fw = fx;
201 | 			*x = u;
202 | 			fx = fu;
203 | 		} else {
204 | 			if (u < *x) {
205 | 				a = u;
206 | 			} else {
207 | 				b = u;
208 | 			}
209 | 			if (fu <= fw || w == *x) {
210 | 				v = w;
211 | 				fv = fw;
212 | 				w = u;
213 | 				fw = fu;
214 | 			} else if (fu <= fv || v == *x || v == w) {
215 | 				v = u;
216 | 				fv = fu;
217 | 			}
218 | 		}
219 | 		
220 | 		//End of the loop
221 | 		m = 0.5 * (a + b);
222 | 		tol = fd * fabs(*x) + t;
223 | 		t2 = 2.0 * tol;
224 | 		
225 | 	}
226 | 	
227 | 	return fx;
228 | }
229 | 


--------------------------------------------------------------------------------
/test/testfunctions.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * testfunctions.c
  3 |  *
  4 |  *  Created on: Mar 16, 2014
  5 |  *      Author: HOME
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | #include <math.h>
 11 | #include "testfunctions.h"
 12 | 
 13 | //levmar test
 14 | 
 15 | void fk10(double *x, int M, int N, double *f, void *params) {
 16 | 	int i;
 17 | 
 18 | 	for (i = 1; i <= M; ++i) {
 19 | 		f[i - 1] = 2 + 2*i - exp(i*x[0]) - exp(i*x[1]);
 20 | 	}
 21 | }
 22 | 
 23 | void fk10jac(double *x, int M, int N, double *f, void *params) {
 24 | 	int i;
 25 | 
 26 | 	for(i = 1; i <= M;++i) {
 27 | 		f[(i-1)*N] = -i * exp(i*x[0]);
 28 | 		f[(i-1)*N+1] = -i * exp(i*x[1]);
 29 | 	}
 30 | 
 31 | }
 32 | 
 33 | void fpowell(double *x, int M, int N, double *f, void *params) {
 34 | 	double pi;
 35 | 
 36 | 	pi = 3.14159265359;
 37 | 
 38 | 	if (x[0] > 0.0) {
 39 | 		f[0] = 10 * (x[2] - 10 * (atan(x[1]/x[0])/(2*pi)));
 40 | 	} else if (x[0] < 0.0) {
 41 | 		f[0] = 10 * (x[2] - 10 * (atan(x[1]/x[0])/(2*pi)) - 5);
 42 | 	}
 43 | 
 44 | 	f[1] = 10 * (sqrt(x[0]*x[0] + x[1]*x[1]) - 1);
 45 | 	f[2] = x[2];
 46 | }
 47 | 
 48 | double myvalue
 49 | (
 50 |     double   *x,
 51 | 	int       n, 
 52 | 	void *params
 53 | )
 54 | {
 55 |     double f ;
 56 |     int i ;
 57 |     f = 0. ;
 58 |     for (i = 0; i < n; i++)
 59 |     {
 60 |         //t = i+1 ;
 61 |         //t = sqrt (t) ;
 62 |         //f += exp (x [i]) - t*x [i] ;
 63 |     	f += x[i] * x[i] * x[i] * x[i];
 64 |     }
 65 |     return (f) ;
 66 | }
 67 | 
 68 | void myvaluegrad(
 69 | 		double *x,
 70 | 		int n,
 71 | 		double *jac, 
 72 | 		void *params
 73 | )
 74 | {
 75 | 	int i;
 76 | 	for(i = 0; i < n; ++i) {
 77 | 		jac[i] = 4 * x[i] * x[i] * x[i];
 78 | 	}
 79 | }
 80 | 
 81 | double quartic(double *x,int N,void *params) {
 82 | 	double f;
 83 | 	// Powell's Quartic Function
 84 | 	f = (x[0] + 10 * x[1] ) * (x[0] + 10 * x[1] ) + 5 * (x[2] - x[3]) * (x[2] - x[3])
 85 | 	+ (x[1] - 2 * x[2]) * (x[1] - 2 * x[2]) * (x[1] - 2 * x[2]) * (x[1] - 2 * x[2])
 86 | 	+ 10 * (x[0]-x[3]) * (x[0]-x[3]) * (x[0]-x[3]) * (x[0]-x[3]) ;
 87 | 	return f;
 88 | }
 89 | 
 90 | void quarticgrad(double *x, int N, double *g, void *params) {
 91 | 	double t1,t2,t3,t4;
 92 | 	// Gradient of Powell's Quartic Function
 93 | 	t1 = (x[0] + 10 * x[1]);
 94 | 	t2 = (x[2] - x[3]);
 95 | 	t3 = (x[1] - 2 * x[2]);
 96 | 	t4 = (x[0] - x[3]);
 97 | 
 98 | 	g[0] = 2*t1 + 40*t4*t4*t4;
 99 | 	g[1] = 20*t1 + 4*t3*t3*t3;
100 | 	g[2] = 10*t2 - 8*t3*t3*t3;
101 | 	g[3] = -10*t2 - 40*t4*t4*t4;
102 | 
103 | }
104 | 
105 | double rosenbrock(double *x,int N,void *params) {
106 | 	double f,alpha,alpha2;
107 | 	alpha = 1;
108 | 	alpha2 = alpha*alpha;
109 | 	f = 100 * (x[0]*x[0]*alpha2 - x[1]/alpha)* (x[0]*x[0]*alpha2 - x[1]/alpha) + (1 - x[0]*alpha) * (1 - x[0]*alpha);
110 | 
111 | 	return f;
112 | }
113 | 
114 | void rosenbrockgrad(double *x, int N, double *g, void *params) {
115 | 	double t,alpha,alpha2;
116 | 	alpha = 1;
117 | 	alpha2 = alpha * alpha;
118 | 
119 | 	t = (x[0]*x[0]*alpha2 - x[1]/alpha);
120 | 
121 | 	g[0] = 400*t*alpha2*x[0] - 2 * alpha * (1 - x[0]*alpha);
122 | 	g[1] = -200*t/alpha;
123 | }
124 | 
125 | double brown(double *x, int N, void *params) {
126 | 	double f;
127 | 	f = (x[0] - 1e06)*(x[0] - 1e06) + (x[1] - 2*1e-06)*(x[1] - 2*1e-06) + (x[0]*x[1] - 2)*(x[0]*x[1] - 2);
128 | 
129 | 	return f;
130 | }
131 | 
132 | void browngrad(double *x, int N, double *g, void *params) {
133 | 	double t1,t2,t3;
134 | 
135 | 	t1 = x[0] - 1e06;
136 | 	t2 = x[1] - 2*1e-06;
137 | 	t3 = x[0]*x[1] - 2;
138 | 	//printf("gradient %g %g %g \n",t1,t2,t3);
139 | 
140 | 	g[0] = 2 * t1 + 2 * t3 * x[1];
141 | 	g[1] = 2 * t2 + 2 * t3 * x[0];
142 | 	//printf("gradient %g %g \n",2 * t2,2 * t3 * x[0]);
143 | }
144 | 
145 | double powell(double *x, int N, void *params) {
146 | 	double f;
147 | 	f = (10000*x[0]*x[1] - 1) * (10000*x[0]*x[1] - 1) + (exp(-x[0]) + exp(-x[1]) - 1.0001) * (exp(-x[0]) + exp(-x[1]) - 1.0001);
148 | 	return f;
149 | }
150 | 
151 | void powellgrad(double *x, int N, double *g, void *params) {
152 | 	double t1,t2;
153 | 
154 | 	t1 = (10000*x[0]*x[1] - 1);
155 | 	t2 = (exp(-x[0]) + exp(-x[1]) - 1.0001);
156 | 
157 | 	g[0] = 2 * t1* 10000 * x[1] - 2 * exp(-x[0]) * t2;
158 | 	g[1] = 2 * t1* 10000 * x[0] - 2 * exp(-x[1]) * t2;
159 | }
160 | 
161 | double beale(double *x, int N, void *params) {
162 | 	double f;
163 | 	f = (1.5 - x[0] *(1 - x[1])) * (1.5 - x[0] *(1 - x[1])) + (2.25 - x[0] *(1 - x[1] * x[1])) * (2.25 - x[0] *(1 - x[1] * x[1])) +
164 | 			 	 (2.625 - x[0] *(1 - x[1] * x[1] * x[1])) * (2.625 - x[0] *(1 - x[1] * x[1] * x[1]));
165 | 	return f;
166 | }
167 | 
168 | double froth(double *x, int N, void *params) {
169 | 	double f;
170 | 	f = (-13 + x[0] + ((5 - x[1]) * x[1] - 2.0) *x[1]) * (-13 + x[0] + ((5 - x[1]) * x[1] - 2.0) *x[1]) +
171 | 			(-29 + x[0] + ((x[1] + 1) * x[1] - 14.0) *x[1]) * (-29 + x[0] + ((x[1] + 1) * x[1] - 14.0) *x[1]);
172 | 	return f;
173 | }
174 | 
175 | double humps(double x,void *params) {
176 | 	double f;
177 | 
178 | 	f = 1./((x - 0.3)*(x - 0.3) + 0.01) + 1./((x - 0.9)*(x - 0.9) + 0.04) - 6;
179 | 
180 | 	return f;
181 | }
182 | 
183 | double func4(double *x, int N, void *params) {
184 | 	double f;
185 | 	f = pow((x[0]-2.0),4.0) + pow((x[0]-2.0),2.0) * x[1]*x[1] + (x[1] + 1.0) * (x[1] + 1.0);
186 | 
187 | 
188 | 	return f;
189 | }
190 | 
191 | double func1(double *x, int N, void *params)
192 | {
193 | 	double f;
194 | 
195 | 	//f = pow((x[0]-2.0),4.0) + pow((x[0]-2.0),2.0) * x[1]*x[1] + (x[1] + 1.0) * (x[1] + 1.0);
196 | 	//f = 100 * (x[0]*x[0]*alpha2 - x[1]/alpha)* (x[0]*x[0]*alpha2 - x[1]/alpha) + (1 - x[0]*alpha) * (1 - x[0]*alpha);
197 | 	//f = (x[0] + 2 * x[1] - 7) * (x[0] + 2 * x[1] - 7) + (2*x[0] + x[1] - 5) * (2*x[0] + x[1] - 5);
198 | /*
199 | 	f = (x[0] + 10 * x[1] ) * (x[0] + 10 * x[1] ) + 5 * (x[2] - x[3]) * (x[2] - x[3])
200 | 	+ (x[1] - 2 * x[2]) * (x[1] - 2 * x[2]) * (x[1] - 2 * x[2]) * (x[1] - 2 * x[2])
201 | 	+ 10 * (x[0]-x[3]) * (x[0]-x[3]) * (x[0]-x[3]) * (x[0]-x[3]) ;
202 | */
203 | 
204 | 	//f = (1.0 /(1.0 + (x[0] - x[1]) * (x[0] - x[1]))) + sin(pi *x[1] * x[2] / 2.0)
205 | 	//+ exp(-(((x[0]+x[2])/x[1]) - 2) * (((x[0] +x[2])/x[1]) - 2));
206 | 	//f = -1.0 * f;
207 | 
208 | 
209 | 	//f = (10000*x[0]*x[1] - 1) * (10000*x[0]*x[1] - 1) + (exp(-x[0]) + exp(-x[1]) - 1.0001) * (exp(-x[0]) + exp(-x[1]) - 1.0001);
210 | 	//f = log(f);
211 | 	//printf("fval %g \n",f);
212 | 
213 | 	//f = (10000*x[0]*x[1] - 1) + (exp(-x[0]) + exp(-x[1]) - 1.0001) ;
214 | 
215 | 	//f = (x[0] - 1e06)*(x[0] - 1e06) + (x[1] - 2*1e-06)*(x[1] - 2*1e-06) + (x[0]*x[1] - 2)*(x[0]*x[1] - 2);
216 | 	f = 100 * (x[1]-x[0]*x[0]) * (x[1]-x[0]*x[0]) + ( 1.0 - x[0] ) * ( 1.0 - x[0] )  + 90 *(x[3]-x[2]*x[2])*(x[3]-x[2]*x[2])
217 | 	+ ( 1.0 - x[2])*( 1.0 - x[2]) + 10 * (x[1] + x[3] - 2)*(x[1] + x[3] - 2) + 0.1 * (x[1] - x[3]) *(x[1] - x[3]);
218 | 	return f;
219 | }
220 | 
221 | double tf6(double *x,int N,void *params) {
222 | 	double f;
223 | 	double *p = (double*) params;
224 | 
225 | 	f = p[0] * (x[1]-x[0]*x[0]) * (x[1]-x[0]*x[0]) + ( 1.0 - x[0] ) * ( 1.0 - x[0] )  + p[1] *(x[3]-x[2]*x[2])*(x[3]-x[2]*x[2])
226 | 	+ ( 1.0 - x[2])*( 1.0 - x[2]) + 10 * (x[1] + x[3] - 2)*(x[1] + x[3] - 2) + p[2] * (x[1] - x[3]) *(x[1] - x[3]);
227 | 	return f;
228 | }
229 | 


--------------------------------------------------------------------------------
/src/conjgrad.c:
--------------------------------------------------------------------------------
  1 | #include "conjgrad.h"
  2 | 
  3 | static int richolu(double *A,int N, int stride, double *U22) {
  4 | 	int sc;
  5 | 	int j,i,u,w;
  6 | 	double u11;
  7 | 	
  8 | 	if (N == 1) {
  9 | 		if (A[0] > 0) {
 10 | 			A[0] = sqrt(A[0]);
 11 | 			return 0;
 12 | 		} else {
 13 | 			return -1;
 14 | 		}
 15 | 	} else {
 16 | 		if (A[0] < 0) {
 17 | 			return -1;
 18 | 		}
 19 | 		u11 = sqrt(A[0]);
 20 | 		A[0] = u11;
 21 | 		for (j = 1; j < N;++j) {
 22 | 			if (A[j] != 0) {
 23 | 				A[j] /= u11;
 24 | 			}
 25 | 		}
 26 | 		mmult(A+1,A+1,U22,N-1,1,N-1);
 27 | 		for (i = 0; i < N-1; ++i) {
 28 | 			u = stride + 1 + i * stride;
 29 | 			w = i * (N-1);
 30 | 			for(j = i; j < N-1;j++) {
 31 | 				if (A[j + u] != 0) {
 32 | 					A[j + u] -= U22[j + w];
 33 | 				}
 34 | 			}
 35 | 		}
 36 | 		
 37 | 		sc = richolu(A+stride+1,N-1,stride,U22);
 38 | 		if (sc == -1) {
 39 | 			return -1;
 40 | 		}
 41 | 		
 42 | 	}
 43 | 	
 44 | 	return sc;
 45 | 	
 46 | }
 47 | 
 48 | 
 49 | static int icholu(double *A, int N) {
 50 | 	int stride,i,j,t,sc;
 51 | 	double *U22;
 52 | 	U22 = (double*) malloc(sizeof(double) * N * N);
 53 | 	stride = N; 
 54 | 	
 55 | 	sc = richolu(A,N,stride,U22);
 56 | 	
 57 | 	for(i=0; i < N;++i) {
 58 | 		t = i *N;
 59 | 		for(j=0;j < i;++j) {
 60 | 			A[t+j] = 0.;
 61 | 		}
 62 | 	}
 63 | 
 64 | 	free(U22);
 65 | 	return sc;
 66 | 	
 67 | }
 68 | 
 69 | int ichol(double *A, int N) {
 70 | 	int sc;
 71 | 	sc = icholu(A,N);
 72 | 	return sc;
 73 | }
 74 | 
 75 | int stopcheck2(double fx,int N,double *xc,double *xf,double *jac,double *dx,double fsval,double gtol,double stol) {
 76 | 	int rcode,i;
 77 | 	double num,den;
 78 | 	double stop0;
 79 | 	double *scheck;
 80 | 	
 81 | 	rcode = 0;	
 82 | 	
 83 | 	scheck = (double*) malloc(sizeof(double) *N);
 84 | 	
 85 | 	if (fabs(fx) > fabs(fsval)) {
 86 | 			den = fabs(fx);
 87 | 	} else {
 88 | 			den = fabs(fsval);
 89 | 	}
 90 | 	for(i = 0; i < N;++i) {
 91 | 		if (fabs(xf[i]) > 1.0 / fabs(dx[i])) {
 92 | 			num = fabs(xf[i]);
 93 | 		} else {
 94 | 			num = 1.0 / fabs(dx[i]);
 95 | 		}
 96 | 		scheck[i] = fabs(jac[i]) * num / den;
 97 | 	}
 98 | 	
 99 | 	stop0 = array_max_abs(scheck,N);
100 | 	
101 | 	if (stop0 <= gtol) {
102 | 		rcode = 1;
103 | 	} else {
104 | 		for(i = 0; i < N;++i) {
105 | 			if (fabs(xf[i]) > 1.0 / fabs(dx[i])) {
106 | 				den = fabs(xf[i]);
107 | 			} else {
108 | 				den = 1.0 / fabs(dx[i]);
109 | 			}
110 | 			num = fabs(xf[i] - xc[i]);
111 | 			scheck[i] = num / den;
112 | 		}
113 | 		stop0 = array_max_abs(scheck,N);
114 | 		if (stop0 <= stol) {
115 | 			rcode = 2;
116 | 		}
117 | 	}
118 | 	
119 | 	free(scheck);
120 | 	return rcode;
121 | }
122 | 
123 | 
124 | int cgpr_mt(custom_function *funcpt, custom_gradient *funcgrad, double *xc, int N, double *dx,double maxstep, int MAXITER, int *niter,
125 | 		double eps,double gtol,double ftol,double xtol,double *xf) {
126 | 	int i, rcode, retval, k, restart,gfdcode;
127 | 	int siter;
128 | 	double *xi,*temp, *rk, *pk, *jac, *jacf, *apk;
129 | 	double fsval,fxf,eps2,fo;
130 | 	double dt1, dt2,alpha;
131 | 
132 | 	temp = (double*)malloc(sizeof(double)* 8);
133 | 	rk = (double*)malloc(sizeof(double)*N);
134 | 	pk = (double*)malloc(sizeof(double)*N);
135 | 	apk = (double*)malloc(sizeof(double)*N);
136 | 	jac = (double*)malloc(sizeof(double)*N);
137 | 	jacf = (double*)malloc(sizeof(double)*N);
138 | 	xi = (double*)malloc(sizeof(double)*N);
139 | 
140 | 	*niter = 0;
141 | 	k = 0;
142 | 	rcode = 0;
143 | 	restart = N;
144 | 	siter = MAXITER;
145 | 	eps2 = sqrt(eps);
146 | 	gfdcode = 0;
147 | 
148 | 	//xtol = 1.0e-15;
149 | 	//ftol = 1e-10;
150 | 	//gtol = 1e-05;
151 | 
152 | 	// Values that may not be needed
153 | 
154 | 	fsval = 1.0;
155 | 	alpha = 1.0;
156 | 
157 | 	for (i = 0; i < N; ++i) {
158 | 		xi[i] = xc[i];
159 | 	}
160 | 	
161 | 	if (maxstep <= 0.0) {
162 | 		maxstep = 1000.0;
163 | 		dt1 = dt2 = 0.0;
164 | 		for (i = 0; i < N; ++i) {
165 | 			dt1 += dx[i] * dx[i];
166 | 			dt2 += dx[i] * xi[i] * dx[i] * xi[i];
167 | 		}
168 | 
169 | 		dt1 = sqrt(dt1);
170 | 		dt2 = sqrt(dt2);
171 | 		if (dt1 > dt2) {
172 | 			maxstep *= dt1;
173 | 		}
174 | 		else {
175 | 			maxstep *= dt2;
176 | 		}
177 | 	}
178 | 
179 | 
180 | 
181 | 	gfdcode = grad_fd(funcpt,funcgrad, xi, N, dx,eps2, jac);
182 | 	if (gfdcode == 15) {
183 | 		rcode = 15;
184 | 	}
185 | 	for (i = 0; i < N; ++i) {
186 | 		pk[i] = -jac[i];
187 | 		xf[i] = xi[i];
188 | 	}
189 | 
190 | 	fxf = FUNCPT_EVAL(funcpt,xi, N);
191 | 
192 | 	if (fxf >= DBL_MAX || fxf <= -DBL_MAX) {
193 | 		printf("Program Exiting as the function value exceeds the maximum double value");
194 | 		rcode = 15;
195 | 	}
196 | 	if (fxf != fxf) {
197 | 		printf("Program Exiting as the function returns NaN");
198 | 		rcode = 15;
199 | 	}
200 | 
201 | 	if (restart < N) {
202 | 		restart = N;
203 | 	}
204 | 
205 | 	fo = fxf;
206 | 
207 | 
208 | 	while (rcode == 0 && *niter < siter) {
209 | 		*niter = *niter + 1;
210 | 		k++;
211 | 
212 | 		mmult(jac, jac, temp, 1, N, 1);
213 | 		for (i = 0; i < N; ++i) {
214 | 			jacf[i] = jac[i];
215 | 		}
216 | 
217 | 		retval = lnsrchmt(funcpt,funcgrad, xi,&fxf,jac,&alpha, pk, N, dx, maxstep,MAXITER,eps2, ftol, gtol, xtol, xf);
218 | 		if (retval == 100) {
219 | 			printf("The Linesearch Algorithm didn't converge");
220 | 			break;
221 | 		}
222 | 		//mdisplay(xf,1,N);
223 | 		
224 | 		//mdisplay(xf, 1, N);
225 | 		//grad_fd(funcpt, xf, N, dx, jacf);
226 | 		mmult(jac, jac, temp + 1, 1, N, 1);
227 | 		mmult(jacf, jac, temp + 3, 1, N, 1);
228 | 		temp[2] = (temp[1] - temp[3]) / temp[0]; // beta
229 | 
230 | 		if (temp[2] < 0) {
231 | 			temp[2] = 0;
232 | 		}
233 | 		if (k == restart) {
234 | 			for (i = 0; i < N; ++i) {
235 | 				pk[i] = -jac[i];
236 | 			}
237 | 			k = 0;
238 | 		}
239 | 		else {
240 | 			for (i = 0; i < N; ++i) {
241 | 				pk[i] = temp[2] * pk[i] - jac[i];
242 | 			}
243 | 		}
244 | 		
245 | 
246 | 		rcode = stopcheck2_mt(fxf,N,fo,jac,dx,eps,gtol,ftol,retval);
247 | 		fo = fxf;
248 | 		for (i = 0; i < N; ++i) {
249 | 			xi[i] = xf[i];
250 | 		}
251 | 	}
252 | 
253 | 	if (rcode == 0 && *niter >= siter) {
254 | 		rcode = 4;
255 | 	}
256 | 
257 | 	free(temp);
258 | 	free(rk);
259 | 	free(pk);
260 | 	free(jac);
261 | 	free(apk);
262 | 	free(jacf);
263 | 	free(xi);
264 | 
265 | 	return rcode;
266 | }
267 | 
268 | int conjgrad_min_lin(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double maxstep, int MAXITER, int *niter,
269 | 		double eps,double gtol,double ftol,double xtol,double *xf) {
270 | 	int rcode,i;
271 | 	
272 | 	/*
273 | 	 * Return Codes
274 | 	 * 
275 | 	 * Codes 1,2,3 denote possible success.
276 | 	 * Codes 0 and 4 denote failure.
277 | 	 * 
278 | 	 * 1 - df(x)/dx <= gtol achieved so xf may be the local minima.
279 | 	 * 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so xf may be the local minima.
280 | 	 * 3 - Global Step failed to locate a lower point than xi so xi may be the local minima.
281 | 	 * 4 - Iteration Limit exceeded. Convergence not achieved.
282 | 	 * 5.- Only First Strong Wolfe Condition Achieved (Using Conjugate Gradient Method)
283 | 	 * 
284 | 	 */ 
285 | 
286 | 	for(i = 0; i < N;++i) {
287 | 		xi[i] *= dx[i];
288 | 		dx[i] = 1.0 / dx[i];
289 | 	}
290 | 	
291 | 	//rcode = cgpc(xi,N,A,b,xf);
292 | 	//rcode = cgpr(funcpt,xi,N,dx,xf);// FR
293 | 	rcode = cgpr_mt(funcpt,funcgrad,xi,N,dx,maxstep,MAXITER,niter,eps,gtol,ftol,xtol,xf);//PR+
294 | 
295 | 	for(i = 0; i < N;++i) {
296 | 		xi[i] *= dx[i];
297 | 		dx[i] = 1.0 / dx[i];
298 | 	}
299 | 	
300 | 	
301 | 	return rcode;
302 | }
303 | 


--------------------------------------------------------------------------------
/src/neldermead.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "neldermead.h"
  3 | 
  4 | /*
  5 |  * neldermead.c
  6 |  *
  7 |  *  Copyright (c) 2014, Rafat Hussain
  8 |  *	License : BSD 3-Clause
  9 |  *	See COPYRIGHT for more details
 10 |  */
 11 |  
 12 | /*
 13 |  * C Routine nel_min is based on O'Neill's FORTRAN implementation of Nelder-Mead algorithm
 14 |  * Algorithm AS 47: Function Minimization Using a Simplex Procedure
 15 | 	Author(s): R. O'Neill
 16 | 	Source: Journal of the Royal Statistical Society. Series C (Applied Statistics), Vol. 20, No. 3
 17 | 	(1971), pp. 338-345
 18 | 	Published by: Blackwell
 19 | 
 20 | 	Main Reference
 21 | 
 22 | 	J. A. Nelder and R. Mead, A simplex method for function minimization. Compuer Journal,7,308-313
 23 | 
 24 |  */
 25 | 
 26 | int nel_min(custom_function *funcpt,double *xc,int N,double *dx,double fsval,int MAXITER,int *niter,
 27 | 		double eps,double *xf) {
 28 | 	int rcode,NN,i,j,ct,ctl;
 29 | 	int ihi,ilo,L,siter;
 30 | 	double reqmin,rcoeff,ecoeff,ccoeff,fn,del;
 31 | 	double ylo,ynl,temp,ys,yss;
 32 | 	double *P,*Y,*PB,*PS,*PSS,*PM,*PL,*xi,*xmin;
 33 | 	double ysum,yavg,s,dval;
 34 | 
 35 | 	rcode = 0;
 36 | 	reqmin = eps;
 37 | 	rcoeff = 1.0;
 38 | 	ecoeff = 2.0;
 39 | 	ccoeff = 0.5;
 40 | 	del = 1.0;
 41 | 	NN = N + 1;
 42 | 	ct = N * N;
 43 | 	*niter = 0;
 44 | 	siter = MAXITER;
 45 | 	s = 1.0;
 46 | 	ctl = 0;
 47 | 	dval = 1.0e-03;
 48 | 
 49 | 	P = (double*) malloc(sizeof(double) * N * NN);
 50 | 	Y = (double*) malloc(sizeof(double) * NN);
 51 | 	PB = (double*) malloc(sizeof(double) * N);
 52 | 	PS = (double*) malloc(sizeof(double) * N);
 53 | 	PSS = (double*) malloc(sizeof(double) * N);
 54 | 	PM = (double*) malloc(sizeof(double) * N);
 55 | 	PL = (double*) malloc(sizeof(double) * N);
 56 | 	xi = (double*) malloc(sizeof(double) * N);
 57 | 	xmin = (double*) malloc(sizeof(double) * N);
 58 | 
 59 | 	for (i = 0; i < N;++i) {
 60 | 		xi[i] = xc[i];
 61 | 	}
 62 | 
 63 | 	while (ctl == 0 && *niter < siter) {
 64 | 		// Create N+1 point Simplex
 65 | 		for (i = 0; i < N;++i) {
 66 | 			P[ct + i] = xi[i];
 67 | 		}
 68 | 
 69 | 		fn = FUNCPT_EVAL(funcpt,xi,N);
 70 | 		Y[N] = fn;
 71 | 
 72 | 
 73 | 		for (i = 0; i < N;++i) {
 74 | 			xi[i] += dx[i] * del;
 75 | 			ct = i * N;
 76 | 			for (j = 0; j < N;++j) {
 77 | 				P[ct + j] = xi[j];
 78 | 			}
 79 | 			fn = FUNCPT_EVAL(funcpt, xi, N);
 80 | 			Y[i] = fn;
 81 | 			xi[i] -= dx[i] * del;
 82 | 		}
 83 | 
 84 | 		ylo = Y[0];
 85 | 		ilo = 0;
 86 | 
 87 | 		for (i = 1; i < NN;++i) {
 88 | 
 89 | 			if (Y[i] < ylo) {
 90 | 				ylo = Y[i];
 91 | 				ilo = i;
 92 | 			}
 93 | 		}
 94 | 
 95 | 		ct = ilo * N;
 96 | 
 97 | 		for (i = 0; i < N;++i) {
 98 | 			PL[i] = P[ct + i];
 99 | 		}
100 | 
101 | 
102 | 		while (fabs(s) > reqmin && *niter < siter) {
103 | 			*niter = *niter + 1;
104 | 
105 | 			// Find highest and lowest function values.
106 | 
107 | 			ynl = Y[0];
108 | 			ihi = 0;
109 | 
110 | 			for (i = 1; i < NN;++i) {
111 | 
112 | 				if (Y[i] > ynl) {
113 | 					ynl = Y[i];
114 | 					ihi = i;
115 | 				}
116 | 
117 | 			}
118 | 
119 | 
120 | 			// Find PB [Centroid]
121 | 			ct = N;
122 | 			for(i = 0; i < N;i++) {
123 | 				temp = 0.0;
124 | 				for ( j = 0; j < NN;++j) {
125 | 					temp += P[j*ct+i];
126 | 				}
127 | 				temp -= P[ihi*ct+i];
128 | 				PB[i] = temp / (double) N;
129 | 			}
130 | 
131 | 			// Find reflection
132 | 			ct = ihi * N;
133 | 			for (i = 0; i < N;++i) {
134 | 				PS[i] = (1.0 + rcoeff) * PB[i] - rcoeff * P[ct+i];
135 | 			}
136 | 			ys = FUNCPT_EVAL(funcpt, PS, N);
137 | 
138 | 			if (ys < ylo) {
139 | 				for (i = 0; i < N;++i) {
140 | 					PSS[i] = ecoeff * PS[i] + (1.0 - ecoeff) * PB[i];
141 | 				}
142 | 				yss = FUNCPT_EVAL(funcpt, PSS, N);
143 | 
144 | 				if (yss < ys) {
145 | 					for(i = 0; i < N;++i) {
146 | 						P[ct + i] = PSS[i];
147 | 					}
148 | 					Y[ihi] = yss;
149 | 				} else {
150 | 					for(i = 0; i < N;++i) {
151 | 						P[ct + i] = PS[i];
152 | 					}
153 | 					Y[ihi] = ys;
154 | 				}
155 | 
156 | 
157 | 			} else {
158 | 				L = 0;
159 | 				for (i = 0; i < NN;++i) {
160 | 					if (Y[i] > ys) {
161 | 						L++;
162 | 					}
163 | 				}
164 | 	/*
165 | 				if (L == 1) {
166 | 					for ( i = 0; i < N;++i) {
167 | 						P[ct + i] = PS[i];
168 | 					}
169 | 					Y[ihi] = ys;
170 | 				}*/
171 | 				if (L == 0) {
172 | 					for (i = 0; i < N;++i) {
173 | 						PSS[i] = ccoeff * P[ct + i] + (1.0 - ccoeff) * PB[i];
174 | 					}
175 | 					yss = FUNCPT_EVAL(funcpt, PSS, N);
176 | 
177 | 					if (yss <= Y[ihi]) {
178 | 						for(i = 0; i < N;++i) {
179 | 							P[ct + i] = PSS[i];
180 | 						}
181 | 						Y[ihi] = yss;
182 | 
183 | 					} else {
184 | 
185 | 						//ctl = ilo * N;
186 | 						for(i = 0; i < NN;++i) {
187 | 							ct = i * N;
188 | 							for (j = 0;j < N;++j) {
189 | 								P[ct + j] = 0.5 * (P[ct + j] + PL[j]);
190 | 								PM[j] = P[ct + j];
191 | 							}
192 | 							Y[i] = FUNCPT_EVAL(funcpt, PM, N);
193 | 
194 | 						}
195 | 						ylo = Y[0];
196 | 						ilo = 0;
197 | 
198 | 						for (i = 1; i < NN;++i) {
199 | 
200 | 							if (Y[i] < ylo) {
201 | 								ylo = Y[i];
202 | 								ilo = i;
203 | 							}
204 | 
205 | 						}
206 | 						ct = ilo * N;
207 | 
208 | 						for (i = 0; i < N;++i) {
209 | 							PL[i] = P[ct + i];
210 | 						}
211 | 					}
212 | 				}
213 | 				if (L == 1) {
214 | 					for (i = 0; i < N;++i) {
215 | 						PSS[i] = ccoeff * P[ct + i] + (1.0 - ccoeff) * PB[i];
216 | 					}
217 | 					yss = FUNCPT_EVAL(funcpt, PSS, N);
218 | 
219 | 					if (yss <= ys) {
220 | 						ct = ihi * N;
221 | 						for(i = 0; i < N;++i) {
222 | 							P[ct + i] = PSS[i];
223 | 						}
224 | 						Y[ihi] = yss;
225 | 
226 | 					} else {
227 | 						ct = ihi * N;
228 | 						for(i = 0; i < N;++i) {
229 | 							P[ct + i] = PS[i];
230 | 						}
231 | 						Y[ihi] = ys;
232 | 
233 | 					}
234 | 
235 | 				}
236 | 
237 | 
238 | 				if (L > 1) {
239 | 					ct = ihi * N;
240 | 					for(i = 0; i < N;++i) {
241 | 						P[ct + i] = PS[i];
242 | 					}
243 | 					Y[ihi] = ys;
244 | 
245 | 				}
246 | 			}
247 | 
248 | 			if (Y[ihi] < ylo) {
249 | 				ylo = Y[ihi];
250 | 				ilo = ihi;
251 | 			}
252 | 
253 | 			//To the end of the loop
254 | 			ysum = 0.0;
255 | 			for (j=0 ;j <= N;j++) {
256 | 				ysum += Y[j];
257 | 			}
258 | 			yavg = ysum/(N+1);
259 | 			s = 0.0;
260 | 			for (j=0;j<=N;j++) {
261 | 				s += pow((Y[j]-yavg),2.0);
262 | 			}
263 | 			s = sqrt(s);//Remove this statement if you want convergence in fewer iterations
264 | 			if (fabs(s) < reqmin) {
265 | 				ctl = 1;
266 | 			}
267 | 
268 | 		}
269 | 		ct = ilo * N;
270 | 		for(i = 0; i < N;++i) {
271 | 			xmin[i] = P[ct + i];
272 | 		}
273 | 		ynl = Y[ilo];
274 | 
275 | 		for (i = 0; i < N;++i) {
276 | 			del = dx[i] * dval;
277 | 			xmin[i] += del;
278 | 			fn = FUNCPT_EVAL(funcpt, xmin, N);
279 | 			if (fn < ynl) {
280 | 				ctl = 0;
281 | 				break;
282 | 			} else {
283 | 				ctl++;
284 | 			}
285 | 			xmin[i] = xmin[i] - 2 * del;
286 | 			fn = FUNCPT_EVAL(funcpt, xmin, N);
287 | 			if (fn < ynl) {
288 | 				ctl = 0;
289 | 				break;
290 | 			} else {
291 | 				ctl++;
292 | 			}
293 | 		}
294 | 
295 | 		if (ctl == 0) {
296 | 			for (i = 0; i < N;++i) {
297 | 				xi[i] = xmin[i];
298 | 			}
299 | 			del = dval;
300 | 			s = 1.0;
301 | 		}
302 | 
303 | 	}
304 | 
305 | 	ct = N * N;
306 | 	for (i = 0; i < N;++i) {
307 | 		xf[i] = P[ct + i];
308 | 	}
309 | 
310 | 	if (rcode == 0 && *niter >= siter) {
311 | 		rcode = 4;
312 | 	} else if (*niter < siter) {
313 | 		rcode = 1;
314 | 	}
315 | 
316 | 
317 | 	free(Y);
318 | 	free(P);
319 | 	free(PB);
320 | 	free(PS);
321 | 	free(PSS);
322 | 	free(PM);
323 | 	free(PL);
324 | 	free(xi);
325 | 	free(xmin);
326 | 	return rcode;
327 | }
328 | 


--------------------------------------------------------------------------------
/src/lnsrchmp.c:
--------------------------------------------------------------------------------
  1 | #include "lnsrchmp.h"
  2 | 
  3 | /*
  4 |  * lnsrchmp.c
  5 |  *
  6 |  *  Copyright (c) 2014, Rafat Hussain
  7 |  *	License : BSD 3-Clause
  8 |  *	See COPYRIGHT for more details
  9 |  */
 10 | 
 11 | /*
 12 | This code is a C translation of Fortran Routine developed by
 13 | Argonne National Laboratory. MINPACK Project. June 1983
 14 |      Jorge J. More', David J. Thuente
 15 | 
 16 | A Copy of the original Fortran routine is available at
 17 | http://www.cs.umd.edu/~oleary/LBFGS/FORTRAN/linesearch.f
 18 | 
 19 | */
 20 | 
 21 | 
 22 | 
 23 | int grad_fd(custom_function *funcpt, custom_gradient *funcgrad, double *x, int N, double *dx,
 24 | 		double eps2, double *f) {
 25 | 	int retval;
 26 | 	retval = 0;
 27 | 	if (funcgrad == NULL) {
 28 | 		//printf("FD Gradient \n");
 29 | 		retval = grad_calc(funcpt,x,N,dx,eps2,f);
 30 | 	} else {
 31 | 		//printf("Analytic gradient \n");
 32 | 		FUNCGRAD_EVAL(funcgrad,x,N,f);
 33 | 	}
 34 | 
 35 | 	return retval;
 36 | 
 37 | }
 38 | 
 39 | int grad_cd(custom_function *funcpt, custom_gradient *funcgrad, double *x, int N, double *dx,
 40 | 		double eps3, double *f) {
 41 | 	int retval;
 42 | 	retval = 0;
 43 | 	if (funcgrad == NULL) {
 44 | 		//printf("FD Gradient \n");
 45 | 		retval = grad_calc2(funcpt,x,N,dx,eps3,f);
 46 | 	} else {
 47 | 		//printf("Analytic gradient \n");
 48 | 		FUNCGRAD_EVAL(funcgrad,x,N,f);
 49 | 	}
 50 | 	return retval;
 51 | 
 52 | }
 53 | 
 54 | int grad_calc2(custom_function *funcpt, double *x, int N, double *dx, double eps3, double *f) {
 55 | 	int j,retval;
 56 | 	double stepsize,stepmax,temp;
 57 | 	double fp,fm;
 58 | 
 59 | 	retval = 0;
 60 | 
 61 | 	for (j = 0; j < N;++j) {
 62 | 		if (fabs(x[j]) >= 1.0 / fabs(dx[j])) {
 63 | 			stepmax = x[j];
 64 | 		}
 65 | 		else {
 66 | 			stepmax = signx(x[j]) * 1.0 / fabs(dx[j]);
 67 | 		}
 68 | 
 69 | 		stepsize = stepmax * eps3;
 70 | 		temp = x[j];
 71 | 		x[j] += stepsize;
 72 | 		stepsize = x[j] - temp;
 73 | 		fp = FUNCPT_EVAL(funcpt,x,N);
 74 | 		if (fp >= DBL_MAX || fp <= -DBL_MAX) {
 75 | 			printf("Program Exiting as the function value exceeds the maximum double value");
 76 | 			return 15;
 77 | 		}
 78 | 		if (fp != fp) {
 79 | 			printf("Program Exiting as the function returns NaN");
 80 | 			return 15;
 81 | 		}
 82 | 		x[j] = temp - stepsize;
 83 | 		fm = FUNCPT_EVAL(funcpt,x,N);
 84 | 		if (fm >= DBL_MAX || fm <= -DBL_MAX) {
 85 | 			printf("Program Exiting as the function value exceeds the maximum double value");
 86 | 			return 15;
 87 | 		}
 88 | 		if (fm != fm) {
 89 | 			printf("Program Exiting as the function returns NaN");
 90 | 			return 15;
 91 | 		}
 92 | 		f[j] = (fp - fm)/ (2 * stepsize);
 93 | 		x[j] = temp;
 94 | 	}
 95 | 
 96 | 	return retval;
 97 | }
 98 | 
 99 | int grad_calc(custom_function *funcpt, double *x, int N, double *dx, double eps2, double *f) {
100 | 	int i, j,retval;
101 | 	double step, fd, stepmax;
102 | 	double *xi;
103 | 
104 | 	fd = eps2; // square root of macheps
105 | 	retval = 0;
106 | 	xi = (double*)malloc(sizeof(double)*N);
107 | 
108 | 	for (i = 0; i < N; ++i) {
109 | 		if (fabs(x[i]) >= 1.0 / fabs(dx[i])) {
110 | 			stepmax = x[i];
111 | 		}
112 | 		else {
113 | 			stepmax = signx(x[i]) * 1.0 / fabs(dx[i]);
114 | 		}
115 | 		step = fd * stepmax;
116 | 		for (j = 0; j < N; ++j) {
117 | 			xi[j] = x[j];
118 | 		}
119 | 		xi[i] += step;
120 | 		f[i] = (FUNCPT_EVAL(funcpt, xi, N) - FUNCPT_EVAL(funcpt, x, N)) / step;
121 | 		if (f[i] >= DBL_MAX || f[i] <= -DBL_MAX) {
122 | 			printf("Program Exiting as the function value exceeds the maximum double value");
123 | 			free(xi);
124 | 			return 15;
125 | 		}
126 | 		if (f[i] != f[i]) {
127 | 			printf("Program Exiting as the function returns NaN");
128 | 			free(xi);
129 | 			return 15;
130 | 		}
131 | 		//xi[i] -= step;
132 | 	}
133 | 
134 | 	free(xi);
135 | 	return retval;
136 | 
137 | }
138 | 
139 | int stopcheck3_mt(double *xi,double *xf,double fx, int N, double fo, double *jac, double *dx, double eps,
140 | 		double stoptol, double functol, int retval) {
141 | 	int rcode,i;
142 | 	double nrm,nrmnx,relfit,num,den,stop0;
143 | 	double *scheck;
144 | 	rcode = 0;
145 | 
146 | 	scheck = (double*)malloc(sizeof(double)*N);
147 | 
148 | 	if (retval == 3) {
149 | 		rcode = 4;
150 | 		return rcode;
151 | 	}
152 | 	if (retval == 15) {
153 | 		rcode = 15;
154 | 		return rcode;
155 | 	}
156 | 
157 | 	nrm = l2norm(jac, N);
158 | 	nrmnx = nrm / (double) N;
159 | 
160 | 	if (fabs(fo) < eps) {
161 | 		relfit = fabs(fx - fo);
162 | 	}
163 | 	else {
164 | 		relfit = fabs((fx - fo)/fo);
165 | 	}
166 | 
167 | 	if (nrmnx < stoptol) {
168 | 		rcode = 1; // Successful Convergence
169 | 	} else if (relfit < functol) {
170 | 		rcode = 6; // Relative fit less than function tolerance
171 | 	} else {
172 | 		for (i = 0; i < N; ++i) {
173 | 			den = 1.0+fabs(xf[i]);
174 | 			num = fabs(xf[i] - xi[i]);
175 | 			scheck[i] = num / den;
176 | 		}
177 | 		stop0 = array_max_abs(scheck, N);
178 | 		if (stop0 <= stoptol) {
179 | 			rcode = 2;
180 | 		}
181 | 	}
182 | 
183 | 	free(scheck);
184 | 	return rcode;
185 | }
186 | 
187 | int stopcheck2_mt(double fx, int N, double fo, double *jac, double *dx, double eps,double stoptol, double functol, int retval) {
188 | 	int rcode;
189 | 	double nrm,nrmnx,relfit;
190 | 	rcode = 0;
191 | 
192 | 
193 | 	if (retval == 3) {
194 | 		rcode = 4;
195 | 		return rcode;
196 | 	}
197 | 	if (retval == 15) {
198 | 		rcode = 15;
199 | 		return rcode;
200 | 	}
201 | 
202 | 	nrm = l2norm(jac, N);
203 | 	nrmnx = nrm / (double) N;
204 | 
205 | 	if (fabs(fo) < eps) {
206 | 		relfit = fabs(fx - fo);
207 | 	}
208 | 	else {
209 | 		relfit = fabs((fx - fo)/fo);
210 | 	}
211 | 
212 | 	if (nrmnx < stoptol) {
213 | 		return 1; // Successful Convergence
214 | 	} else if (relfit < functol) {
215 | 		return 6; // Relative fit less than function tolerance
216 | 	}
217 | 
218 | 	return rcode;
219 | }
220 | 
221 | int stopcheck_mt(double fx, int N, double *xc, double *xf, double *jac, double *dx, double fsval, double gtol, double stol, int retval) {
222 | 	int rcode, i;
223 | 	double num, den;
224 | 	double stop0;
225 | 	double *scheck;
226 | 
227 | 	rcode = 0;
228 | 
229 | 	if (retval == 3) {
230 | 		rcode = 4;
231 | 		return rcode;
232 | 	}
233 | 	if (retval == 15) {
234 | 		rcode = 15;
235 | 		return rcode;
236 | 	}
237 | 	
238 | 	scheck = (double*)malloc(sizeof(double)*N);
239 | 
240 | 	if (fabs(fx) > fabs(fsval)) {
241 | 		den = fabs(fx);
242 | 	}
243 | 	else {
244 | 		den = fabs(fsval);
245 | 	}
246 | 	for (i = 0; i < N; ++i) {
247 | 		if (fabs(xf[i]) > 1.0 / fabs(dx[i])) {
248 | 			num = fabs(xf[i]);
249 | 		}
250 | 		else {
251 | 			num = 1.0 / fabs(dx[i]);
252 | 		}
253 | 		scheck[i] = fabs(jac[i]) * num / den;
254 | 	}
255 | 
256 | 	stop0 = array_max_abs(scheck, N);
257 | 
258 | 	if (stop0 <= gtol) {
259 | 		rcode = 1;
260 | 	}
261 | 	else {
262 | 		for (i = 0; i < N; ++i) {
263 | 			if (fabs(xf[i]) > 1.0 / fabs(dx[i])) {
264 | 				den = fabs(xf[i]);
265 | 			}
266 | 			else {
267 | 				den = 1.0 / fabs(dx[i]);
268 | 			}
269 | 			num = fabs(xf[i] - xc[i]);
270 | 			scheck[i] = num / den;
271 | 		}
272 | 		stop0 = array_max_abs(scheck, N);
273 | 		if (stop0 <= stol) {
274 | 			rcode = 2;
275 | 		}
276 | 	}
277 | 
278 | 	free(scheck);
279 | 	return rcode;
280 | }
281 | 
282 | int cstep(double *stx,double *fx,double *dx,double *sty,double *fy,double *dy,double *stp,double *fp,double *dp,int *brackt,
283 |                        double  stpmin,double stpmax) {
284 | 	int info,bound;
285 | 	double gamma, p, p66, q, r, s, sgnd, stpc, stpf, stpq, theta;
286 | 
287 | 	info = 0;
288 | 	p66 = 0.66;
289 | 
290 | 	if ((*brackt == 1 && (*stp <= pmin(*stx, *sty) || *stp >= pmax(*stx, *sty))) || *dx*(*stp - *stx) >= 0.0 || stpmax < stpmin) {
291 | 		return info;
292 | 	}
293 | 
294 | 	sgnd = (*dp) * (*dx / fabs(*dx));
295 | 
296 | 	/*
297 | 	 First case. A higher function value. (fp > fx)
298 |      The minimum is bracketed. If the cubic step is closer
299 | 	 to stx than the quadratic step, the cubic step is taken,
300 |      else the average of the cubic and quadratic steps is taken.
301 | 	*/
302 | 
303 | 	if (*fp > *fx) {
304 | 		info = 1;
305 | 		bound = 1;
306 | 		theta = 3 * (*fx - *fp) / (*stp - *stx) + *dx + *dp;
307 | 		s = pmax(fabs(theta), fabs(*dx));
308 | 		s = pmax(s, fabs(*dp));
309 | 		gamma = s*sqrt((theta / s)*(theta/s) - (*dx / s)*(*dp / s));
310 | 		if (*stp < *stx) {
311 | 			gamma = -gamma;
312 | 		}
313 | 		p = (gamma - *dx) + theta;
314 | 		q = ((gamma - *dx) + gamma) + *dp;
315 | 		r = p / q;
316 | 		stpc = *stx + r*(*stp - *stx);
317 | 		stpq = *stx + ((*dx / ((*fx - *fp) / (*stp - *stx) + *dx)) / 2)*(*stp - *stx);
318 | 
319 | 		if (fabs(stpc - *stx) < fabs(stpq - *stx)) {
320 | 			stpf = stpc;
321 | 		} else {
322 | 			stpf = stpc + (stpq - stpc) / 2;
323 | 		}
324 | 		*brackt = 1;
325 | 	} else if (sgnd < 0.0) {
326 | 		/*
327 | 		Second case. A lower function value and derivatives of
328 | 	    opposite sign. The minimum is bracketed. If the cubic
329 | 	    step is closer to stx than the quadratic (secant) step, 
330 | 	    the cubic step is taken, else the quadratic step is taken.
331 | 		*/
332 | 
333 | 		info = 2;
334 | 		bound = 0;
335 | 		theta = 3 * (*fx - *fp) / (*stp - *stx) + *dx + *dp;
336 | 		s = pmax(fabs(theta), fabs(*dx));
337 | 		s = pmax(s, fabs(*dp));
338 | 		gamma = s*sqrt((theta / s)*(theta / s) - (*dx / s)*(*dp / s));
339 | 		if (*stp > *stx) {
340 | 			gamma = -gamma;
341 | 		}
342 | 		p = (gamma - *dp) + theta;
343 | 		q = ((gamma - *dp) + gamma) + *dx;
344 | 		r = p / q;
345 | 		stpc = *stp + r*(*stx - *stp);
346 | 		stpq = *stp + (*dp / (*dp - *dx))*(*stx - *stp);
347 | 		if (fabs(stpc - *stp) > fabs(stpq - *stp)) {
348 | 			stpf = stpc;
349 | 		} else {
350 | 			stpf = stpq;
351 | 		}
352 | 		*brackt = 1;
353 | 	}
354 | 	else if (fabs(*dp) < fabs(*dx)) {
355 | 		/*
356 |      Third case. A lower function value, derivatives of the
357 |      same sign, and the magnitude of the derivative decreases.
358 |      The cubic step is only used if the cubic tends to infinity 
359 |      in the direction of the step or if the minimum of the cubic
360 |      is beyond stp. Otherwise the cubic step is defined to be 
361 |      either stpmin or stpmax. The quadratic (secant) step is also 
362 |      computed and if the minimum is bracketed then the the step 
363 |      closest to stx is taken, else the step farthest away is taken.
364 | 		*/
365 | 
366 | 		info = 3;
367 | 		bound = 1;
368 | 		theta = 3 * (*fx - *fp) / (*stp - *stx) + *dx + *dp;
369 | 		s = pmax(fabs(theta), fabs(*dx));
370 | 		s = pmax(s, fabs(*dp));
371 | 
372 | 		/*
373 | 		 The case gamma = 0 only arises if the cubic does not tend
374 |          to infinity in the direction of the step.
375 | 		*/
376 | 
377 | 		gamma = s*sqrt(pmax(0., (theta / s)*(theta / s) - (*dx / s)*(*dp / s)));
378 | 		if (*stp > *stx) {
379 | 			gamma = -gamma;
380 | 		}
381 | 		p = (gamma - *dp) + theta;
382 | 		q = (gamma + (*dx - *dp)) + gamma;
383 | 		r = p / q;
384 | 
385 | 		if (r < 0.0 && gamma != 0.0) {
386 | 			stpc = *stp + r*(*stx - *stp);
387 | 		} else if (*stp > *stx) {
388 | 			stpc = stpmax;
389 | 		} else {
390 | 			stpc = stpmin;
391 | 		}
392 | 		stpq = *stp + (*dp / (*dp - *dx))*(*stx - *stp);
393 | 		if (*brackt == 1) {
394 | 			if (fabs(*stp - stpc) < fabs(*stp - stpq)) {
395 | 				stpf = stpc;
396 | 			} else {
397 | 				stpf = stpq;
398 | 			}
399 | 		} else {
400 | 			if (fabs(*stp - stpc) > fabs(*stp - stpq)) {
401 | 				stpf = stpc;
402 | 			} else {
403 | 				stpf = stpq;
404 | 			}
405 | 		}
406 | 	}
407 | 	else {
408 | 		/*
409 |      Fourth case. A lower function value, derivatives of the
410 |      same sign, and the magnitude of the derivative does
411 |      not decrease. If the minimum is not bracketed, the step
412 |      is either stpmin or stpmax, else the cubic step is taken.
413 | 
414 | 		*/
415 | 		info = 4;
416 | 		bound = 0;
417 | 		if (*brackt == 1) {
418 | 			theta = 3 * (*fp - *fy) / (*sty - *stp) + *dy + *dp;
419 | 			s = pmax(fabs(theta), fabs(*dy));
420 | 			s = pmax(s,fabs(*dp));
421 | 			gamma = s*sqrt((theta / s)*(theta / s) - (*dy / s)*(*dp / s));
422 | 			if (*stp > *sty) {
423 | 				gamma = -gamma;
424 | 			}
425 | 			p = (gamma - *dp) + theta;
426 | 			q = ((gamma - *dp) + gamma) + *dy;
427 | 			r = p / q;
428 | 			stpc = *stp + r*(*sty - *stp);
429 | 			stpf = stpc;
430 | 		}
431 | 		else if (*stp > *stx) {
432 | 			stpf = stpmax;
433 | 		}
434 | 		else {
435 | 			stpf = stpmin;
436 | 		}
437 | 		
438 | 	}
439 | 	/*
440 | 	Update the interval of uncertainty. This update does not
441 |     depend on the new step or the case analysis above.
442 | 	*/
443 | 
444 | 	if (*fp > *fx) {
445 | 		*sty = *stp;
446 | 		*fy = *fp;
447 | 		*dy = *dp;
448 | 	} else { 
449 | 		if (sgnd < 0.0) {
450 | 			*sty = *stx;
451 | 			*fy = *fx;
452 | 			*dy = *dx;
453 | 		}
454 | 		*stx = *stp;
455 | 		*fx = *fp;
456 | 		*dx = *dp;
457 | 	}
458 | 	/*
459 | 	Compute the new step and safeguard it.
460 | 	*/
461 | 
462 | 	stpf = pmin(stpmax, stpf);
463 | 	stpf = pmax(stpmin, stpf);
464 | 	*stp = stpf;
465 | 	if (*brackt == 1 && bound == 1) {
466 | 		if (*sty > *stx) {
467 | 			*stp = pmin(*stx + p66*(*sty - *stx), *stp);
468 | 		} else {
469 | 			*stp = pmax(*stx + p66*(*sty - *stx), *stp);
470 | 		}
471 | 	}
472 | 
473 | 
474 | 	return info;
475 | }
476 | 
477 | int cvsrch(custom_function *funcpt, custom_gradient *funcgrad, double *x, double *f, double *g, double *stp, double *s, int N, double *dx, double maxstep,
478 | 	int MAXITER,double eps2,double ftol, double gtol, double xtol) {
479 | 	int info,i,siter,nfev;
480 | 	int infoc, j, brackt, stage1;
481 | 	double dg, dgm, dginit, dgtest, dgx, dgxm, dgy, dgym, finit, ftest1, fm, fx, fxm, fy, fym, p5, p66, stx, sty,
482 | 		stmin, stmax, width, width1, xtrapf;
483 | 	double nlen,den,rell,stepmin;
484 | 	double *rcheck,*wa;
485 | 
486 | 	rcheck = (double*)malloc(sizeof(double)*N);
487 | 	wa = (double*)malloc(sizeof(double)*N);
488 | 
489 | 	nlen = 0.0;
490 | 	p5 = 0.5;
491 | 	p66 = 0.66;
492 | 	xtrapf = 4.0;
493 | 	info = 0;
494 | 	infoc = 1;
495 | 	siter = MAXITER;
496 | 
497 | 
498 | 	if (N <= 0 || *stp <= 0.0 || ftol < 0.0 || gtol < 0.0 || xtol < 0.0) {
499 | 		return info;
500 | 	}
501 | 
502 | 
503 | 	for (i = 0; i < N; ++i) {
504 | 		nlen += dx[i] * s[i] * dx[i] * s[i];
505 | 	}
506 | 	nlen = sqrt(nlen);
507 | 
508 | 	for (i = 0; i < N; ++i) {
509 | 		if (fabs(x[i]) > 1.0 / fabs(dx[i])) {
510 | 			den = fabs(x[i]);
511 | 		}
512 | 		else {
513 | 			den = 1.0 / fabs(dx[i]);
514 | 		}
515 | 		rcheck[i] = s[i] / den;
516 | 	}
517 | 
518 | 	rell = array_max_abs(rcheck, N);
519 | 
520 | 	stepmin = ftol / rell;
521 | 
522 | 	dginit = 0.0;
523 | 
524 | 	for (j = 0; j < N; ++j) {
525 | 		dginit += g[j] * s[j];
526 | 	}
527 | 
528 | 	if (dginit >= 0.0) {
529 | 		return info;
530 | 	}
531 | 
532 | 	brackt = 0;
533 | 	stage1 = 1;
534 | 	finit = *f;
535 | 	nfev = 0;
536 | 	dgtest = ftol*dginit;
537 | 	width = maxstep - stepmin;
538 | 	width1 = width / 0.5;
539 | 
540 | 	for (j = 0; j < N; ++j) {
541 | 		wa[j] = x[j];
542 | 	}
543 | 
544 | 	/*     The variables stx, fx, dgx contain the values of the step, 
545 |      function, and directional derivative at the best step.
546 |      The variables sty, fy, dgy contain the value of the step,
547 |      function, and derivative at the other endpoint of
548 |      the interval of uncertainty.
549 |      The variables stp, f, dg contain the values of the step,
550 |      function, and derivative at the current step.
551 | 	*/
552 | 
553 | 	stx = 0.0;
554 | 	fx = finit;
555 | 	dgx = dginit;
556 | 	sty = 0.0;
557 | 	fy = finit;
558 | 	dgy = dginit;
559 | 
560 | 	// Iteration
561 | 
562 | 	while (info == 0) {
563 | 
564 | 		if (brackt == 1) {
565 | 			stmin = pmin(stx, sty);
566 | 			stmax = pmax(stx, sty);
567 | 		} else {
568 | 			stmin = stx;
569 | 			stmax = *stp + xtrapf*(*stp - stx);
570 | 		}
571 | 
572 | 		*stp = pmax(*stp, stepmin);
573 | 		*stp = pmin(*stp, maxstep);
574 | 
575 | 		if ((brackt == 1 && (*stp <= stmin || *stp >= stmax)) || nfev >= siter - 1 || infoc == 0 || (brackt == 1 && (stmax - stmin) <= xtol*stmax)) {
576 | 			*stp = stx;
577 | 		}
578 | 
579 | 		for (j = 0; j < N; ++j) {
580 | 			x[j] = wa[j] + *stp * s[j];
581 | 		}
582 | 
583 | 		*f = FUNCPT_EVAL(funcpt,x, N);
584 | 		if (*f >= DBL_MAX || *f <= -DBL_MAX) {
585 | 			printf("Program Exiting as the function value exceeds the maximum double value");
586 | 			free(rcheck);
587 | 			free(wa);
588 | 			return 15;
589 | 		}
590 | 		if (*f != *f) {
591 | 			printf("Program Exiting as the function returns NaN");
592 | 			free(rcheck);
593 | 			free(wa);
594 | 			return 15;
595 | 		}
596 | 		grad_cd(funcpt,funcgrad, x, N, dx, eps2,g);
597 | 		nfev++;
598 | 
599 | 
600 | 		dg = 0.0;
601 | 		for (j = 0; j < N; ++j) {
602 | 			dg = dg + g[j]*s[j];
603 | 		}	
604 | 		ftest1 = finit + *stp * dgtest;
605 | 
606 | 		//       Test for convergence.
607 | 
608 | 		if ((brackt == 1 && (*stp <= stmin || *stp >= stmax)) || infoc == 0) {
609 | 			info = 6;
610 | 		}
611 | 
612 | 		if (*stp == maxstep && *f <= ftest1 && dg <= dgtest) {
613 | 			info = 5;
614 | 		}
615 | 
616 | 		if (*stp == stepmin && (*f > ftest1 || dg >= dgtest)) {
617 | 			info = 4;
618 | 		}
619 | 
620 | 		if (nfev >= siter) {
621 | 			info = 3;
622 | 		}
623 | 
624 | 		if (brackt == 1 && ((stmax - stmin) <= xtol*stmax)) {
625 | 			info = 2;
626 | 		}
627 | 
628 | 		if (*f <= ftest1 && fabs(dg) <= gtol*(-dginit)) {
629 | 			info = 1;
630 | 		}
631 | 
632 | 		if (stage1 == 1 && *f <= ftest1 && dg >= pmin(ftol, gtol)*dginit) {
633 | 			stage1 = 0;
634 | 		}
635 | 
636 | 
637 | 		/*
638 | 		A modified function is used to predict the step only if
639 |         we have not obtained a step for which the modified
640 |         function has a nonpositive function value and nonnegative
641 |         derivative, and if a lower function value has been
642 |         obtained but the decrease is not sufficient.
643 | 		*/
644 | 		if (stage1 == 1 && *f <= fx && *f > ftest1) {
645 | 
646 | 			fm = *f - *stp*dgtest;
647 | 			fxm = fx - stx*dgtest;
648 | 			fym = fy - sty*dgtest;
649 | 			dgm = dg - dgtest;
650 | 			dgxm = dgx - dgtest;
651 | 			dgym = dgy - dgtest;
652 | 
653 | 			infoc = cstep(&stx, &fxm, &dgxm, &sty, &fym, &dgym, stp, &fm, &dgm, &brackt, stmin, stmax);
654 | 
655 | 			fx = fxm + stx*dgtest;
656 | 			fy = fym + sty*dgtest;
657 | 			dgx = dgxm + dgtest;
658 | 			dgy = dgym + dgtest;
659 | 		} else {
660 | 			
661 | 			infoc = cstep(&stx, &fx, &dgx, &sty, &fy, &dgy, stp, f, &dg, &brackt, stmin, stmax);
662 | 		}
663 | 
664 | 		if (brackt == 1) {
665 | 			if (fabs(sty - stx) >= p66*width1) {
666 | 				*stp = stx + p5*(sty - stx);
667 | 			}
668 | 			width1 = width;
669 | 			width = fabs(sty - stx);
670 | 		}
671 | 
672 | 	}
673 | 
674 | 	free(rcheck);
675 | 	free(wa);
676 | 
677 | 	return info;
678 | }
679 | 
680 | int lnsrchmt(custom_function *funcpt, custom_gradient *funcgrad, double *xi, double *f, double *jac, double *alpha, double *p, int N, double *dx, double maxstep, int MAXITER,
681 | 		double eps2,double ftol, double gtol, double xtol, double *x) {
682 | 	int i,retval,info;
683 | 
684 | 	/*
685 | 	* Return Codes
686 | 	*
687 | 	* Codes 1,2,3 denote possible success.
688 | 	* Codes 0 and 4 denote failure.
689 | 	*
690 | 	* 1 - df(x)/dx <= gtol achieved so xf may be the local minima.
691 | 	* 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so xf may be the local minima.
692 | 	* 3 - Global Step failed to locate a lower point than xi so xi may be the local minima.
693 | 	* 4 - Iteration Limit exceeded. Convergence not achieved.
694 | 	*
695 | 	*/
696 | 
697 | 	//f = funcpt(xi, N);
698 | 
699 | 	// Important - All argument values are modified in this algorithm 
700 | 
701 | 	for (i = 0; i < N; ++i) {
702 | 		x[i] = xi[i];
703 | 	}
704 | 
705 | 	info = cvsrch(funcpt,funcgrad, x,f, jac, alpha, p, N, dx, maxstep,MAXITER,eps2,
706 | 		ftol, gtol, xtol);
707 | 	
708 | 	retval = info;
709 | 	return retval;
710 | 
711 | }
712 | 


--------------------------------------------------------------------------------
/src/optimc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * optimize.c
  3 |  *
  4 |  *  Copyright (c) 2014, Rafat Hussain
  5 |  *	License : BSD 3-Clause
  6 |  *	See COPYRIGHT for more details
  7 |  */
  8 |  
  9 | 
 10 | 
 11 | #include "optimc.h"
 12 | 
 13 | opt_object opt_init(int N) {
 14 | 	opt_object obj = NULL;
 15 | 	int i;
 16 | 	double meps;
 17 | 
 18 | 	obj = (opt_object) malloc (sizeof(struct opt_set) + sizeof(double)* (N-1));
 19 | 
 20 | 	meps = macheps();
 21 | 	obj->eps = meps;
 22 | 	obj->xtol = meps;
 23 | 	obj->gtol = pow(meps,(double)1.0/3.0);
 24 | 	obj->ftol = obj->gtol * obj->gtol;
 25 | 	obj->stol = obj->ftol;
 26 | 	obj->N = N;
 27 | 	obj->MaxIter = 200*N;
 28 | 	obj->retval = 0;
 29 | 
 30 | 	if (obj->MaxIter < 1000) {
 31 | 		obj->MaxIter = 1000;
 32 | 	}
 33 | 
 34 | 	obj->Iter = 0;
 35 | 	obj->Method = 0;
 36 | 	obj->maxstep = -1.0;
 37 | 	strcpy(obj->MethodName,"Nelder-Mead");
 38 | 	obj->objfunc = 0.0;
 39 | 	for (i = 0; i < N;++i) {
 40 | 		obj->xopt[i] = 0.0;
 41 | 	}
 42 | 
 43 | 	return obj;
 44 | }
 45 | 
 46 | void optsummary(opt_object obj) {
 47 | 	int i;
 48 | 	printf("\n Return Value : %d \n",obj->retval);
 49 | 	printf("Method : %d %s \n",obj->Method,obj->MethodName);
 50 | 	printf("Iterations : %d \n",obj->Iter);
 51 | 	printf("Function Minimized At : [ ");
 52 | 	for(i = 0; i < obj->N;++i) {
 53 | 		printf("%g ",obj->xopt[i]);
 54 | 	}
 55 | 	printf(" ] \n");
 56 | 	printf("Function Value : %g \n \n",obj->objfunc);
 57 | 
 58 | }
 59 | 
 60 | void setMaxIter(opt_object obj,int MaxIter) {
 61 | 	obj->MaxIter = MaxIter;
 62 | }
 63 | 
 64 | void setMaxStep(opt_object obj, double maxstep) {
 65 | 	obj->maxstep = maxstep;
 66 | }
 67 | 
 68 | void setTOL(opt_object obj,double gtol,double stol,double ftol,double xtol) {
 69 | 	obj->gtol = gtol;
 70 | 	obj->stol = stol;
 71 | 	obj->ftol = ftol;
 72 | 	obj->xtol = xtol;
 73 | }
 74 | 
 75 | nls_object nls_init(int M,int N) {
 76 | 	nls_object obj = NULL;
 77 | 	int i;
 78 | 	double meps;
 79 | 
 80 | 	obj = (nls_object) malloc (sizeof(struct nls_set) + sizeof(double)* (N-1));
 81 | 
 82 | 	meps = macheps();
 83 | 	obj->eps = meps;
 84 | 	obj->epsfcn = meps;
 85 | 	obj->factor = 100.; //Takes values between 0.1 and 100.0
 86 | 	obj->xtol = meps;
 87 | 	obj->gtol = pow(meps,(double)1.0/3.0);
 88 | 	obj->ftol = obj->gtol * obj->gtol;
 89 | 	obj->M = M;
 90 | 	obj->N = N;
 91 | 	obj->MaxIter = 1000;
 92 | 	obj->Maxfev = 1000;
 93 | 	obj->nfev = 0;
 94 | 	obj->njev = 0;
 95 | 	obj->ldfjac = M;
 96 | 	obj->mode = 1;
 97 | 
 98 | 	if (obj->MaxIter < 1000) {
 99 | 		obj->MaxIter = 1000;
100 | 	}
101 | 
102 | 	obj->Iter = 0;
103 | 	for (i = 0; i < N;++i) {
104 | 		obj->xopt[i] = 0.0;
105 | 	}
106 | 
107 | 	return obj;
108 | }
109 | 
110 | void setnlsTOL(nls_object obj,double gtol,double ftol,double xtol) {
111 | 	obj->gtol = gtol;
112 | 	obj->ftol = ftol;
113 | 	obj->xtol = xtol;
114 | }
115 | 
116 | int fminsearch(custom_function *funcpt,int N,double *xi,double *xf) {
117 | 	int i,retval,MAXITER,niter;
118 | 	double fsval,eps;
119 | 	double *dx;
120 | 
121 | 	dx = (double*) malloc(sizeof(double) * N);
122 | 
123 | 	fsval = 1.0;
124 | 	MAXITER = 200*N;
125 | 	niter = 0;
126 | 	eps = macheps(); // Use macheps program
127 | 
128 | 	for(i = 0; i < N;++i) {
129 | 		dx[i] = 1.0;
130 | 	}
131 | 
132 | 	retval = nel_min(funcpt,xi,N,dx,fsval,MAXITER,&niter,eps,xf);
133 | 
134 | 	//printf("Iterations %d \n", niter);
135 | 
136 | 	free(dx);
137 | 	return retval;
138 | }
139 | 
140 | static int mvalue(int N) {
141 | 	int mval;
142 | 
143 | 	if (N <= 10) {
144 | 		mval = N;
145 | 	} else if (N > 10 && N <= 20) {
146 | 		mval = 10;
147 | 	} else if (N > 20 && N <= 200) {
148 | 		mval = 15;
149 | 	} else if ( N > 200) {
150 | 		mval = 20;
151 | 	}
152 | 
153 | 	return mval;
154 | }
155 | 
156 | int fminunc(custom_function *funcpt, custom_gradient *funcgrad, int N, double *xi,double maxstep, int method,double *xf) {
157 | 	int i,retval,MAXITER,niter,m;
158 | 	double fsval,eps,gtol,stol,ftol,xtol,delta;
159 | 	double *dx;
160 | 
161 | 	dx = (double*) malloc(sizeof(double) * N);
162 | 		/*
163 | 		 * Method 0 - Nelder-Mead
164 | 		 * Method 1 - Newton Line Search
165 | 		 * Method 2 - Newton Trust Region - Hook Step
166 | 		 * Method 3 - Newton Trust Region - Double Dog-Leg
167 | 		 * Method 4 - Conjugate Gradient
168 | 		 * Method 5 - BFGS
169 | 		 * Method 6 - Limited Memory BFGS
170 | 		 * Method 7 - BFGS More-Thuente Line Search
171 | 		 */
172 | 	/*
173 | 	 * Return Codes
174 | 	 *
175 | 	 * Code 1 denotes probable success.
176 | 	 * Codes 2,3 and 6 denote possible success.
177 | 	 * Codes 0, 4 and 15 denote failure. [Code 4 my also occur in cases where the minima is realized but
178 | 	 * convergence is not achieved due to tolerance values being too small. It is recommended that you use
179 | 	 * another method if you suspect that such a scenario has occured.]
180 | 	 *
181 | 	 * 0 - Input Error
182 | 	 * 1 - df(x)/dx <= gtol achieved so current point may be the local minima.
183 | 	 * 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so the point may be the local minima.
184 | 	 * 3 - Global Step failed to locate a lower point than the current point so it may be the local minima.
185 | 	 * 4 - Iteration Limit exceeded. Convergence probably not achieved.
186 | 	 * 6 - Function value drops below ftol (relative functional tolerance).
187 | 	 * 15 - Overflow occurs. Try a different method.
188 | 	 *
189 | 	 */
190 | 	fsval = 1.0;
191 | 	MAXITER = 200*N;
192 | 	niter = 0;
193 | 	delta = -1.0; // Trust Region default
194 | 	eps = macheps(); // Use macheps program
195 | 
196 | 	for(i = 0; i < N;++i) {
197 | 		dx[i] = 1.0;
198 | 	}
199 | 
200 | 	if (method == 0) {
201 | 		retval = nel_min(funcpt,xi,N,dx,fsval,MAXITER,&niter,eps,xf);
202 | 	} else if (method == 1) {
203 | 		gtol = pow(eps,1.0/3.0);
204 | 		stol = gtol * gtol;
205 | 		if (MAXITER < 1000) {
206 | 			MAXITER = 1000;
207 | 		}
208 | 		retval = newton_min_func(funcpt,funcgrad,xi,N,dx,fsval,maxstep,MAXITER,&niter,eps,gtol,stol,xf);
209 | 	} else if (method == 2) {
210 | 		gtol = pow(eps,1.0/3.0);
211 | 		stol = gtol * gtol;
212 | 		if (MAXITER < 1000) {
213 | 			MAXITER = 1000;
214 | 		}
215 | 		retval = newton_min_trust(funcpt,funcgrad,xi,N,dx,fsval,delta,0,MAXITER,&niter,eps,gtol,stol,xf);
216 | 	} else if (method == 3) {
217 | 		gtol = pow(eps,1.0/3.0);
218 | 		stol = gtol * gtol;
219 | 		if (MAXITER < 1000) {
220 | 			MAXITER = 1000;
221 | 		}
222 | 		retval = newton_min_trust(funcpt,funcgrad,xi,N,dx,fsval,delta,1,MAXITER,&niter,eps,gtol,stol,xf);
223 | 	} else if (method == 4) {
224 | 		gtol = pow(eps,1.0/3.0);
225 | 		ftol = gtol * gtol;
226 | 		xtol = eps;
227 | 		if (MAXITER < 1000) {
228 | 			MAXITER = 1000;
229 | 		}
230 | 		retval = conjgrad_min_lin(funcpt,funcgrad,xi,N,dx,maxstep,MAXITER,&niter,eps,gtol,ftol,xtol,xf);
231 | 	} else if (method == 5) {
232 | 		gtol = pow(eps,1.0/3.0);
233 | 		stol = gtol * gtol;
234 | 		if (MAXITER < 1000) {
235 | 			MAXITER = 1000;
236 | 		}
237 | 		retval = bfgs_min(funcpt,funcgrad,xi,N,dx,fsval,maxstep,MAXITER,&niter,eps,gtol,stol,xf);
238 | 	} else if (method == 6) {
239 | 		gtol = pow(eps,1.0/3.0);
240 | 		ftol = gtol * gtol;
241 | 		xtol = eps;
242 | 		if (MAXITER < 1000) {
243 | 			MAXITER = 1000;
244 | 		}
245 | 		m = mvalue(N);
246 | 		retval = bfgs_l_min(funcpt,funcgrad,xi,N,m,dx,fsval,maxstep,MAXITER,&niter,eps,gtol,ftol,xtol,xf);
247 | 	}
248 | 	else if (method == 7) {
249 | 		gtol = pow(eps, 1.0 / 3.0);
250 | 		ftol = gtol * gtol;
251 | 		xtol = eps;
252 | 		if (MAXITER < 1000) {
253 | 			MAXITER = 1000;
254 | 		}
255 | 		m = mvalue(N);
256 | 		retval = bfgs_min2(funcpt, funcgrad, xi, N, m, dx, fsval, maxstep, MAXITER, &niter, eps, gtol, ftol, xtol, xf);
257 | 	}
258 | 	else {
259 | 		printf("Method Value should be one of 0,1,2,3,4,5,6 or 7. See Documentation. \n");
260 | 		exit(1);
261 | 	}
262 | 
263 | 
264 | 
265 | 	free(dx);
266 | 	return retval;
267 | }
268 | 
269 | double fminbnd(custom_funcuni *funcuni,double a, double b) {
270 | 	double x,t,eps;
271 | 
272 | 	t = 1e-012;
273 | 	eps = macheps();
274 | 
275 | 	brent_local_min(funcuni,a,b,t,eps,&x);
276 | 
277 | 	return x;
278 | }
279 | 
280 | int fminnewt(custom_function *funcpt, custom_gradient *funcgrad, int N, double *xi,
281 | 	     double delta,double *dx,double fsval,double maxstep, int method,double *xf) {
282 | 	int retval;
283 | 	int MAXITER,niter;
284 | 	double eps,gtol,stol;
285 | 	/*
286 | 
287 | 	 * Method 1 - Newton Line Search
288 | 	 * Method 2 - Newton Trust Region - Hook Step
289 | 	 * Method 3 - Newton Trust Region - Double Dog-Leg
290 | 	 *
291 | 	 * Default Values :
292 | 	 *
293 | 	 * fsval = 1.0
294 | 	 * delta = -1.0
295 | 	 * dx = {1.0,1.0,...} - 1XN vector
296 | 
297 | 	 */
298 | 	/*
299 | 	 * Return Codes
300 | 	 *
301 | 	 * Code 1 denotes probable success.
302 | 	 * Codes 2,3 and 6 denote possible success.
303 | 	 * Codes 0, 4 and 15 denote failure. [Code 4 my also occur in cases where the minima is realized but
304 | 	 * convergence is not achieved due to tolerance values being too small. It is recommended that you use
305 | 	 * another method if you suspect that such a scenario has occured.]
306 | 	 *
307 | 	 * 0 - Input Error
308 | 	 * 1 - df(x)/dx <= gtol achieved so current point may be the local minima.
309 | 	 * 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so the point may be the local minima.
310 | 	 * 3 - Global Step failed to locate a lower point than the current point so it may be the local minima.
311 | 	 * 4 - Iteration Limit exceeded. Convergence probably not achieved.
312 | 	 * 6 - Function value drops below ftol (relative functional tolerance).
313 | 	 * 15 - Overflow occurs. Try a different method.
314 | 	 *
315 | 	 */
316 | 
317 | 	MAXITER = 200*N;
318 | 	niter = 0;
319 | 
320 | 	eps = macheps(); // Use macheps program
321 | 
322 | 	if (method == 1) {
323 | 		gtol = pow(eps,1.0/3.0);
324 | 		stol = gtol * gtol;
325 | 		if (MAXITER < 1000) {
326 | 			MAXITER = 1000;
327 | 		}
328 | 		retval = newton_min_func(funcpt,funcgrad,xi,N,dx,fsval,maxstep,MAXITER,&niter,eps,gtol,stol,xf);
329 | 	} else if (method == 2) {
330 | 		gtol = pow(eps,1.0/3.0);
331 | 		stol = gtol * gtol;
332 | 		if (MAXITER < 1000) {
333 | 			MAXITER = 1000;
334 | 		}
335 | 		retval = newton_min_trust(funcpt,funcgrad,xi,N,dx,fsval,delta,0,MAXITER,&niter,eps,gtol,stol,xf);
336 | 	} else if (method == 3) {
337 | 		gtol = pow(eps,1.0/3.0);
338 | 		stol = gtol * gtol;
339 | 		if (MAXITER < 1000) {
340 | 			MAXITER = 1000;
341 | 		}
342 | 		retval = newton_min_trust(funcpt,funcgrad,xi,N,dx,fsval,delta,1,MAXITER,&niter,eps,gtol,stol,xf);
343 | 	} else {
344 | 		printf("Method Value should be one of 1,2 or 3. See Documentation. \n");
345 | 		exit(1);
346 | 	}
347 | 
348 | 
349 | 	return retval;
350 | }
351 | 
352 | void optimize(opt_object obj, custom_function *funcpt, custom_gradient *funcgrad, int N, double *xi,
353 | 		int method) {
354 | 	int i,m;
355 | 	double fsval,delta;
356 | 	double *dx;
357 | 
358 | 	dx = (double*) malloc(sizeof(double) * N);
359 | 		/*
360 | 		 * Method 0 - Nelder-Mead
361 | 		 * Method 1 - Newton Line Search
362 | 		 * Method 2 - Newton Trust Region - Hook Step
363 | 		 * Method 3 - Newton Trust Region - Double Dog-Leg
364 | 		 * Method 4 - Conjugate Gradient
365 | 		 * Method 5 - BFGS
366 | 		 * Method 6 - Limited Memory BFGS
367 | 		 * Method 7 - BFGS More-Thuente Line Search
368 | 		 */
369 | 	/*
370 | 	 * Return Codes
371 | 	 *
372 | 	 * Code 1 denotes probable success.
373 | 	 * Codes 2,3 and 6 denote possible success.
374 | 	 * Codes 0, 4 and 15 denote failure. [Code 4 my also occur in cases where the minima is realized but
375 | 	 * convergence is not achieved due to tolerance values being too small. It is recommended that you use
376 | 	 * another method if you suspect that such a scenario has occurred.]
377 | 	 *
378 | 	 * 0 - Input Error
379 | 	 * 1 - df(x)/dx <= gtol achieved so current point may be the local minima.
380 | 	 * 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so the point may be the local minima.
381 | 	 * 3 - Global Step failed to locate a lower point than the current point so it may be the local minima.
382 | 	 * 4 - Iteration Limit exceeded. Convergence probably not achieved.
383 | 	 * 6 - Function value drops below ftol (relative functional tolerance).
384 | 	 * 15 - Overflow occurs. Try a different method.
385 | 	 *
386 | 	 */
387 | 	fsval = 1.0;
388 | 	delta = -1.0; // Trust Region default
389 | 
390 | 
391 | 	for(i = 0; i < N;++i) {
392 | 		dx[i] = 1.0;
393 | 	}
394 | 	obj->Method = method;
395 | 	obj->Iter = 0;
396 | 
397 | 	if (obj->N != N) {
398 | 		printf("The Object is initialized for a problem of size %d \n",obj->N);
399 | 		printf("Please Reinitialize the object for the new size %d using opt_init command \n",N);
400 | 		exit(1);
401 | 	}
402 | 
403 | 	if (method == 0) {
404 | 		strcpy(obj->MethodName,"Nelder-Mead");
405 | 		obj->retval = nel_min(funcpt,xi,obj->N,dx,fsval,obj->MaxIter,&obj->Iter,obj->eps,obj->xopt);
406 | 	} else if (method == 1) {
407 | 		strcpy(obj->MethodName,"Newton Line Search");
408 | 		obj->retval = newton_min_func(funcpt,funcgrad,xi,obj->N,dx,fsval,obj->maxstep,obj->MaxIter,&obj->Iter,obj->eps,obj->gtol,
409 | 				obj->stol,obj->xopt);
410 | 	} else if (method == 2) {
411 | 		strcpy(obj->MethodName,"Newton Trust Region - Hook Step");
412 | 
413 | 		obj->retval = newton_min_trust(funcpt,funcgrad,xi,obj->N,dx,fsval,delta,0,obj->MaxIter,&obj->Iter,obj->eps,
414 | 				obj->gtol,obj->stol,obj->xopt);
415 | 	} else if (method == 3) {
416 | 		strcpy(obj->MethodName,"Newton Trust Region - Double Dog-Leg");
417 | 
418 | 		obj->retval = newton_min_trust(funcpt,funcgrad,xi,obj->N,dx,fsval,delta,1,obj->MaxIter,&obj->Iter,obj->eps,
419 | 				obj->gtol,obj->stol,obj->xopt);
420 | 	} else if (method == 4) {
421 | 		strcpy(obj->MethodName,"Conjugate Gradient");
422 | 
423 | 		obj->retval = conjgrad_min_lin(funcpt, funcgrad, xi, obj->N, dx, obj->maxstep, obj->MaxIter, &obj->Iter, obj->eps, obj->gtol,
424 | 				obj->ftol,obj->xtol,obj->xopt);
425 | 	} else if (method == 5) {
426 | 		strcpy(obj->MethodName,"BFGS");
427 | 
428 | 		obj->retval = bfgs_min(funcpt, funcgrad, xi, obj->N, dx, fsval, obj->maxstep, obj->MaxIter, &obj->Iter, obj->eps, obj->gtol,
429 | 				obj->stol,obj->xopt);
430 | 	} else if (method == 6) {
431 | 		strcpy(obj->MethodName,"Limited Memory BFGS");
432 | 		m = mvalue(N);
433 | 		obj->retval = bfgs_l_min(funcpt, funcgrad, xi, obj->N, m, dx, fsval, obj->maxstep, obj->MaxIter, &obj->Iter, obj->eps,
434 | 				obj->gtol,obj->ftol,obj->xtol,obj->xopt);
435 | 	}
436 | 	else if (method == 7) {
437 | 		strcpy(obj->MethodName, "BFGS More-Thuente Line Search");
438 | 		m = mvalue(N);
439 | 		obj->retval = bfgs_min2(funcpt, funcgrad, xi, obj->N, m, dx, fsval, obj->maxstep, obj->MaxIter, &obj->Iter, obj->eps,
440 | 			obj->gtol, obj->ftol, obj->xtol, obj->xopt);
441 | 	}
442 | 	else {
443 | 		strcpy(obj->MethodName,"NULL");
444 | 		printf("Method Value should be one of 0,1,2,3,4,5 or 6. See Documentation. \n");
445 | 		exit(1);
446 | 	}
447 | 
448 | 	obj->objfunc = FUNCPT_EVAL(funcpt,obj->xopt,obj->N);
449 | 
450 | 	free(dx);
451 | }
452 | 
453 | void free_opt(opt_object object) {
454 | 
455 | 	free(object);
456 | 
457 | }
458 | 
459 | int levmar(custom_funcmult *funcmult, custom_jacobian *jacobian,
460 | 		double *xi,int M, int N,double *xf) {
461 | 	int info,i;
462 | 	double *fvec,*fjac,*diag,*qtf;
463 | 	int ldfjac,mode,nfev,njev,nprint,maxfev;
464 | 	int *ipvt;
465 | 	double factor,eps,epsfcn,ftol,gtol,xtol;
466 | 
467 | 	fvec = (double*) malloc(sizeof(double) *M);
468 | 	fjac = (double*) malloc(sizeof(double) *M * N);
469 | 	diag = (double*) malloc(sizeof(double) *N);
470 | 	qtf = (double*) malloc(sizeof(double) *N);
471 | 	ipvt = (int*) malloc(sizeof(int) *N);
472 | 
473 | 	eps = macheps();
474 | 	epsfcn = eps;
475 | 	gtol = pow(eps,1.0/3.0);
476 | 	ftol = gtol * gtol;
477 | 	xtol = eps;
478 | 	factor = 100.;
479 | 	ldfjac = M;
480 | 	nfev = njev = nprint = 0;
481 | 	mode = 1;
482 | 	maxfev = 1000;
483 | 
484 | 	for(i = 0; i < N;++i) {
485 | 		xf[i] = xi[i];
486 | 	}
487 | 
488 | 	if (jacobian == NULL) {
489 | 		info = lmdif(funcmult,xf,M,N,fvec,fjac,ldfjac,maxfev,diag,mode,factor,nprint,eps,epsfcn,ftol,gtol,
490 | 				xtol,&nfev,&njev,ipvt,qtf);
491 | 	} else {
492 | 		info = lmder(funcmult,jacobian,xf,M,N,fvec,fjac,ldfjac,maxfev,diag,mode,factor,nprint,eps,ftol,gtol,
493 | 				xtol,&nfev,&njev,ipvt,qtf);
494 | 	}
495 | 
496 | 
497 | 	free(fvec);
498 | 	free(fjac);
499 | 	free(diag);
500 | 	free(qtf);
501 | 	free(ipvt);
502 | 
503 | 	return info;
504 | }
505 | 
506 | void nls(nls_object obj, custom_funcmult *funcmult, custom_jacobian *jacobian,
507 | 		double *xi) {
508 | 
509 | 	int info,i,M,N,nprint;
510 | 	double *fvec,*fjac,*diag,*qtf;
511 | 	int *ipvt;
512 | 
513 | 	M = obj->M;
514 | 	N = obj-> N;
515 | 	nprint = 0;
516 | 
517 | 	fvec = (double*) malloc(sizeof(double) *M);
518 | 	fjac = (double*) malloc(sizeof(double) *M * N);
519 | 	diag = (double*) malloc(sizeof(double) *N);
520 | 	qtf = (double*) malloc(sizeof(double) *N);
521 | 	ipvt = (int*) malloc(sizeof(int) *N);
522 | 
523 | 	for(i = 0; i < N;++i) {
524 | 		obj->xopt[i] = xi[i];
525 | 	}
526 | 
527 | 	if (jacobian == NULL) {
528 | 		info = lmdif(funcmult,obj->xopt,obj->M,obj->N,fvec,fjac,obj->ldfjac,obj->Maxfev,diag,obj->mode,
529 | 				obj->factor,nprint,obj->eps,obj->epsfcn,obj->ftol,obj->gtol,obj->xtol,&obj->nfev,&obj->njev,ipvt,qtf);
530 | 	} else {
531 | 		info = lmder(funcmult,jacobian,obj->xopt,obj->M,obj->N,fvec,fjac,obj->ldfjac,obj->Maxfev,diag,obj->mode,
532 | 				obj->factor,nprint,obj->eps,obj->ftol,obj->gtol,obj->xtol,&obj->nfev,&obj->njev,ipvt,qtf);
533 | 	}
534 | 	obj->retval = info;
535 | 	obj->Iter = obj->nfev;
536 | 
537 | 	free(fvec);
538 | 	free(fjac);
539 | 	free(diag);
540 | 	free(qtf);
541 | 	free(ipvt);
542 | 
543 | }
544 | 
545 | void nls_scale(nls_object obj, custom_funcmult *funcmult, custom_jacobian *jacobian,
546 | 		double *diag,double *xi) {
547 | 
548 | 	int info,i,M,N,nprint;
549 | 	double *fvec,*fjac,*qtf;
550 | 	int *ipvt;
551 | 
552 | 	M = obj->M;
553 | 	N = obj-> N;
554 | 	obj->mode = 2;
555 | 	nprint = 0;
556 | 
557 | 	fvec = (double*) malloc(sizeof(double) *M);
558 | 	fjac = (double*) malloc(sizeof(double) *M * N);
559 | 	qtf = (double*) malloc(sizeof(double) *N);
560 | 	ipvt = (int*) malloc(sizeof(int) *N);
561 | 
562 | 	for(i = 0; i < N;++i) {
563 | 		obj->xopt[i] = xi[i];
564 | 	}
565 | 
566 | 	if (jacobian == NULL) {
567 | 		info = lmdif(funcmult,obj->xopt,obj->M,obj->N,fvec,fjac,obj->ldfjac,obj->Maxfev,diag,obj->mode,
568 | 				obj->factor,nprint,obj->eps,obj->epsfcn,obj->ftol,obj->gtol,obj->xtol,&obj->nfev,&obj->njev,ipvt,qtf);
569 | 	} else {
570 | 		info = lmder(funcmult,jacobian,obj->xopt,obj->M,obj->N,fvec,fjac,obj->ldfjac,obj->Maxfev,diag,obj->mode,
571 | 				obj->factor,nprint,obj->eps,obj->ftol,obj->gtol,obj->xtol,&obj->nfev,&obj->njev,ipvt,qtf);
572 | 	}
573 | 	obj->retval = info;
574 | 	obj->Iter = obj->nfev;
575 | 
576 | 	free(fvec);
577 | 	free(fjac);
578 | 	free(qtf);
579 | 	free(ipvt);
580 | 
581 | }
582 | 
583 | void nlssummary(nls_object obj) {
584 | 	int i;
585 | 	printf("\n\n");
586 | 	printf("Return Value : %d \n",obj->retval);
587 | 	if (obj->retval == 0) {
588 | 		printf("Termination Message : improper input parameters. \n");
589 | 	}
590 | 	if (obj->retval == 1) {
591 | 		printf("Termination Message : both actual and predicted relative reductions "
592 | 				"in the sum of squares are at most ftol. \n");
593 | 	}
594 | 	if (obj->retval == 2) {
595 | 		printf("Termination Message : relative error between two consecutive iterates "
596 | 				"is at most xtol. \n");
597 | 	}
598 | 	if (obj->retval == 3) {
599 | 		printf("Termination Message : conditions for info = 1 and info = 2 both hold. \n");
600 | 	}
601 | 	if (obj->retval == 4) {
602 | 		printf("Termination Message : the cosine of the angle between fvec and any "
603 | 				"column of the jacobian is at most gtol in absolute value. \n");
604 | 	}
605 | 	if (obj->retval == 5) {
606 | 		printf("Termination Message : number of calls to fcn has reached or "
607 | 				"exceeded maxfev. \n");
608 | 	}
609 | 	if (obj->retval == 6) {
610 | 		printf("Termination Message : ftol is too small. no further reduction in "
611 | 				"the sum of squares is possible. \n");
612 | 	}
613 | 	if (obj->retval == 7) {
614 | 		printf("Termination Message : xtol is too small. no further improvement in"
615 | 				"the approximate solution x is possible. \n");
616 | 	}
617 | 	if (obj->retval == 8) {
618 | 		printf("Termination Message : gtol is too small. fvec is orthogonal to the"
619 | 				"columns of the jacobian to machine precision. \n");
620 | 	}
621 | 	printf("Iterations : %d \n",obj->Iter);
622 | 	printf("Function Minimized At : [ ");
623 | 	for(i = 0; i < obj->N;++i) {
624 | 		printf("%g ",obj->xopt[i]);
625 | 	}
626 | 	printf(" ] \n");
627 | 	//printf("Function Value : %g \n \n",obj->objfunc);
628 | 
629 | }
630 | 
631 | void free_nls(nls_object object) {
632 | 
633 | 	free(object);
634 | 
635 | }
636 | 
637 | 


--------------------------------------------------------------------------------
/src/secant.c:
--------------------------------------------------------------------------------
   1 | #include "secant.h"
   2 | 
   3 | /*
   4 |  * secant.c
   5 |  *
   6 |  *  Copyright (c) 2014, Rafat Hussain
   7 |  *	License : BSD 3-Clause
   8 |  *	See COPYRIGHT for more details
   9 |  */
  10 | 
  11 | static void jrotate(double *A,int N,double a,double b,int i) {
  12 | 	int j,r,r1;
  13 | 	double c,s,den,y,w;
  14 | 	r = i * N;
  15 | 	r1 = r + N;
  16 | 	if (a == 0.0) {
  17 | 		c = 0.0;
  18 | 		s = signx(b);
  19 | 	} else {
  20 | 		den = sqrt(a*a+b*b);
  21 | 		c = a / den;
  22 | 		s = b / den;
  23 | 	}
  24 | 	
  25 | 	for (j = i; j < N;++j) {
  26 | 		y = A[r+j];
  27 | 		w = A[r1+j];
  28 | 		A[r+j] = c*y - s*w;
  29 | 		A[r1+j] = s*y + c*w;
  30 | 	}
  31 | }
  32 | 
  33 | static void qrupdate(double *R,int N,double *u,double *v) {
  34 | 	int i,j,k,l;
  35 | 	
  36 | 	for(i = 1; i < N;++i) {
  37 | 		R[i*N+i-1] = 0;
  38 | 	}
  39 | 	
  40 | 	k = N-1;
  41 | 	while (u[k] == 0 && k > 0) {
  42 | 		k--;
  43 | 	}
  44 | 	
  45 | 	for (i = k-1; i >= 0;--i) {
  46 | 		jrotate(R,N,u[i],-u[i+1],i);
  47 | 		if (u[i] == 0) {
  48 | 			u[i] = fabs(u[i+1]);
  49 | 		} else {
  50 | 			u[i] = sqrt(u[i]*u[i] + u[i+1]*u[i+1]);
  51 | 		}
  52 | 	}
  53 | 	
  54 | 	for(i = 0; i < N;++i) {
  55 | 		R[i] += u[0] * v[i];
  56 | 	}
  57 | 	
  58 | 	for(i = 0; i < k;++i) {
  59 | 		j = i * N;
  60 | 		l = j + N;
  61 | 		jrotate(R,N,R[j+i],-R[l+i],i);
  62 | 	}
  63 | }
  64 | 
  65 | void bfgs_naive(double *H,int N,double eps,double *xi,double *xf,double *jac,double *jacf) {
  66 | 	int i,j,supd,ct;
  67 | 	double sn,yn,fd,yt,jacm;
  68 | 	double *sk,*yk,*temp,*temp2,*t;
  69 | 	
  70 | 	sk = (double*) malloc(sizeof(double) *N);
  71 | 	yk = (double*) malloc(sizeof(double) *N);
  72 | 	temp = (double*) malloc(sizeof(double) *1);
  73 | 	temp2 = (double*) malloc(sizeof(double) *1);
  74 | 	t = (double*) malloc(sizeof(double) *N);
  75 | 	
  76 | 	msub(xf,xi,sk,1,N);
  77 | 	msub(jacf,jac,yk,1,N);
  78 | 	
  79 | 	sn = l2norm(sk,N);
  80 | 	yn = l2norm(yk,N);
  81 | 	fd = sqrt(eps);
  82 | 	
  83 | 	mmult(yk,sk,temp,1,N,1);
  84 | 	
  85 | 	if (temp[0] >= fd*sn*yn) {
  86 | 		supd = 1;
  87 | 		for(i = 0; i < N;++i) {
  88 | 			t[i] = 0.0;
  89 | 			for(j = 0; j < i;++j) {
  90 | 				ct = j * N;
  91 | 				t[i] += H[ct+i] * sk[j];
  92 | 			}
  93 | 			
  94 | 			for(j = i; j < N;++j) {
  95 | 				ct = i * N;
  96 | 				t[i] += H[ct + j] * sk[j];
  97 | 			}
  98 | 			yt = fabs(yk[i] - t[i]);
  99 | 			if (jac[i] > jacf[i]) {
 100 | 				jacm = jac[i];
 101 | 			} else {
 102 | 				jacm = jacf[i];
 103 | 			}
 104 | 			if (yt >= fd * jacm) {
 105 | 				supd = 0;
 106 | 			}
 107 | 		}
 108 | 		if (supd == 0) {
 109 | 			mmult(sk,t,temp2,1,N,1);
 110 | 			for(i = 0;i < N; ++i) {
 111 | 				ct = i * N;
 112 | 				for(j = i; j < N;++j) {
 113 | 					H[ct+j] += (yk[i]*yk[j]/temp[0]);
 114 | 					H[ct+j] -= (t[i]*t[j]/temp2[0]);
 115 | 				}
 116 | 			}
 117 | 		} 
 118 | 	} 
 119 | 	
 120 | 	free(sk);
 121 | 	free(yk);
 122 | 	free(temp);
 123 | 	free(temp2);
 124 | 	free(t);
 125 | }
 126 | 
 127 | static void inithess_naive(double *H,int N,double fi,double fsval,double *dx) {
 128 | 	int i,j,ct;
 129 | 	double temp;
 130 | 	
 131 | 	if (fabs(fi) > fsval) {
 132 | 		temp = fabs(fi);
 133 | 	} else {
 134 | 		temp = fsval;
 135 | 	}
 136 | 	
 137 | 	for(i = 0; i < N;++i) {
 138 | 		ct = i *N;
 139 | 		for(j = 0; j < N;++j) {
 140 | 			if (i == j) {
 141 | 				H[ct+j] = temp * dx[i] * dx[i];
 142 | 			} else {
 143 | 				H[ct+j] = 0.0;
 144 | 			}
 145 | 			
 146 | 		}
 147 | 	}
 148 | 	
 149 | }
 150 | 
 151 | int bfgs_min_naive(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double fsval, double maxstep, int MAXITER,
 152 | 		double eps,double *xf)  {
 153 | 	int rcode,iter,gfdcode;
 154 | 	int i,siter,retval;
 155 | 	double gtol,stol,dt1,dt2;
 156 | 	double fx,num,den,stop0,fxf,eps2;
 157 | 	double *jac,*hess,*scheck,*xc,*L,*step,*jacf;
 158 | 	
 159 | 	jac = (double*) malloc(sizeof(double) *N);
 160 | 	scheck = (double*) malloc(sizeof(double) *N);
 161 | 	xc = (double*) malloc(sizeof(double) *N);
 162 | 	step = (double*) malloc(sizeof(double) *N);
 163 | 	hess = (double*) malloc(sizeof(double) *N * N);
 164 | 	L = (double*) malloc(sizeof(double) *N * N);
 165 | 	jacf = (double*) malloc(sizeof(double) *N);
 166 | 	
 167 | 	/*
 168 | 	 * Return Codes
 169 | 	 * 
 170 | 	 * Codes 1,2,3 denote possible success.
 171 | 	 * Codes 0 and 4 denote failure.
 172 | 	 * 
 173 | 	 * 1 - df(x)/dx <= gtol achieved so xf may be the local minima.
 174 | 	 * 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so xf may be the local minima.
 175 | 	 * 3 - Global Step failed to locate a lower point than xi so xi may be the local minima.
 176 | 	 * 4 - Iteration Limit exceeded. Convergence not achieved.
 177 | 	 * 
 178 | 	 */ 
 179 | 	
 180 | 	rcode = 0;
 181 | 	iter = 0;
 182 | 	siter = MAXITER;
 183 | 	gtol = pow(eps,1.0/3.0);
 184 | 	
 185 | 	stol = gtol * gtol;
 186 | 	eps2 = sqrt(eps);
 187 | 	gfdcode = 0;
 188 | 	//set values
 189 | 	for(i = 0; i < N;++i) {
 190 | 		xi[i] *= dx[i];
 191 | 		dx[i] = 1.0 / dx[i];
 192 | 	}
 193 | 	fx = FUNCPT_EVAL(funcpt,xi,N);
 194 | 	if (fx >= DBL_MAX || fx <= -DBL_MAX) {
 195 | 		printf("Program Exiting as the function value exceeds the maximum double value");
 196 | 		rcode = 15;
 197 | 	}
 198 | 	if (fx != fx) {
 199 | 		printf("Program Exiting as the function returns NaN");
 200 | 		rcode = 15;
 201 | 	}
 202 | 	
 203 | 	gfdcode = grad_fd(funcpt,funcgrad,xi,N,dx,eps2,jac);
 204 | 	if (gfdcode == 15) {
 205 | 		rcode = 15;
 206 | 	}
 207 | 	
 208 | 	
 209 | 	//maxstep = 1000.0; // Needs to be set at a much higher value proportional to l2 norm of dx
 210 | 
 211 | 	if (maxstep <= 0.0) {
 212 | 		maxstep = 1000.0;
 213 | 		dt1 = dt2 = 0.0;
 214 | 		for (i = 0; i < N; ++i) {
 215 | 			dt1 += dx[i] * dx[i];
 216 | 			dt2 += dx[i] * xi[i] * dx[i] * xi[i];
 217 | 		}
 218 | 
 219 | 		dt1 = sqrt(dt1);
 220 | 		dt2 = sqrt(dt2);
 221 | 
 222 | 		if (dt1 > dt2) {
 223 | 			maxstep *= dt1;
 224 | 		}
 225 | 		else {
 226 | 			maxstep *= dt2;
 227 | 		}
 228 | 	}
 229 | 	
 230 | 	//printf("dt1 dt2 %g \n", maxstep);
 231 | 
 232 | 	//Check Stop0
 233 | 	if (fabs(fx) > fabs(fsval)) {
 234 | 			den = fabs(fx);
 235 | 	} else {
 236 | 			den = fabs(fsval);
 237 | 	}
 238 | 	for(i = 0; i < N;++i) {
 239 | 		if (fabs(xi[i]) > 1.0 / fabs(dx[i])) {
 240 | 			num = fabs(xi[i]);
 241 | 		} else {
 242 | 			num = 1.0 / fabs(dx[i]);
 243 | 		}
 244 | 		scheck[i] = fabs(jac[i]) * num / den;
 245 | 	}
 246 | 	
 247 | 	stop0 = array_max_abs(scheck,N);
 248 | 	
 249 | 	if (stop0 <= gtol * 1e-03) {
 250 | 		rcode = 1;
 251 | 		for(i = 0; i < N;++i) {
 252 | 			xf[i] = xi[i];
 253 | 		}
 254 | 		return rcode;
 255 | 	}
 256 | 	
 257 | 	//hessian_fd(funcpt,xi,N,dx,hess);
 258 | 	inithess_naive(hess,N,fx,fsval,dx);
 259 | 	
 260 | 	for(i = 0; i < N;++i) {
 261 | 		xc[i] = xi[i];
 262 | 	}
 263 | 	
 264 | 	while (rcode == 0 && iter < siter) {
 265 | 		iter++;
 266 | 		modelhess(hess,N,dx,eps,L);
 267 | 		scale(jac,1,N,-1.0);
 268 | 		//mdisplay(hess,N,N);
 269 | 		
 270 | 		linsolve_lower(L,N,jac,step);
 271 | 		
 272 | 		scale(jac,1,N,-1.0);
 273 | 		//retval = lnsrchmod(funcpt,xc,jac,step,N,dx,maxstep,stol,xf,jacf); 
 274 | 		retval = lnsrch(funcpt,xc,jac,step,N,dx,maxstep,stol,xf);
 275 | 		
 276 | 		//retval = swolfe(funcpt,xc,jac,step,N,dx,maxstep,stol,xf);
 277 | 		
 278 | 		fxf = FUNCPT_EVAL(funcpt,xf,N);
 279 | 		if (fxf >= DBL_MAX || fxf <= -DBL_MAX) {
 280 | 			printf("Program Exiting as the function value exceeds the maximum double value");
 281 | 			rcode = 15;
 282 | 			break;
 283 | 		}
 284 | 		if (fxf != fxf) {
 285 | 			printf("Program Exiting as the function returns NaN");
 286 | 			rcode = 15;
 287 | 			break;
 288 | 		}
 289 | 		//printf("%d %g \n",iter,fxf);
 290 | 		gfdcode = grad_fd(funcpt,funcgrad,xf,N,dx,eps2,jacf);
 291 | 		if (gfdcode == 15) {
 292 | 			rcode = 15;
 293 | 			break;
 294 | 		}
 295 | 		rcode = stopcheck(fxf,N,xc,xf,jacf,dx,fsval,gtol,stol,retval);
 296 | 		//hessian_fd(funcpt,xf,N,dx,hess);
 297 | 		bfgs_naive(hess,N,eps,xc,xf,jac,jacf);
 298 | 		for(i = 0; i < N;++i) {
 299 | 			xc[i] = xf[i];
 300 | 			jac[i] = jacf[i];
 301 | 		}
 302 | 	}
 303 | 	
 304 | 	if (rcode == 0 && iter >= siter) {
 305 | 		rcode = 4;
 306 | 	}
 307 | 	/*
 308 | 	for(i = 0; i < N;++i) {
 309 | 		xf[i] *= dx[i];
 310 | 		dx[i] = 1.0 / dx[i];
 311 | 	}
 312 | 	*/
 313 | 	free(jac);
 314 | 	free(hess);
 315 | 	free(scheck);
 316 | 	free(xc);
 317 | 	free(L);
 318 | 	free(step);
 319 | 	free(jacf);
 320 | 	return rcode;
 321 | }
 322 | 
 323 | static void inithess_lower(double *L,int N,double fi,double fsval,double *dx) {
 324 | 	int i,j,ct;
 325 | 	double temp;
 326 | 	
 327 | 	if (fabs(fi) > fsval) {
 328 | 		temp = fabs(fi);
 329 | 	} else {
 330 | 		temp = fsval;
 331 | 	}
 332 | 	
 333 | 	temp = sqrt(temp);
 334 | 	
 335 | 	for(i = 0; i < N;++i) {
 336 | 		ct = i *N;
 337 | 		L[ct+i] = temp * dx[i];
 338 | 		for(j = 0; j < i;++j) {
 339 | 			L[ct+j] = 0;
 340 | 		}
 341 | 	}
 342 | 	
 343 | }
 344 | 
 345 | void bfgs_factored(double *H,int N,double eps,double *xi,double *xf,double *jac,double *jacf) {
 346 | 	int i,j,supd,ct;
 347 | 	double sn,yn,fd,yt,jacm,alpha,temp3;
 348 | 	double *sk,*yk,*temp,*temp2,*t,*u;
 349 | 	
 350 | 	sk = (double*) malloc(sizeof(double) *N);
 351 | 	yk = (double*) malloc(sizeof(double) *N);
 352 | 	temp = (double*) malloc(sizeof(double) *1);
 353 | 	temp2 = (double*) malloc(sizeof(double) *1);
 354 | 	t = (double*) malloc(sizeof(double) *N);
 355 | 	u = (double*) malloc(sizeof(double) *N);
 356 | 	
 357 | 	msub(xf,xi,sk,1,N);
 358 | 	msub(jacf,jac,yk,1,N);
 359 | 	
 360 | 	sn = l2norm(sk,N);
 361 | 	yn = l2norm(yk,N);
 362 | 	fd = sqrt(eps);
 363 | 	
 364 | 	mmult(yk,sk,temp,1,N,1);
 365 | 	
 366 | 	if (temp[0] >= fd*sn*yn) {
 367 | 		for(i = 0; i < N;++i) {
 368 | 			t[i] = 0.0;
 369 | 			for(j = i; j < N;++j) {
 370 | 				ct = j * N;
 371 | 				t[i] += H[ct + i] * sk[j];
 372 | 			}
 373 | 		}
 374 | 		mmult(t,t,temp2,1,N,1);
 375 | 		alpha = sqrt(temp[0]/temp2[0]);
 376 | 		supd = 1;
 377 | 		for(i = 0; i < N;++i) {
 378 | 			temp3 = 0.0;
 379 | 			ct = i * N;
 380 | 			for(j = 0; j < i+1;++j) {
 381 | 				temp3 += H[ct + j] * t[j];
 382 | 			}
 383 | 			yt = fabs(yk[i] - temp3);
 384 | 			if (jac[i] > jacf[i]) {
 385 | 				jacm = jac[i];
 386 | 			} else {
 387 | 				jacm = jacf[i];
 388 | 			}
 389 | 			if (yt >= fd * jacm) {
 390 | 				supd = 0;
 391 | 			}
 392 | 			u[i] = yk[i] - alpha*temp3;
 393 | 		}
 394 | 		if (supd == 0) {
 395 | 			temp3 = 1.0 / sqrt(temp[0] * temp2[0]);
 396 | 			for(i = 0; i < N; ++i) {
 397 | 				t[i] *= temp3;
 398 | 			}
 399 | 			for(i = 1; i < N; ++i) {
 400 | 				ct = i *N;
 401 | 				for(j = 0; j < i;++j) {
 402 | 					H[j*N+i] = H[ct+j];
 403 | 				}
 404 | 			}
 405 | 			qrupdate(H,N,t,u);
 406 | 			for(i = 1; i < N; ++i) {
 407 | 				ct = i *N;
 408 | 				for(j = 0; j < i;++j) {
 409 | 					H[ct+j] = H[j*N+i];
 410 | 				}
 411 | 			}
 412 | 		} 
 413 | 	} 
 414 | 	
 415 | 	free(sk);
 416 | 	free(yk);
 417 | 	free(temp);
 418 | 	free(temp2);
 419 | 	free(t);
 420 | 	free(u);
 421 | }
 422 | 
 423 | int bfgs_min(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, double *dx, double fsval,double maxstep, int MAXITER, int *niter,
 424 | 		double eps,double gtol,double stol,double *xf)  {
 425 | 	int rcode,gfdcode;
 426 | 	int i,siter,retval;
 427 | 	double dt1,dt2;
 428 | 	double fx,num,den,stop0,fxf,eps2;
 429 | 	double *jac,*hess,*scheck,*xc,*L,*step,*jacf;
 430 | 	
 431 | 	jac = (double*) malloc(sizeof(double) *N);
 432 | 	scheck = (double*) malloc(sizeof(double) *N);
 433 | 	xc = (double*) malloc(sizeof(double) *N);
 434 | 	step = (double*) malloc(sizeof(double) *N);
 435 | 	hess = (double*) malloc(sizeof(double) *N * N);
 436 | 	L = (double*) malloc(sizeof(double) *N * N);
 437 | 	jacf = (double*) malloc(sizeof(double) *N);
 438 | 	
 439 | 	/*
 440 | 	 * Return Codes
 441 | 	 * 
 442 | 	 * Codes 1,2,3 denote possible success.
 443 | 	 * Codes 0 and 4 denote failure.
 444 | 	 * 
 445 | 	 * 1 - df(x)/dx <= gtol achieved so xf may be the local minima.
 446 | 	 * 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so xf may be the local minima.
 447 | 	 * 3 - Global Step failed to locate a lower point than xi so xi may be the local minima.
 448 | 	 * 4 - Iteration Limit exceeded. Convergence not achieved.
 449 | 	 15 -  Failure as Inf/Nan Values encountered 
 450 | 	 * 
 451 | 	 */ 
 452 | 	
 453 | 	rcode = 0;
 454 | 	*niter = 0;
 455 | 	siter = MAXITER;
 456 | 	eps2 = sqrt(eps);
 457 | 	gfdcode = 0;
 458 | 
 459 | 	//set values
 460 | 	for(i = 0; i < N;++i) {
 461 | 		xi[i] *= dx[i];
 462 | 		dx[i] = 1.0 / dx[i];
 463 | 	}
 464 | 	fx = FUNCPT_EVAL(funcpt, xi, N);
 465 | 	if (fx >= DBL_MAX || fx <= -DBL_MAX) {
 466 | 		printf("Program Exiting as the function value exceeds the maximum double value");
 467 | 		rcode = 15;
 468 | 	}
 469 | 	if (fx != fx) {
 470 | 		printf("Program Exiting as the function returns NaN");
 471 | 		rcode = 15;
 472 | 	}
 473 | 
 474 | 	gfdcode = grad_fd(funcpt,funcgrad,xi,N,dx,eps2,jac);
 475 | 	if (gfdcode == 15) {
 476 | 		rcode = 15;
 477 | 	}
 478 | 
 479 | 
 480 | 	//maxstep = 1000.0; // Needs to be set at a much higher value proportional to l2 norm of dx
 481 | 
 482 | 	if (maxstep <= 0.0) {
 483 | 		maxstep = 1000.0;
 484 | 		dt1 = dt2 = 0.0;
 485 | 		for (i = 0; i < N; ++i) {
 486 | 			dt1 += dx[i] * dx[i];
 487 | 			dt2 += dx[i] * xi[i] * dx[i] * xi[i];
 488 | 		}
 489 | 
 490 | 		dt1 = sqrt(dt1);
 491 | 		dt2 = sqrt(dt2);
 492 | 
 493 | 		if (dt1 > dt2) {
 494 | 			maxstep *= dt1;
 495 | 		}
 496 | 		else {
 497 | 			maxstep *= dt2;
 498 | 		}
 499 | 	}
 500 | 	
 501 | 	//Check Stop0
 502 | 	if (fabs(fx) > fabs(fsval)) {
 503 | 			den = fabs(fx);
 504 | 	} else {
 505 | 			den = fabs(fsval);
 506 | 	}
 507 | 	for(i = 0; i < N;++i) {
 508 | 		if (fabs(xi[i]) > 1.0 / fabs(dx[i])) {
 509 | 			num = fabs(xi[i]);
 510 | 		} else {
 511 | 			num = 1.0 / fabs(dx[i]);
 512 | 		}
 513 | 		scheck[i] = fabs(jac[i]) * num / den;
 514 | 	}
 515 | 	
 516 | 	stop0 = array_max_abs(scheck,N);
 517 | 	
 518 | 	if (stop0 <= gtol * 1e-03) {
 519 | 		rcode = 1;
 520 | 		for(i = 0; i < N;++i) {
 521 | 			xf[i] = xi[i];
 522 | 		}
 523 | 	}
 524 | 	
 525 | 	//hessian_fd(funcpt,xi,N,dx,hess);
 526 | 	inithess_lower(L,N,fx,fsval,dx);
 527 | 	
 528 | 	for(i = 0; i < N;++i) {
 529 | 		xc[i] = xi[i];
 530 | 	}
 531 | 	
 532 | 	while (rcode == 0 && *niter < siter) {
 533 | 		*niter = *niter + 1;
 534 | 		scale(jac,1,N,-1.0);
 535 | 		
 536 | 		linsolve_lower(L,N,jac,step);
 537 | 		
 538 | 		scale(jac,1,N,-1.0);
 539 | 
 540 | 		retval = lnsrch(funcpt,xc,jac,step,N,dx,maxstep,stol,xf);
 541 | 
 542 | 		fxf = FUNCPT_EVAL(funcpt, xf, N);
 543 | 		if (fxf >= DBL_MAX || fxf <= -DBL_MAX) {
 544 | 			printf("Program Exiting as the function value exceeds the maximum double value");
 545 | 			rcode = 15;
 546 | 			break;
 547 | 		}
 548 | 		if (fxf != fxf) {
 549 | 			printf("Program Exiting as the function returns NaN");
 550 | 			rcode = 15;
 551 | 			break;
 552 | 		}
 553 | 
 554 | 		gfdcode = grad_fd(funcpt,funcgrad,xf,N,dx,eps2,jacf);
 555 | 		if (gfdcode == 15) {
 556 | 			rcode = 15;
 557 | 			break;
 558 | 		}
 559 | 		rcode = stopcheck(fxf,N,xc,xf,jacf,dx,fsval,gtol,stol,retval);
 560 | 		//hessian_fd(funcpt,xf,N,dx,hess);
 561 | 		//bfgs_naive(hess,N,xc,xf,jac,jacf);
 562 | 		bfgs_factored(L,N,eps,xc,xf,jac,jacf);
 563 | 		for(i = 0; i < N;++i) {
 564 | 			xc[i] = xf[i];
 565 | 			jac[i] = jacf[i];
 566 | 		}
 567 | 	}
 568 | 	
 569 | 	if (rcode == 0 && *niter >= siter) {
 570 | 		rcode = 4;
 571 | 	}
 572 | 	
 573 | 	for(i = 0; i < N;++i) {
 574 | 		xi[i] *= dx[i];
 575 | 		dx[i] = 1.0 / dx[i];
 576 | 	}
 577 | 	
 578 | 	free(jac);
 579 | 	free(hess);
 580 | 	free(scheck);
 581 | 	free(xc);
 582 | 	free(L);
 583 | 	free(step);
 584 | 	free(jacf);
 585 | 	return rcode;
 586 | }
 587 | 
 588 | int bfgs_min2(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, int m, double *dx, double fsval, double maxstep, int MAXITER, int *niter,
 589 | 	double eps, double gtol, double ftol, double xtol, double *xf)  {
 590 | 	int rcode, gfdcode;
 591 | 	int i, siter, retval;
 592 | 	double dt1, dt2;
 593 | 	double fx, num, den, stop0, fxf, eps2,fo,alpha;
 594 | 	double *jac, *hess, *scheck, *xc, *L, *step, *jacf;
 595 | 
 596 | 	jac = (double*)malloc(sizeof(double)*N);
 597 | 	scheck = (double*)malloc(sizeof(double)*N);
 598 | 	xc = (double*)malloc(sizeof(double)*N);
 599 | 	step = (double*)malloc(sizeof(double)*N);
 600 | 	hess = (double*)malloc(sizeof(double)*N * N);
 601 | 	L = (double*)malloc(sizeof(double)*N * N);
 602 | 	jacf = (double*)malloc(sizeof(double)*N);
 603 | 
 604 | 	/*
 605 | 	* Return Codes
 606 | 	*
 607 | 	* Codes 1,2,3 denote possible success.
 608 | 	* Codes 0 and 4 denote failure.
 609 | 	*
 610 | 	* 1 - df(x)/dx <= gtol achieved so xf may be the local minima.
 611 | 	* 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so xf may be the local minima.
 612 | 	* 3 - Global Step failed to locate a lower point than xi so xi may be the local minima.
 613 | 	* 4 - Iteration Limit exceeded. Convergence not achieved.
 614 | 	* 15 -Failure as Inf/Nan Values encountered
 615 | 	*
 616 | 	*/
 617 | 
 618 | 	rcode = 0;
 619 | 	*niter = 0;
 620 | 	siter = MAXITER;
 621 | 	eps2 = sqrt(eps);
 622 | 
 623 | 	alpha = 1.0;
 624 | 	gfdcode = 0;
 625 | 
 626 | 	//set values
 627 | 	for (i = 0; i < N; ++i) {
 628 | 		xi[i] *= dx[i];
 629 | 		dx[i] = 1.0 / dx[i];
 630 | 	}
 631 | 	fx = FUNCPT_EVAL(funcpt, xi, N);
 632 | 	if (fx >= DBL_MAX || fx <= -DBL_MAX) {
 633 | 		printf("Program Exiting as the function value exceeds the maximum double value");
 634 | 		rcode = 15;
 635 | 	}
 636 | 	if (fx != fx) {
 637 | 		printf("Program Exiting as the function returns NaN");
 638 | 		rcode = 15;
 639 | 	}
 640 | 
 641 | 	fo = fx;
 642 | 
 643 | 	gfdcode = grad_fd(funcpt, funcgrad, xi, N, dx, eps2, jac);
 644 | 	if (gfdcode == 15) {
 645 | 		rcode = 15;
 646 | 	}
 647 | 
 648 | 
 649 | 	//maxstep = 1000.0; // Needs to be set at a much higher value proportional to l2 norm of dx
 650 | 
 651 | 	if (maxstep <= 0.0) {
 652 | 		maxstep = 1000.0;
 653 | 		dt1 = dt2 = 0.0;
 654 | 		for (i = 0; i < N; ++i) {
 655 | 			dt1 += dx[i] * dx[i];
 656 | 			dt2 += dx[i] * xi[i] * dx[i] * xi[i];
 657 | 		}
 658 | 
 659 | 		dt1 = sqrt(dt1);
 660 | 		dt2 = sqrt(dt2);
 661 | 
 662 | 		if (dt1 > dt2) {
 663 | 			maxstep *= dt1;
 664 | 		}
 665 | 		else {
 666 | 			maxstep *= dt2;
 667 | 		}
 668 | 	}
 669 | 
 670 | 	//Check Stop0
 671 | 	if (fabs(fx) > fabs(fsval)) {
 672 | 		den = fabs(fx);
 673 | 	}
 674 | 	else {
 675 | 		den = fabs(fsval);
 676 | 	}
 677 | 	for (i = 0; i < N; ++i) {
 678 | 		if (fabs(xi[i]) > 1.0 / fabs(dx[i])) {
 679 | 			num = fabs(xi[i]);
 680 | 		}
 681 | 		else {
 682 | 			num = 1.0 / fabs(dx[i]);
 683 | 		}
 684 | 		scheck[i] = fabs(jac[i]) * num / den;
 685 | 	}
 686 | 
 687 | 	stop0 = array_max_abs(scheck, N);
 688 | 
 689 | 	if (stop0 <= gtol * 1e-03) {
 690 | 		rcode = 1;
 691 | 		for (i = 0; i < N; ++i) {
 692 | 			xf[i] = xi[i];
 693 | 		}
 694 | 	}
 695 | 
 696 | 	//hessian_fd(funcpt,xi,N,dx,hess);
 697 | 	inithess_lower(L, N, fx, fsval, dx);
 698 | 
 699 | 	for (i = 0; i < N; ++i) {
 700 | 		xc[i] = xi[i];
 701 | 	}
 702 | 	fxf = fx;
 703 | 
 704 | 	while (rcode == 0 && *niter < siter) {
 705 | 		*niter = *niter + 1;
 706 | 		scale(jac, 1, N, -1.0);
 707 | 
 708 | 		linsolve_lower(L, N, jac, step);
 709 | 
 710 | 		scale(jac, 1, N, -1.0);
 711 | 
 712 | 		for (i = 0; i < N; ++i) {
 713 | 			jacf[i] = jac[i];
 714 | 		}
 715 | 
 716 | 		//retval = lnsrch(funcpt, xc, jac, step, N, dx, maxstep, stol, xf);
 717 | 		retval = lnsrchmt(funcpt,funcgrad, xc, &fxf, jac, &alpha, step, N, dx, maxstep,MAXITER,eps2,ftol, gtol, xtol, xf);
 718 | 
 719 | 		//rcode = stopcheck(fxf, N, xc, xf, jacf, dx, fsval, gtol, stol, retval);
 720 | 		rcode = stopcheck2_mt(fxf, N, fo, jac, dx, eps, gtol, ftol, retval);
 721 | 		fo = fxf;
 722 | 		//hessian_fd(funcpt,xf,N,dx,hess);
 723 | 		//bfgs_naive(hess,N,xc,xf,jac,jacf);
 724 | 		bfgs_factored(L, N, eps, xc, xf, jacf, jac);
 725 | 		for (i = 0; i < N; ++i) {
 726 | 			xc[i] = xf[i];
 727 | 		}
 728 | 	}
 729 | 
 730 | 	if (rcode == 0 && *niter >= siter) {
 731 | 		rcode = 4;
 732 | 	}
 733 | 
 734 | 	for (i = 0; i < N; ++i) {
 735 | 		xi[i] *= dx[i];
 736 | 		dx[i] = 1.0 / dx[i];
 737 | 	}
 738 | 
 739 | 	free(jac);
 740 | 	free(hess);
 741 | 	free(scheck);
 742 | 	free(xc);
 743 | 	free(L);
 744 | 	free(step);
 745 | 	free(jacf);
 746 | 	return rcode;
 747 | }
 748 | 
 749 | void bfgs_rec(double *H,int N,int iter,int m,double *jac,double *sk,double *yk,double *r) {
 750 | 	int i,ptr,k,bound;
 751 | 	double *q,*temp,*rho,*alpha,*temp1,*temp2,*qs,*beta;
 752 | 
 753 | 
 754 | 	q = (double*) malloc(sizeof(double) *N);
 755 | 	temp = (double*) malloc(sizeof(double) *1);
 756 | 	temp1 = (double*) malloc(sizeof(double) *N);
 757 | 	temp2 = (double*) malloc(sizeof(double) *N);
 758 | 	alpha = (double*) malloc(sizeof(double) * m);
 759 | 	beta = (double*) malloc(sizeof(double) * m);
 760 | 	rho = (double*) malloc(sizeof(double) * m);
 761 | 	qs = (double*) malloc(sizeof(double) * m);
 762 | 
 763 | 	k = 0; ptr = 0; i = 0; bound = 0;
 764 | 	
 765 | 	for(i = 0;i < N;++i) {
 766 | 		q[i] = jac[i];
 767 | 		temp1[i] = 0.0;
 768 | 		temp2[i] = 0.0;
 769 | 	}
 770 | 
 771 | 	if (iter <= m) {
 772 | 		bound = iter;
 773 | 	} else {
 774 | 		bound = m;
 775 | 	}
 776 | 
 777 | 	for (i = 0; i < m;++i) {
 778 | 		alpha[i] = beta[i] = 0.0;
 779 | 	}
 780 | 
 781 | 	for (i = 0; i < m;++i) {
 782 | 		rho[i] = qs[i] = 0.0;
 783 | 	}
 784 | 
 785 | 	//mmult(yk,sk,temp,1,N,1);
 786 | 
 787 | 	for (i = bound - 1; i >= 0; --i) {
 788 | 		ptr = i * N;
 789 | 		for(k = 0; k < N;++k) {
 790 | 			temp1[k] = yk[ptr+k];
 791 | 			temp2[k] = sk[ptr+k];
 792 | 		}
 793 | 		mmult(temp1,temp2,temp,1,N,1);
 794 | 		rho[i] = 1.0/temp[0];
 795 | 		mmult(temp2,q,alpha+i,1,N,1);
 796 | 		alpha[i] *= rho[i];
 797 | 		for(k = 0; k < N; ++k) {
 798 | 			q[k] = q[k] - alpha[i] * yk[ptr + k];
 799 | 		}
 800 | 	}
 801 | 	mmult(H,q,r,N,N,1);
 802 | 	for (i = 0; i < bound;++i) {
 803 | 		ptr = i * N;
 804 | 		for(k = 0; k < N;++k) {
 805 | 			temp1[k] = yk[ptr+k];
 806 | 			//temp2[k] = r[ptr+k];
 807 | 		}
 808 | 		mmult(temp1,r,beta+i,1,N,1);
 809 | 		beta[i] *= rho[i];
 810 | 		for (k = 0; k < N;++k) {
 811 | 			r[k] += sk[ptr+k] * (alpha[i] - beta[i]);
 812 | 		}
 813 | 	}
 814 | 
 815 | 	
 816 | 
 817 | 	free(q);
 818 | 	free(temp);
 819 | 	free(rho);
 820 | 	free(alpha);
 821 | 	free(temp1);
 822 | 	free(temp2);
 823 | 	free(qs);
 824 | 	free(beta);
 825 | }
 826 | 
 827 | void inithess_l(double *H, int N, int k, double *tsk,double *tyk, double *dx) {
 828 | 	int i, j, ct;
 829 | 	double temp;
 830 | 	double *num, *den;
 831 | 
 832 | 	num = (double*)malloc(sizeof(double)*1);
 833 | 	den = (double*)malloc(sizeof(double)*1);
 834 | 
 835 | 	mmult(tsk, tyk, num, 1, N, 1);
 836 | 	mmult(tyk, tyk, den, 1, N, 1);
 837 | 
 838 | 	if (k > 0) {
 839 | 		temp = num[0] / den[0];
 840 | 	}
 841 | 	else {
 842 | 		temp = 1.0;
 843 | 	}
 844 | 
 845 | 	for (i = 0; i < N; ++i) {
 846 | 		ct = i *N;
 847 | 		for (j = 0; j < N; ++j) {
 848 | 			if (i == j) {
 849 | 				H[ct + j] = temp * dx[i] * dx[i];
 850 | 			}
 851 | 			else {
 852 | 				H[ct + j] = 0.0;
 853 | 			}
 854 | 
 855 | 		}
 856 | 	}
 857 | 
 858 | 	free(num);
 859 | 	free(den);
 860 | 
 861 | }
 862 | 
 863 | int bfgs_l_min(custom_function *funcpt, custom_gradient *funcgrad, double *xi, int N, int m, double *dx, double fsval,double maxstep, int MAXITER, int *niter,
 864 | 		double eps,double gtol,double ftol,double xtol,double *xf)  {
 865 | 	int rcode,gfdcode;
 866 | 	int i,j,siter,retval;
 867 | 	int ptr,iter;
 868 | 	double dt1,dt2,alpha,fo;
 869 | 	double fx,num,den,stop0,fxf,eps2;
 870 | 	double *jac,*scheck,*xc,*H,*step,*jacf,*sk,*yk,*tsk,*tyk;
 871 | 
 872 | 	jac = (double*) malloc(sizeof(double) *N);
 873 | 	scheck = (double*) malloc(sizeof(double) *N);
 874 | 	xc = (double*) malloc(sizeof(double) *N);
 875 | 	step = (double*) malloc(sizeof(double) *N);
 876 | 	H = (double*) malloc(sizeof(double) *N * N);
 877 | 	jacf = (double*) malloc(sizeof(double) *N);
 878 | 	sk = (double*) malloc(sizeof(double) * m * N);
 879 | 	yk = (double*) malloc(sizeof(double) * m * N);
 880 | 	tsk = (double*) malloc(sizeof(double) *N);
 881 | 	tyk = (double*) malloc(sizeof(double) *N);
 882 | 
 883 | 	/*
 884 | 	 * Return Codes
 885 | 	 *
 886 | 	 * Codes 1,2,3 denote possible success.
 887 | 	 * Codes 0 and 4 denote failure.
 888 | 	 *
 889 | 	 * 1 - df(x)/dx <= gtol achieved so xf may be the local minima.
 890 | 	 * 2 - Distance between the last two steps is less than stol or |xf - xi| <= stol so xf may be the local minima.
 891 | 	 * 3 - Global Step failed to locate a lower point than xi so xi may be the local minima.
 892 | 	 * 4 - Iteration Limit exceeded. Convergence not achieved.
 893 | 	 *
 894 | 	 */
 895 | 
 896 | 	rcode = 0;
 897 | 	*niter = 0;
 898 | 	siter = MAXITER;
 899 | 	eps2 = sqrt(eps);
 900 | 
 901 | 	alpha = 1.0;
 902 | 	gfdcode = 0;
 903 | 	
 904 | 	//set values
 905 | 	for(i = 0; i < N;++i) {
 906 | 		xi[i] *= dx[i];
 907 | 		dx[i] = 1.0 / dx[i];
 908 | 	}
 909 | 	fx = FUNCPT_EVAL(funcpt, xi, N);
 910 | 	if (fx >= DBL_MAX || fx <= -DBL_MAX) {
 911 | 		printf("Program Exiting as the function value exceeds the maximum double value");
 912 | 		rcode = 15;
 913 | 	}
 914 | 	if (fx != fx) {
 915 | 		printf("Program Exiting as the function returns NaN");
 916 | 		rcode = 15;
 917 | 	}
 918 | 	fo = fx;
 919 | 
 920 | 	gfdcode = grad_fd(funcpt,funcgrad,xi,N,dx,eps2,jac);
 921 | 	if (gfdcode == 15) {
 922 | 		rcode = 15;
 923 | 	}
 924 | 
 925 | 	if (maxstep <= 0.0) {
 926 | 		maxstep = 1000.0;
 927 | 		dt1 = dt2 = 0.0;
 928 | 		for (i = 0; i < N; ++i) {
 929 | 			dt1 += dx[i] * dx[i];
 930 | 			dt2 += dx[i] * xi[i] * dx[i] * xi[i];
 931 | 		}
 932 | 
 933 | 		dt1 = sqrt(dt1);
 934 | 		dt2 = sqrt(dt2);
 935 | 
 936 | 		if (dt1 > dt2) {
 937 | 			maxstep *= dt1;
 938 | 		}
 939 | 		else {
 940 | 			maxstep *= dt2;
 941 | 		}
 942 | 	}
 943 | 
 944 | 
 945 | 	//Check Stop0
 946 | 	if (fabs(fx) > fabs(fsval)) {
 947 | 			den = fabs(fx);
 948 | 	} else {
 949 | 			den = fabs(fsval);
 950 | 	}
 951 | 	for(i = 0; i < N;++i) {
 952 | 		if (fabs(xi[i]) > 1.0 / fabs(dx[i])) {
 953 | 			num = fabs(xi[i]);
 954 | 		} else {
 955 | 			num = 1.0 / fabs(dx[i]);
 956 | 		}
 957 | 		scheck[i] = fabs(jac[i]) * num / den;
 958 | 	}
 959 | 
 960 | 	stop0 = array_max_abs(scheck,N);
 961 | 
 962 | 	if (stop0 <= gtol * 1e-03) {
 963 | 		rcode = 1;
 964 | 		for(i = 0; i < N;++i) {
 965 | 			xf[i] = xi[i];
 966 | 		}
 967 | 	}
 968 | 
 969 | 	//hessian_fd(funcpt,xi,N,dx,hess);
 970 | 	//inithess_lower(L,N,fx,fsval,dx);
 971 | 
 972 | 	for(i = 0; i < N;++i) {
 973 | 		xc[i] = xi[i];
 974 | 	}
 975 | 	fxf = fx;
 976 | 
 977 | 	for (i = 0; i < N; ++i) {
 978 | 		tsk[i] = 0.0;
 979 | 		tyk[i] = 0.0;
 980 | 	}
 981 | 
 982 | 
 983 | 	while (rcode == 0 && *niter < siter) {
 984 | 		iter = *niter;
 985 | 		inithess_l(H,N,iter,tsk,tyk,dx);
 986 | 
 987 | 		bfgs_rec(H,N,iter,m,jac,sk,yk,step);
 988 | 		//mdisplay(step,1,N);
 989 | 
 990 | 		scale(step,1,N,-1.0);
 991 | 
 992 | 		for (i = 0; i < N; ++i) {
 993 | 			jacf[i] = jac[i];
 994 | 		}
 995 | 
 996 | 		//linsolve_lower(L,N,jac,step);
 997 | 
 998 | 		//scale(jac,1,N,-1.0);
 999 | 		//retval = lnsrchmod(funcpt,xc,jac,step,N,dx,maxstep,stol,xf,jacf);
1000 | 		retval = lnsrchmt(funcpt,funcgrad, xc, &fxf, jac, &alpha, step, N, dx, maxstep,MAXITER,eps2,ftol, gtol, xtol, xf);
1001 | 		//retval = lnsrch(funcpt,xc,jac,step,N,dx,maxstep,stol,xf);
1002 | 
1003 | 		//retval = swolfe(funcpt,xc,jac,step,N,dx,maxstep,stol,xf);
1004 | 		//printf("%g %g \n",fo,fxf);
1005 | 
1006 | 		//grad_fd(funcpt,xf,N,dx,jacf);
1007 | 		//rcode = stopcheck_mt(fxf, N, xc, xf, jac, dx, fsval, gtol, ftol, retval);
1008 | 		rcode = stopcheck2_mt(fxf,N,fo,jac,dx,eps,gtol,ftol,retval);
1009 | 		//rcode = stopcheck3_mt(xc,xf,fxf,N,fo,jac,dx,eps,gtol,ftol,retval);
1010 | 		//printf("\n CODE %d", rcode);
1011 | 		fo = fxf;
1012 | 		for (i = 0; i < N;++i) {
1013 | 			tsk[i] = xf[i] - xc[i];
1014 | 			tyk[i] = jac[i] - jacf[i];
1015 | 		}
1016 | 
1017 | 		if (*niter >= m) {
1018 | 			for (i = 0; i < (m-1) * N; ++i) {
1019 | 				sk[i] = sk[N + i];
1020 | 				yk[i] = yk[N + i];
1021 | 			}
1022 | 			j = 0;
1023 | 
1024 | 			for (i = (m - 1) * N; i < m * N; ++i) {
1025 | 				sk[i] = tsk[j];
1026 | 				yk[i] = tyk[j];
1027 | 				j++;
1028 | 			}
1029 | 
1030 | 		} else {
1031 | 			ptr = *niter * N;
1032 | 
1033 | 			for (i = 0; i < N;++i) {
1034 | 				sk[ptr + i] = tsk[i];
1035 | 				yk[ptr + i] = tyk[i];
1036 | 			}
1037 | 
1038 | 		}
1039 | 
1040 | 		
1041 | 		for(i = 0; i < N;++i) {
1042 | 			xc[i] = xf[i];
1043 | 			step[i] = 0.0;
1044 | 		}
1045 | 		*niter = *niter + 1;
1046 | 
1047 | 	}
1048 | 
1049 | 	//mdisplay(xc, 1, N);
1050 | 
1051 | 	if (rcode == 0 && *niter >= siter) {
1052 | 		rcode = 4;
1053 | 	}
1054 | 	
1055 | 	for(i = 0; i < N;++i) {
1056 | 		xi[i] *= dx[i];
1057 | 		dx[i] = 1.0 / dx[i];
1058 | 	}
1059 | 	
1060 | 
1061 | 
1062 | 	free(jac);
1063 | 	free(scheck);
1064 | 	free(xc);
1065 | 	free(H);
1066 | 	free(step);
1067 | 	free(jacf);
1068 | 	free(sk);
1069 | 	free(yk);
1070 | 	free(tsk);
1071 | 	free(tyk);
1072 | 	return rcode;
1073 | }
1074 | 
1075 | 
1076 | 


--------------------------------------------------------------------------------
/src/lls.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * lls.c
   3 |  *
   4 |  *  Created on: Apr 14, 2014
   5 |  *      Author: Rafat Hussain
   6 |  */
   7 | 
   8 | #include "lls.h"
   9 | 
  10 | int lls_normal(double *A,double *b,int M,int N,double *x) {
  11 | 	int retcode,sc,i,j,c1,l;
  12 | 	double sum;
  13 | 	double *AT,*d,*C,*y,*CT;
  14 | 	// M - data points
  15 | 	// N - Number of parameters
  16 | 	// A - MXN; b size - M vector; AT - NXM
  17 | 
  18 | 	AT = (double*) malloc(sizeof(double) * M * N);
  19 | 	d = (double*) malloc(sizeof(double) * N);
  20 | 	C = (double*) malloc(sizeof(double) * N * N);
  21 | 	y = (double*) malloc(sizeof(double) * N);
  22 | 	CT = (double*) malloc(sizeof(double) * N * N);
  23 | 
  24 | 	retcode = 0;
  25 | 
  26 | 	mtranspose(A,M,N,AT);
  27 | 	mmult(AT,b,d,N,M,1);
  28 | 	mmult(AT,A,C,N,M,N);
  29 | 
  30 | 	sc = chol(C,N);
  31 | 	if (sc == -1) {
  32 | 		return -1;
  33 | 	}
  34 | 
  35 | 	mtranspose(C,N,N,CT);
  36 | 	//Forward Substitution
  37 | 
  38 | 	y[0] = d[0]/CT[0];
  39 | 	for(i = 1; i < N; ++i) {
  40 | 		sum = 0.;
  41 | 		c1 = i*N;
  42 | 		for(j = 0; j < i; ++j) {
  43 | 			sum += y[j] * CT[c1 + j];
  44 | 		}
  45 | 		y[i] = (d[i] - sum)/CT[c1+i];
  46 | 	}
  47 | 
  48 | 	//Back Substitution
  49 | 
  50 | 	x[N - 1] = y[N - 1]/C[N * N - 1];
  51 | 
  52 | 	for (i = N - 2; i >= 0; i--) {
  53 | 		sum = 0.;
  54 | 		c1 = i*(N+1);
  55 | 		l=0;
  56 | 		for(j = i+1; j < N;j++) {
  57 | 			l++;
  58 | 			sum += C[c1 + l] * x[j];
  59 | 		}
  60 | 		x[i] = (y[i] - sum) / C[c1];
  61 | 	}
  62 | 
  63 | 	free(AT);
  64 | 	free(d);
  65 | 	free(C);
  66 | 	free(y);
  67 | 	return retcode;
  68 | }
  69 | 
  70 | int lls_qr(double *Ai,double *bi,int M,int N,double *xo) {
  71 | 	int j,i,k,u,t,retcode,c1,l;
  72 | 	double *x,*v,*AT,*w,*bvec,*b,*A,*R;
  73 | 	double beta,sum;
  74 | 
  75 | 	retcode = 0;
  76 | 
  77 | 	if (M < N) {
  78 | 			printf("M should be greater than or equal to N");
  79 | 			exit(-1);
  80 | 	}
  81 | 	x = (double*) malloc(sizeof(double) * M);
  82 | 	b = (double*) malloc(sizeof(double) * M);
  83 | 	bvec = (double*) malloc(sizeof(double) * N);
  84 | 	v = (double*) malloc(sizeof(double) * M);
  85 | 	AT = (double*) malloc(sizeof(double) * M * N);
  86 | 	A = (double*) malloc(sizeof(double) * M * N);
  87 | 	w = (double*) malloc(sizeof(double) * 1);
  88 | 	R = (double*) malloc(sizeof(double) * N * N);
  89 | 
  90 | 	for(j = 0; j < M;++j) {
  91 | 		b[j] = bi[j];
  92 | 	}
  93 | 	for(j = 0; j < M*N;++j) {
  94 | 		A[j] = Ai[j];
  95 | 	}
  96 | 
  97 | 	for(j = 0; j < N;++j) {
  98 | 		for(i=j;i < M;++i) {
  99 | 			x[i-j] = A[i*N+j];
 100 | 
 101 | 		}
 102 | 
 103 | 		beta = house(x,M-j,v);
 104 | 		bvec[j] = beta;
 105 | 
 106 | 		for (i=j; i < M; i++) {
 107 | 			t = i * N;
 108 | 			u = 0;
 109 | 			for (k=j; k < N; k++) {
 110 | 				AT[u+i-j] = A[k+t];
 111 | 				u+=(M-j);
 112 | 
 113 | 			}
 114 | 
 115 | 		}
 116 | 
 117 | 
 118 | 		mmult(AT,v,w,N-j,M-j,1);
 119 | 		scale(w,N-j,1,beta);
 120 | 		mmult(v,w,AT,M-j,1,N-j);
 121 | 		for (i=j; i < M; i++) {
 122 | 			t = i *N;
 123 | 			for (k=j; k < N; k++) {
 124 | 				A[t+k] -= AT[(i-j)*(N-j) + k - j];
 125 | 			}
 126 | 		}
 127 | 		if (j < M) {
 128 | 
 129 | 			for(i=j+1;i < M;++i) {
 130 | 				A[i*N+j] = v[i-j];
 131 | 			}
 132 | 		}
 133 | 
 134 | 	}
 135 | 
 136 | 	for(i = 0; i < N;++i) {
 137 | 		t = i *N;
 138 | 		for(j = 0; j < N;++j) {
 139 | 			if (i > j) {
 140 | 				R[t+j] = 0.;
 141 | 			} else {
 142 | 				R[t+j] = A[t+j];
 143 | 			}
 144 | 		}
 145 | 	}
 146 | 
 147 | 
 148 | 	for(j = 0; j < N;++j) {
 149 | 		v[j] = 1;
 150 | 		for(i = j+1; i < M;++i) {
 151 | 			v[i] = A[i * N + j];//edit
 152 | 		}
 153 | 		mmult(b+j,v+j,w,1,M-j,1);
 154 | 		*w = *w * bvec[j];
 155 | 		for(i = j; i < M;++i) {
 156 | 			v[i] = *w * v[i];
 157 | 		}
 158 | 		for(i = j; i < M;++i) {
 159 | 			b[i] = b[i] - v[i];
 160 | 		}
 161 | 
 162 | 
 163 | 	}
 164 | 
 165 | 	//mdisplay(b,1,M);
 166 | 
 167 | 	//back substitution
 168 | 
 169 | 	xo[N - 1] = b[N - 1]/R[N * N - 1];
 170 | 
 171 | 	for (i = N - 2; i >= 0; i--) {
 172 | 		sum = 0.;
 173 | 		c1 = i*(N+1);
 174 | 		l=0;
 175 | 		for(j = i+1; j < N;j++) {
 176 | 			l++;
 177 | 			sum += R[c1 + l] * xo[j];
 178 | 		}
 179 | 		xo[i] = (b[i] - sum) / R[c1];
 180 | 	}
 181 | 
 182 | 	free(x);
 183 | 	free(v);
 184 | 	free(AT);
 185 | 	free(w);
 186 | 	free(bvec);
 187 | 	free(R);
 188 | 	free(b);
 189 | 	free(A);
 190 | 
 191 | 	return retcode;
 192 | }
 193 | 
 194 | void bidiag(double *A, int M, int N) {
 195 | 	int j,i,k,u,t;
 196 | 	double *x,*v,*AT,*w;
 197 | 	double beta;
 198 | 
 199 | 	if (M < N) {
 200 | 			printf("M should be greater than or equal to N");
 201 | 			exit(1);
 202 | 	}
 203 | 	x = (double*) malloc(sizeof(double) * M);
 204 | 	v = (double*) malloc(sizeof(double) * M);
 205 | 	AT = (double*) malloc(sizeof(double) * M * N);
 206 | 	w = (double*) malloc(sizeof(double) * M * M);
 207 | 
 208 | 
 209 | 	for(j = 0; j < N;++j) {
 210 | 		for(i=j;i < M;++i) {
 211 | 			x[i-j] = A[i*N+j];
 212 | 
 213 | 		}
 214 | 
 215 | 		beta = house(x,M-j,v);
 216 | 
 217 | 		for (i=j; i < M; i++) {
 218 | 			t = i * N;
 219 | 			u = 0;
 220 | 			for (k=j; k < N; k++) {
 221 | 				AT[u+i-j] = A[k+t];
 222 | 				u+=(M-j);
 223 | 
 224 | 			}
 225 | 
 226 | 		}
 227 | 
 228 | 
 229 | 		mmult(AT,v,w,N-j,M-j,1);
 230 | 		scale(w,N-j,1,beta);
 231 | 		mmult(v,w,AT,M-j,1,N-j);
 232 | 		for (i=j; i < M; i++) {
 233 | 			t = i *N;
 234 | 			for (k=j; k < N; k++) {
 235 | 				A[t+k] -= AT[(i-j)*(N-j) + k - j];
 236 | 			}
 237 | 		}
 238 | 
 239 | 		for(i=j+1;i < M;++i) {
 240 | 			A[i*N+j] = v[i-j];
 241 | 		}
 242 | 
 243 | 		if (j < N - 2) {
 244 | 			for(i=j+1;i < N;++i) {
 245 | 				x[i-j-1] = A[j*N+i];
 246 | 			}
 247 | 			beta = house(x,N-j-1,v);
 248 | 
 249 | 			for (i=j; i < M; i++) {
 250 | 				t = i * N;
 251 | 				u = (i-j) *(N-j-1);
 252 | 				for (k=j+1; k < N; k++) {
 253 | 					AT[u+k-j-1] = A[k+t];
 254 | 				}
 255 | 			}
 256 | 
 257 | 			mmult(AT,v,w,M-j,N-j-1,1);
 258 | 			scale(w,M-j,1,beta);
 259 | 			mmult(w,v,AT,M-j,1,N-j-1);
 260 | 
 261 | 			for (i=j; i < M; i++) {
 262 | 				t = i * N;
 263 | 				u = (i-j) *(N-j-1);
 264 | 				for (k=j+1; k < N; k++) {
 265 | 					A[k+t] -= AT[u+k-j-1];
 266 | 				}
 267 | 			}
 268 | 			u = 1;
 269 | 			t = j*N;
 270 | 			for(i = j + 2; i < N;++i) {
 271 | 				A[t+i] = v[u];
 272 | 				u++;
 273 | 			}
 274 | 
 275 | 		}
 276 | 
 277 | 	}
 278 | 
 279 | 
 280 | 	free(x);
 281 | 	free(v);
 282 | 	free(AT);
 283 | 
 284 | }
 285 | 
 286 | void bidiag_orth(double *A, int M, int N,double *U,double *V) {
 287 | 	int j,i,k,u,t;
 288 | 	double *x,*v,*AT,*w;
 289 | 	double beta;
 290 | 
 291 | 	if (M < N) {
 292 | 			printf("M should be greater than or equal to N");
 293 | 			exit(1);
 294 | 	}
 295 | 	x = (double*) malloc(sizeof(double) * M);
 296 | 	v = (double*) malloc(sizeof(double) * M);
 297 | 	AT = (double*) malloc(sizeof(double) * M * M);
 298 | 	w = (double*) malloc(sizeof(double) * M * M);
 299 | 
 300 | 
 301 | 	eye(U,M);
 302 | 	eye(V,N);
 303 | 
 304 | 	for(j = 0; j < N;++j) {
 305 | 		for(i=j;i < M;++i) {
 306 | 			x[i-j] = A[i*N+j];
 307 | 
 308 | 		}
 309 | 
 310 | 		beta = house(x,M-j,v);
 311 | 		//mdisplay(v,M-j,1);
 312 | 
 313 | 		for (i=j; i < M; i++) {
 314 | 			t = i * N;
 315 | 			u = 0;
 316 | 			for (k=j; k < N; k++) {
 317 | 				AT[u+i-j] = A[k+t];
 318 | 				u+=(M-j);
 319 | 
 320 | 			}
 321 | 
 322 | 		}
 323 | 
 324 | 
 325 | 		mmult(AT,v,w,N-j,M-j,1);
 326 | 		scale(w,N-j,1,beta);
 327 | 		mmult(v,w,AT,M-j,1,N-j);
 328 | 		for (i=j; i < M; i++) {
 329 | 			t = i *N;
 330 | 			for (k=j; k < N; k++) {
 331 | 				A[t+k] -= AT[(i-j)*(N-j) + k - j];
 332 | 			}
 333 | 		}
 334 | 
 335 | 		for(i=j+1;i < M;++i) {
 336 | 			A[i*N+j] = v[i-j];
 337 | 		}
 338 | 
 339 | 
 340 | 		for (i=j; i < M; i++) {
 341 | 			t = i * M;
 342 | 			u = (i-j) *(M-j);
 343 | 			for (k=j; k < M; k++) {
 344 | 				AT[u+k-j] = U[k+t];
 345 | 			}
 346 | 		}
 347 | 
 348 | 		mmult(AT,v,w,M-j,M-j,1);
 349 | 		scale(w,M-j,1,beta);
 350 | 		mmult(w,v,AT,M-j,1,M-j);
 351 | 
 352 | 		for (i=j; i < M; i++) {
 353 | 			t = i * M;
 354 | 			u = (i-j) *(M-j);
 355 | 			for (k=j; k < M; k++) {
 356 | 				U[k+t] -= AT[u+k-j];
 357 | 			}
 358 | 		}
 359 | 
 360 | 		//mdisplay(U,M,M);
 361 | 
 362 | 		if (j < N - 2) {
 363 | 			for(i=j+1;i < N;++i) {
 364 | 				x[i-j-1] = A[j*N+i];
 365 | 			}
 366 | 			beta = house(x,N-j-1,v);
 367 | 
 368 | 			for (i=j; i < M; i++) {
 369 | 				t = i * N;
 370 | 				u = (i-j) *(N-j-1);
 371 | 				for (k=j+1; k < N; k++) {
 372 | 					AT[u+k-j-1] = A[k+t];
 373 | 				}
 374 | 			}
 375 | 
 376 | 			mmult(AT,v,w,M-j,N-j-1,1);
 377 | 			scale(w,M-j,1,beta);
 378 | 			mmult(w,v,AT,M-j,1,N-j-1);
 379 | 
 380 | 			for (i=j; i < M; i++) {
 381 | 				t = i * N;
 382 | 				u = (i-j) *(N-j-1);
 383 | 				for (k=j+1; k < N; k++) {
 384 | 					A[k+t] -= AT[u+k-j-1];
 385 | 				}
 386 | 			}
 387 | 			u = 1;
 388 | 			t = j*N;
 389 | 			for(i = j + 2; i < N;++i) {
 390 | 				A[t+i] = v[u];
 391 | 				u++;
 392 | 			}
 393 | 
 394 | 		}
 395 | 
 396 | 	}
 397 | 
 398 | 
 399 | 	free(x);
 400 | 	free(v);
 401 | 	free(AT);
 402 | 
 403 | }
 404 | 
 405 | 
 406 | int svd_gr(double *A,int M,int N,double *U,double *V,double *q) {
 407 | 	int i,j,k,l,t,t2,ierr,cancel,iter,l1;
 408 | 	double eps,g,x,s,temp,f,h,scale,c,y,z;
 409 | 	double *e;
 410 | 	/*
 411 |      THIS SUBROUTINE IS THE MODIFIED C TRANSLATION OF THE
 412 |      EISPACK FORTRAN TRANSLATION OF THE ALGOL PROCEDURE SVD,
 413 |      NUM. MATH. 14, 403-420(1970) BY GOLUB AND REINSCH.
 414 |      HANDBOOK FOR AUTO. COMP., VOL II-LINEAR ALGEBRA, 134-151(1971).
 415 | 	 */
 416 | 	/*
 417 | 	 * U = MXN
 418 | 	 * V - NXN
 419 | 	 * Q - NX1
 420 | 	 */
 421 | 
 422 | 	e = (double*) malloc(sizeof(double) * N);
 423 | 	ierr = 0;
 424 | 	eps = macheps();
 425 | 	g = scale = x = 0.0;
 426 | 
 427 | 	for(i = 0; i < M*N;++i) {
 428 | 		U[i] = A[i];
 429 | 	}
 430 | 
 431 | 	for(i = 0; i < N;++i) {
 432 | 		l = i+1;
 433 | 		e[i] = scale * g;
 434 | 		g = 0.0;
 435 | 		s = 0.0;
 436 | 		scale = 0.0;
 437 | 
 438 | 		if (i < M) {
 439 | 			for(k = i; k < M;++k) {
 440 | 				scale += fabs(U[k*N+i]);
 441 | 			}
 442 | 
 443 | 			if (scale != 0.0) {
 444 | 				for(k = i; k < M;++k) {
 445 | 					t = k * N;
 446 | 					U[t+i] /= scale;
 447 | 					temp = U[t+i];
 448 | 					s += temp*temp;
 449 | 				}
 450 | 				f = U[i*N+i];
 451 | 				g = (f < 0) ? sqrt(s) : -sqrt(s);
 452 | 				h = f * g - s;
 453 | 				U[i*N+i] = f - g;
 454 | 
 455 | 				if (i < N - 1) {
 456 | 					for(j = l; j < N;++j) {
 457 | 						s = 0.0;
 458 | 						for(k = i; k < M;++k) {
 459 | 							t = k * N;
 460 | 							s += U[t+i]*U[t+j];
 461 | 						}
 462 | 						f = s / h;
 463 | 						for(k = i; k < M;++k) {
 464 | 							t = k * N;
 465 | 							U[t+j] += f * U[t+i];
 466 | 						}
 467 | 					}
 468 | 				}
 469 | 				for(k = i; k < M;++k) {
 470 | 					t = k * N;
 471 | 					U[t+i] *= scale;
 472 | 				}
 473 | 			}
 474 | 		}
 475 |         q[i] = scale * g;
 476 |         g = 0.0;
 477 |         s = 0.0;
 478 |         scale = 0.0;
 479 | 
 480 |         if (i < M && i != N - 1) {
 481 |         	t = i *N;
 482 |         	for(k = l; k < M;++k) {
 483 |         		scale += fabs(U[t+k]);
 484 |         	}
 485 |         	if (scale != 0.0) {
 486 |         		for(k = l; k < N;++k) {
 487 |         			U[t+k] /= scale;
 488 |         			temp = U[t+k];
 489 |         			s = s + temp*temp;
 490 |         		}
 491 |         		f = U[t+l];
 492 |         		g = (f < 0) ? sqrt(s) : -sqrt(s);
 493 |                 h = f * g - s;
 494 |                 U[t+l] = f - g;
 495 |                 for(k = l;k < N;++k) {
 496 |                 	e[k] = U[t+k] / h;
 497 |                 }
 498 | 
 499 | 				for (j = l; j < M; j++) {
 500 | 					s = 0.0;
 501 | 					t2 = j * N;
 502 | 					for (k = l; k < N; k++) {
 503 | 						s += U[t2+k] * U[t+k];
 504 | 					}
 505 | 					for (k = l; k < N; k++) {
 506 | 						U[t2+k] += s * e[k];
 507 | 					}
 508 | 				}
 509 |                 for (k = l; k < N; k++)
 510 |                     U[t+k] *= scale;
 511 |         	}
 512 | 
 513 |         }
 514 | 
 515 |         temp = fabs(q[i]) + fabs(e[i]);
 516 | 
 517 |         if (x < temp) {
 518 |         	x = temp;
 519 |         }
 520 | 	}
 521 | 
 522 | 	//Accumulating Right Hand Transformations
 523 | 
 524 | 		for(i = N - 1;i >= 0;--i) {
 525 | 			t = i * N;
 526 | 			if (i < N - 1) {
 527 | 				if (g != 0.0) {
 528 | 					h = U[t+i+1] * g;
 529 | 					for(j = l;j < N;++j) {
 530 | 						V[j*N+i] = U[t+j] / h;
 531 | 					}
 532 | 					for(j = l;j < N;++j) {
 533 | 						s = 0.0;
 534 | 						for(k = l; k < N;++k) {
 535 | 							s += U[t+k] * V[k*N+j];
 536 | 						}
 537 | 						for(k = l; k < N;++k) {
 538 | 							V[k*N+j] += (s * V[k*N+i]);
 539 | 						}
 540 | 					}
 541 | 				}
 542 | 				for(j = l; j < N;++j) {
 543 | 					V[t+j] = V[j*N+i] = 0.0;
 544 | 				}
 545 | 			}
 546 | 		    V[t+i] = 1.0;
 547 | 			g = e[i];
 548 | 			l = i;
 549 | 		}
 550 | 
 551 | 
 552 | 
 553 | 	//Accumulating Left Hand Transformations
 554 | 
 555 | 		for(i = N - 1;i >= 0;--i) {
 556 | 			t = i * N;
 557 | 			l = i+1;
 558 | 			g = q[i];
 559 | 
 560 | 			if (i < N - 1) {
 561 | 				for(j = l;j < N;++j) {
 562 | 					U[t+j] = 0.0;
 563 | 				}
 564 | 			}
 565 | 
 566 | 			if (g != 0.0) {
 567 | 				if (i != N - 1) {
 568 | 					//h = U[t+i] * g;
 569 | 					for(j = l;j < N;++j) {
 570 | 						s = 0.0;
 571 | 						for(k = l; k < M;++k) {
 572 | 							s += (U[k*N+i] * U[k*N+j]);
 573 | 						}
 574 | 						f = (s / U[t+i]) / g;
 575 | 						for(k = i; k < M;++k) {
 576 | 							U[k*N+j] += (f * U[k*N+i]);
 577 | 						}
 578 | 					}
 579 | 				}
 580 | 				for(j = i; j < M;++j) {
 581 | 					U[j*N+i] = U[j*N+i] / g;
 582 | 				}
 583 | 			} else {
 584 | 				for(j = i; j < M;++j) {
 585 | 					U[j*N+i] = 0.0;
 586 | 				}
 587 | 			}
 588 | 
 589 | 			U[t+i] += 1.0;
 590 | 		}
 591 | 	//	mdisplay(U,M,N);
 592 | 
 593 | 		eps = eps * x;
 594 | 
 595 | 		for(k = N - 1; k >= 0; --k) {
 596 | 			iter = 0;
 597 | 
 598 | 			while(1) {
 599 | 				iter++;
 600 | 				if (iter > SVDMAXITER) {
 601 | 					printf("Convergence Not Achieved \n");
 602 | 					return 15;
 603 | 				}
 604 | 
 605 | 				cancel = 1;
 606 | 				for(l = k; l >= 0; --l) {
 607 | 					if (fabs(e[l]) <= eps) {
 608 | 						cancel = 0; //test f convergence
 609 | 						break;
 610 | 					}
 611 | 					if (fabs(q[l-1]) <= eps) {
 612 | 						//Cancel
 613 | 						break;
 614 | 					}
 615 | 				}
 616 | 				if (cancel) {
 617 | 					c = 0.0;
 618 | 					s = 1.0;
 619 | 					l1 = l - 1;
 620 | 					for(i = l; i <= k;++i) {
 621 | 						f = s*e[i];
 622 | 						e[i] *= c;
 623 | 						if (fabs(f) <= eps) {
 624 | 							break;
 625 | 						}
 626 | 						g = q[i];
 627 | 						h = q[i] = hypot(f,g);
 628 | 						c = g/h;
 629 | 						s = -f/h;
 630 | 						for(j = 0; j < M;++j) {
 631 | 							t = j * N;
 632 | 							y = U[t+l1];
 633 | 							z = U[t+i];
 634 | 
 635 | 							U[t+l1] = y * c + z * s;
 636 | 							U[t+i] = z * c - y * s;
 637 | 						}
 638 | 					}
 639 | 				}
 640 | 				z = q[k];
 641 | 				if (l != k) {
 642 | 					x = q[l];
 643 | 					y = q[k-1];
 644 | 					g = e[k-1];
 645 | 					h = e[k];
 646 | 					f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
 647 | 					g = hypot(f,1.0);
 648 | 					if (f < 0.0) {
 649 | 						temp = f - g;
 650 | 					} else {
 651 | 						temp = f+g;
 652 | 					}
 653 | 					f = x - (z / x) * z + (h / x) * (y / temp - h);
 654 | 
 655 | 					//Next QR Transformation
 656 | 
 657 | 					c = s = 1.0;
 658 | 					for(i = l+1; i <= k;++i) {
 659 | 						g = e[i];
 660 | 						y = q[i];
 661 | 						h = s * g;
 662 | 						g = c * g;
 663 | 						e[i-1] = z = hypot(f,h);
 664 | 	                    c = f / z;
 665 | 	                    s = h / z;
 666 | 	                    f = x * c + g * s;
 667 | 	                    g = g * c - x * s;
 668 | 	                    h = y * s;
 669 | 	                    y *= c;
 670 | 	                    for(j = 0; j < N;++j) {
 671 | 	                    	t = j * N;
 672 | 	                        x = V[t+i-1];
 673 | 	                        z = V[t+i];
 674 | 	                        V[t+i-1] = x * c + z * s;
 675 | 	                        V[t+i] = z * c - x * s;
 676 | 	                    }
 677 | 	                    q[i-1] = z = hypot(f,h);
 678 | 	                    if (z != 0.0) {
 679 | 	                        c = f / z;
 680 | 	                        s = h / z;
 681 | 	                    }
 682 | 	                    f = c * g + s * y;
 683 | 	                    x = c * y - s * g;
 684 | 	                    for(j = 0; j < M;++j) {
 685 | 	                    	t = j * N;
 686 | 	                        y = U[t+i-1];
 687 | 	                        z = U[t+i];
 688 | 	                        U[t+i-1] = y * c + z * s;
 689 | 	                        U[t+i] = z * c - y * s;
 690 | 	                    }
 691 | 					}
 692 | 	                    e[l] = 0.0;
 693 | 	                    e[k] = f;
 694 | 	                    q[k] = x;
 695 | 
 696 | 				} else {
 697 | 					//convergence
 698 | 	                if (z < 0.0) {
 699 | 	                    q[k] = -z;
 700 | 	                    for (j = 0; j < N; j++) {
 701 | 	                    	t = j *N;
 702 | 	                        V[t+k] = -V[t+k];
 703 | 	                    }
 704 | 	                }
 705 | 	                break;
 706 | 				}
 707 | 			}
 708 | 		}
 709 | 
 710 | 		svd_sort(U,M,N,V,q);
 711 | 
 712 | 		free(e);
 713 | 		return ierr;
 714 | }
 715 | 
 716 | 
 717 | int svd_gr2(double *A,int M,int N,double *U,double *V,double *q) {
 718 | 	int i,j,k,l,t,t2,ierr,cancel,iter,l1;
 719 | 	double eps,g,x,s,temp,f,h,tol,c,y,z;
 720 | 	double *e;
 721 | 	/*
 722 |      THIS SUBROUTINE IS THE MODIFIED C TRANSLATION OF THE
 723 |      EISPACK FORTRAN TRANSLATION OF THE ALGOL PROCEDURE SVD,
 724 |      NUM. MATH. 14, 403-420(1970) BY GOLUB AND REINSCH.
 725 |      HANDBOOK FOR AUTO. COMP., VOL II-LINEAR ALGEBRA, 134-151(1971).
 726 | 	 */
 727 | 	/*
 728 | 	 * U = MXN
 729 | 	 * V - NXN
 730 | 	 * Q - NX1
 731 | 	 */
 732 | 	if (M < N) {
 733 | 		printf("Rows (M) should be greater than Columns (B) \n");
 734 | 		printf("Retry By Transposing the Input Matrix");
 735 | 		return -1;
 736 | 	}
 737 | 	e = (double*) malloc(sizeof(double) * N);
 738 | 	ierr = 0;
 739 | 	eps = macheps();
 740 | 	tol = eps * eps;
 741 | 	g = x = 0.0;
 742 | 
 743 | 	for(i = 0; i < M*N;++i) {
 744 | 		U[i] = A[i];
 745 | 	}
 746 | 
 747 | 	for(i = 0; i < N;++i) {
 748 | 		l = i+1;
 749 | 		e[i] = g;
 750 | 		s = 0.0;
 751 | 
 752 | 		for(k = i; k < M;++k) {
 753 | 			t = k * N;
 754 | 			temp = U[t+i];
 755 | 			s += temp*temp;
 756 | 		}
 757 | 		if (s < tol) {
 758 | 			g = 0.0;
 759 | 		} else {
 760 | 			f = U[i*N+i];
 761 | 			g = (f < 0) ? sqrt(s) : -sqrt(s);
 762 | 			h = f * g - s;
 763 | 			U[i*N+i] = f - g;
 764 | 
 765 | 			for(j = l; j < N;++j) {
 766 | 				s = 0.0;
 767 | 				for(k = i; k < M;++k) {
 768 | 					t = k * N;
 769 | 					s += (U[t+i]*U[t+j]);
 770 | 				}
 771 | 				f = s / h;
 772 | 				for(k = i; k < M;++k) {
 773 | 					t = k * N;
 774 | 					U[t+j] += (f * U[t+i]);
 775 | 				}
 776 | 			}
 777 | 
 778 | 		}
 779 | 
 780 |         q[i] = g;
 781 |         s = 0.0;
 782 |         t = i * N;
 783 |     	for(k = l; k < N;++k) {
 784 |     		temp = U[t+k];
 785 |     		s = s + temp*temp;
 786 |     	}
 787 |         if (s < tol) {
 788 |         	g = 0.0;
 789 |         } else {
 790 |         	f = U[t+l];
 791 | 			g = (f < 0) ? sqrt(s) : -sqrt(s);
 792 | 			h = f * g - s;
 793 |             U[t+l] = f - g;
 794 |             for(k = l;k < N;++k) {
 795 |               	e[k] = U[t+k] / h;
 796 |             }
 797 | 
 798 |             for (j = l; j < M; j++) {
 799 |                 s = 0.0;
 800 |                 t2 = j * N;
 801 |                 for (k = l; k < N; k++) {
 802 |                      s += U[t2+k] * U[t+k];
 803 |                 }
 804 |                 for (k = l; k < N; k++) {
 805 |                      U[t2+k] += s * e[k];
 806 |                 }
 807 |             }
 808 | 
 809 |         }
 810 | 
 811 |         temp = fabs(q[i]) + fabs(e[i]);
 812 | 
 813 |         if (x < temp) {
 814 |         	x = temp;
 815 |         }
 816 | 	}
 817 | 
 818 | 
 819 | //Accumulating Right Hand Transformations
 820 | 
 821 | 	for(i = N - 1;i >= 0;--i) {
 822 | 		t = i * N;
 823 | 		if (i < N - 1) {
 824 | 			if (g != 0.0) {
 825 | 				h = U[t+i+1] * g;
 826 | 				for(j = l;j < N;++j) {
 827 | 					V[j*N+i] = U[t+j] / h;
 828 | 				}
 829 | 				for(j = l;j < N;++j) {
 830 | 					s = 0.0;
 831 | 					for(k = l; k < N;++k) {
 832 | 						s += U[t+k] * V[k*N+j];
 833 | 					}
 834 | 					for(k = l; k < N;++k) {
 835 | 						V[k*N+j] += (s * V[k*N+i]);
 836 | 					}
 837 | 				}
 838 | 			}
 839 | 			for(j = l; j < N;++j) {
 840 | 				V[t+j] = V[j*N+i] = 0.0;
 841 | 			}
 842 | 		}
 843 | 	    V[t+i] = 1.0;
 844 | 		g = e[i];
 845 | 		l = i;
 846 | 	}
 847 | 
 848 | 
 849 | 
 850 | //Accumulating Left Hand Transformations
 851 | 
 852 | 	for(i = N - 1;i >= 0;--i) {
 853 | 		t = i * N;
 854 | 		l = i+1;
 855 | 		g = q[i];
 856 | 
 857 | 		if (i < N - 1) {
 858 | 			for(j = l;j < N;++j) {
 859 | 				U[t+j] = 0.0;
 860 | 			}
 861 | 		}
 862 | 
 863 | 		if (g != 0.0) {
 864 | 			if (i != N - 1) {
 865 | 				//h = U[t+i] * g;
 866 | 				for(j = l;j < N;++j) {
 867 | 					s = 0.0;
 868 | 					for(k = l; k < M;++k) {
 869 | 						s += (U[k*N+i] * U[k*N+j]);
 870 | 					}
 871 | 					f = (s / U[t+i]) / g;
 872 | 					for(k = i; k < M;++k) {
 873 | 						U[k*N+j] += (f * U[k*N+i]);
 874 | 					}
 875 | 				}
 876 | 			}
 877 | 			for(j = i; j < M;++j) {
 878 | 				U[j*N+i] = U[j*N+i] / g;
 879 | 			}
 880 | 		} else {
 881 | 			for(j = i; j < M;++j) {
 882 | 				U[j*N+i] = 0.0;
 883 | 			}
 884 | 		}
 885 | 
 886 | 		U[t+i] += 1.0;
 887 | 	}
 888 | //	mdisplay(U,M,N);
 889 | 
 890 | 	eps = eps * x;
 891 | 
 892 | 	for(k = N - 1; k >= 0; --k) {
 893 | 		iter = 0;
 894 | 
 895 | 		while(1) {
 896 | 			iter++;
 897 | 			if (iter > SVDMAXITER) {
 898 | 				printf("Convergence Not Achieved \n");
 899 | 				return 15;
 900 | 			}
 901 | 
 902 | 			cancel = 1;
 903 | 			for(l = k; l >= 0; --l) {
 904 | 				if (fabs(e[l]) <= eps) {
 905 | 					cancel = 0; //test f convergence
 906 | 					break;
 907 | 				}
 908 | 				if (fabs(q[l-1]) <= eps) {
 909 | 					//Cancel
 910 | 					break;
 911 | 				}
 912 | 			}
 913 | 			if (cancel) {
 914 | 				c = 0.0;
 915 | 				s = 1.0;
 916 | 				l1 = l - 1;
 917 | 				for(i = l; i <= k;++i) {
 918 | 					f = s*e[i];
 919 | 					e[i] *= c;
 920 | 					if (fabs(f) <= eps) {
 921 | 						break;
 922 | 					}
 923 | 					g = q[i];
 924 | 					h = q[i] = hypot(f,g);
 925 | 					c = g/h;
 926 | 					s = -f/h;
 927 | 					for(j = 0; j < M;++j) {
 928 | 						t = j * N;
 929 | 						y = U[t+l1];
 930 | 						z = U[t+i];
 931 | 
 932 | 						U[t+l1] = y * c + z * s;
 933 | 						U[t+i] = z * c - y * s;
 934 | 					}
 935 | 				}
 936 | 			}
 937 | 			z = q[k];
 938 | 			if (l != k) {
 939 | 				x = q[l];
 940 | 				y = q[k-1];
 941 | 				g = e[k-1];
 942 | 				h = e[k];
 943 | 				f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
 944 | 				g = hypot(f,1.0);
 945 | 				if (f < 0.0) {
 946 | 					temp = f - g;
 947 | 				} else {
 948 | 					temp = f+g;
 949 | 				}
 950 | 				f = x - (z / x) * z + (h / x) * (y / temp - h);
 951 | 
 952 | 				//Next QR Transformation
 953 | 
 954 | 				c = s = 1.0;
 955 | 				for(i = l+1; i <= k;++i) {
 956 | 					g = e[i];
 957 | 					y = q[i];
 958 | 					h = s * g;
 959 | 					g = c * g;
 960 | 					e[i-1] = z = hypot(f,h);
 961 |                     c = f / z;
 962 |                     s = h / z;
 963 |                     f = x * c + g * s;
 964 |                     g = g * c - x * s;
 965 |                     h = y * s;
 966 |                     y *= c;
 967 |                     for(j = 0; j < N;++j) {
 968 |                     	t = j * N;
 969 |                         x = V[t+i-1];
 970 |                         z = V[t+i];
 971 |                         V[t+i-1] = x * c + z * s;
 972 |                         V[t+i] = z * c - x * s;
 973 |                     }
 974 |                     q[i-1] = z = hypot(f,h);
 975 |                     if (z != 0.0) {
 976 |                         c = f / z;
 977 |                         s = h / z;
 978 |                     }
 979 |                     f = c * g + s * y;
 980 |                     x = c * y - s * g;
 981 |                     for(j = 0; j < M;++j) {
 982 |                     	t = j * N;
 983 |                         y = U[t+i-1];
 984 |                         z = U[t+i];
 985 |                         U[t+i-1] = y * c + z * s;
 986 |                         U[t+i] = z * c - y * s;
 987 |                     }
 988 | 				}
 989 |                     e[l] = 0.0;
 990 |                     e[k] = f;
 991 |                     q[k] = x;
 992 | 
 993 | 			} else {
 994 | 				//convergence
 995 |                 if (z < 0.0) {
 996 |                     q[k] = -z;
 997 |                     for (j = 0; j < N; j++) {
 998 |                     	t = j *N;
 999 |                         V[t+k] = -V[t+k];
1000 |                     }
1001 |                 }
1002 |                 break;
1003 | 			}
1004 | 		}
1005 | 	}
1006 | 
1007 | 	svd_sort(U,M,N,V,q);
1008 | 
1009 | 	free(e);
1010 | 	return ierr;
1011 | }
1012 | 
1013 | int minfit(double *AB,int M,int N,int P,double *q) {
1014 | 	int i,j,k,l,t,t2,ierr,cancel,iter,l1,np,n1;
1015 | 	double eps,g,x,s,temp,f,h,tol,c,y,z;
1016 | 	double *e;
1017 | 	/*
1018 |      THIS SUBROUTINE IS THE MODIFIED C TRANSLATION OF THE
1019 |      EISPACK FORTRAN TRANSLATION OF THE ALGOL PROCEDURE MINFIT,
1020 |      NUM. MATH. 14, 403-420(1970) BY GOLUB AND REINSCH.
1021 |      HANDBOOK FOR AUTO. COMP., VOL II-LINEAR ALGEBRA, 134-151(1971).
1022 | 	 */
1023 | 	/*
1024 | 	 * AB MAX(M,N) X (N+P)
1025 | 	 * q N X 1
1026 | 	 *
1027 | 	 * On Input AB = [A:B]
1028 | 	 * On Output AB = [V:C] where C = U'*B
1029 | 	 */
1030 | 
1031 | 	e = (double*) malloc(sizeof(double) * N);
1032 | 	ierr = 0;
1033 | 	eps = macheps();
1034 | 	tol = eps * eps;
1035 | 	g = x = 0.0;
1036 | 	np = N + P;
1037 | 
1038 | 	for(i = 0; i < N;++i) {
1039 | 		l = i+1;
1040 | 		e[i] = g;
1041 | 		s = 0.0;
1042 | 
1043 | 		for(k = i; k < M;++k) {
1044 | 			t = k * np;
1045 | 			temp = AB[t+i];
1046 | 			s += temp*temp;
1047 | 		}
1048 | 		if (s < tol) {
1049 | 			g = 0.0;
1050 | 		} else {
1051 | 			f = AB[i*np+i];
1052 | 			g = (f < 0) ? sqrt(s) : -sqrt(s);
1053 | 			h = f * g - s;
1054 | 			AB[i*np+i] = f - g;
1055 | 
1056 | 			for(j = l; j < np;++j) {
1057 | 				s = 0.0;
1058 | 				for(k = i; k < M;++k) {
1059 | 					t = k * np;
1060 | 					s += (AB[t+i]*AB[t+j]);
1061 | 				}
1062 | 				f = s / h;
1063 | 				for(k = i; k < M;++k) {
1064 | 					t = k * np;
1065 | 					AB[t+j] += (f * AB[t+i]);
1066 | 				}
1067 | 			}
1068 | 
1069 | 		}
1070 | 
1071 |         q[i] = g;
1072 |         s = 0.0;
1073 |         t = i * np;
1074 |         if (i < M) {
1075 | 			for(k = l; k < N;++k) {
1076 | 				temp = AB[t+k];
1077 | 				s = s + temp*temp;
1078 | 			}
1079 |         }
1080 |         if (s < tol) {
1081 |         	g = 0.0;
1082 |         } else {
1083 |         	f = AB[t+i+1];
1084 | 			g = (f < 0) ? sqrt(s) : -sqrt(s);
1085 | 			h = f * g - s;
1086 |             AB[t+i+1] = f - g;
1087 |             for(k = l;k < N;++k) {
1088 |               	e[k] = AB[t+k] / h;
1089 |             }
1090 | 
1091 |             for (j = l; j < M; j++) {
1092 |                 s = 0.0;
1093 |                 t2 = j * np;
1094 |                 for (k = l; k < N; k++) {
1095 |                      s += AB[t2+k] * AB[t+k];
1096 |                 }
1097 |                 for (k = l; k < N; k++) {
1098 |                      AB[t2+k] += s * e[k];
1099 |                 }
1100 |             }
1101 | 
1102 |         }
1103 | 
1104 |         temp = fabs(q[i]) + fabs(e[i]);
1105 | 
1106 |         if (x < temp) {
1107 |         	x = temp;
1108 |         }
1109 | 	}
1110 | 
1111 | 	//Accumulating Right Hand Transformations
1112 | 
1113 | 	for(i = N - 1;i >= 0;--i) {
1114 | 		t = i * np;
1115 | 		if (g != 0.0) {
1116 | 			h = AB[t+i+1] * g;
1117 | 			for(j = l;j < N;++j) {
1118 | 				AB[j*np+i] = AB[t+j] / h;
1119 | 			}
1120 | 			for(j = l;j < N;++j) {
1121 | 				s = 0.0;
1122 | 				for(k = l; k < N;++k) {
1123 | 					s += AB[t+k] * AB[k*np+j];
1124 | 				}
1125 | 				for(k = l; k < N;++k) {
1126 | 					AB[k*np+j] += (s * AB[k*np+i]);
1127 | 				}
1128 | 			}
1129 | 		}
1130 | 		for(j = l; j < N;++j) {
1131 | 			AB[t+j] = AB[j*np+i] = 0.0;
1132 | 		}
1133 | 
1134 | 		AB[t+i] = 1.0;
1135 | 		g = e[i];
1136 | 		l = i;
1137 | 	}
1138 | 
1139 | 	eps = eps *x;
1140 | 	n1 = N + 1;
1141 | 
1142 | 	for(i = M; i < N;++i) {
1143 | 		for(j = n1 - 1; j < np;++j)  {
1144 | 			AB[i * np +j] = 0.0;
1145 | 		}
1146 | 	}
1147 | 
1148 | 	for(k = N - 1; k >= 0; --k) {
1149 | 		iter = 0;
1150 | 
1151 | 		while(1) {
1152 | 			iter++;
1153 | 			if (iter > SVDMAXITER) {
1154 | 				printf("Convergence Not Achieved \n");
1155 | 				return 15;
1156 | 			}
1157 | 
1158 | 			cancel = 1;
1159 | 			for(l = k; l >= 0; --l) {
1160 | 				if (fabs(e[l]) <= eps) {
1161 | 					cancel = 0; //test f convergence
1162 | 					break;
1163 | 				}
1164 | 				if (fabs(q[l-1]) <= eps) {
1165 | 					//Cancel
1166 | 					break;
1167 | 				}
1168 | 			}
1169 | 			if (cancel) {
1170 | 				c = 0.0;
1171 | 				s = 1.0;
1172 | 				l1 = l - 1;
1173 | 				for(i = l; i <= k;++i) {
1174 | 					f = s*e[i];
1175 | 					e[i] *= c;
1176 | 					if (fabs(f) <= eps) {
1177 | 						break;
1178 | 					}
1179 | 					g = q[i];
1180 | 					h = q[i] = hypot(f,g);
1181 | 					c = g/h;
1182 | 					s = -f/h;
1183 | 					for(j = n1-1; j < np;++j) {
1184 | 						y = AB[l1 * np + j];
1185 | 						z = AB[i * np + j];
1186 | 
1187 | 						AB[l1 * np + j] = y * c + z * s;
1188 | 						AB[i * np + j] = z * c - y * s;
1189 | 					}
1190 | 				}
1191 | 			}
1192 | 			z = q[k];
1193 | 			if (l != k) {
1194 | 				x = q[l];
1195 | 				y = q[k-1];
1196 | 				g = e[k-1];
1197 | 				h = e[k];
1198 | 				f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
1199 | 				g = hypot(f,1.0);
1200 | 				if (f < 0.0) {
1201 | 					temp = f - g;
1202 | 				} else {
1203 | 					temp = f+g;
1204 | 				}
1205 | 				f = x - (z / x) * z + (h / x) * (y / temp - h);
1206 | 
1207 | 				//Next QR Transformation
1208 | 
1209 | 				c = s = 1.0;
1210 | 				for(i = l+1; i <= k;++i) {
1211 | 					g = e[i];
1212 | 					y = q[i];
1213 | 					h = s * g;
1214 | 					g = c * g;
1215 | 					e[i-1] = z = hypot(f,h);
1216 |                     c = f / z;
1217 |                     s = h / z;
1218 |                     f = x * c + g * s;
1219 |                     g = g * c - x * s;
1220 |                     h = y * s;
1221 |                     y *= c;
1222 |                     for(j = 0; j < N;++j) {
1223 |                     	t = j * np;
1224 |                         x = AB[t+i-1];
1225 |                         z = AB[t+i];
1226 |                         AB[t+i-1] = x * c + z * s;
1227 |                         AB[t+i] = z * c - x * s;
1228 |                     }
1229 |                     q[i-1] = z = hypot(f,h);
1230 |                     if (z != 0.0) {
1231 |                         c = f / z;
1232 |                         s = h / z;
1233 |                     }
1234 |                     f = c * g + s * y;
1235 |                     x = c * y - s * g;
1236 |                     for(j = n1-1; j < np;++j) {
1237 |                         y = AB[(i-1) * np + j];
1238 |                         z = AB[i * np + j];
1239 |                         AB[(i-1) * np + j] = y * c + z * s;
1240 |                         AB[i * np + j] = z * c - y * s;
1241 |                     }
1242 | 				}
1243 |                     e[l] = 0.0;
1244 |                     e[k] = f;
1245 |                     q[k] = x;
1246 | 
1247 | 			} else {
1248 | 				//convergence
1249 |                 if (z < 0.0) {
1250 |                     q[k] = -z;
1251 |                     for (j = 0; j < N; j++) {
1252 |                     	t = j * np;
1253 |                         AB[t+k] = -AB[t+k];
1254 |                     }
1255 |                 }
1256 |                 break;
1257 | 			}
1258 | 		}
1259 | 	}
1260 | 
1261 | 	free(e);
1262 | 	return ierr;
1263 | 
1264 | }
1265 | 
1266 | int lls_svd2(double *Ai,double *bi,int M,int N,double *xo) {
1267 | 	int retcode,P,mnmax,np,i,j,t,t2;
1268 | 	double *AB,*q,*V,*C;
1269 | 	double eps;
1270 | 
1271 | 	if (M > N) {
1272 | 		mnmax = M;
1273 | 	} else {
1274 | 		mnmax = N;
1275 | 	}
1276 | 	retcode = 0;
1277 | 	P = 1;
1278 | 	np = N + P;
1279 | 	eps = macheps();
1280 | 
1281 | 	AB = (double*) malloc(sizeof(double) * mnmax * np);
1282 | 	q = (double*) malloc(sizeof(double) * N);
1283 | 	C = (double*) malloc(sizeof(double) * N);
1284 | 	V = (double*) malloc(sizeof(double) * N * N);
1285 | 
1286 | 	for(i = 0; i < M;++i) {
1287 | 		t = i * N;
1288 | 		t2 = i * np;
1289 | 		for(j = 0; j < N;++j) {
1290 | 			AB[t2 + j] = Ai[t+j];
1291 | 		}
1292 | 	}
1293 | 
1294 | 	for(i = 0; i < M;++i) {
1295 | 		t2 = i * np;
1296 | 		AB[t2+N] = bi[i];
1297 | 	}
1298 | 
1299 | 	minfit(AB,M,N,P,q);
1300 | 
1301 | 	for(i = 0; i < N;++i) {
1302 | 		t = i * N;
1303 | 		t2 = i * np;
1304 | 		for(j = 0; j < N;++j) {
1305 | 			V[t+j] = AB[t2 + j];
1306 | 		}
1307 | 	}
1308 | 
1309 | 	for(i = 0; i < N;++i) {
1310 | 		t = i *N;
1311 | 		for(j = 0; j < N;++j) {
1312 | 			if (fabs(q[j]) > eps) {
1313 | 				V[t+j] /= q[j];
1314 | 			} else {
1315 | 				V[t+j] = 0.0;
1316 | 			}
1317 | 		}
1318 | 	}
1319 | 
1320 | 	for(i = 0; i < N;++i) {
1321 | 		t2 = i * np;
1322 | 		C[i] = AB[t2+N];
1323 | 	}
1324 | 	mmult(V,C,xo,N,N,1);
1325 | 
1326 | 	free(AB);
1327 | 	free(q);
1328 | 	free(V);
1329 | 	free(C);
1330 | 	return retcode;
1331 | }
1332 | 
1333 | int lls_svd(double *Ai,double *bi,int M,int N,double *xo) {
1334 | 	int retcode,i,j,t;
1335 | 	double *U,*V,*q,*C;
1336 | 	double eps;
1337 | 
1338 | 	U = (double*) malloc(sizeof(double) * M*N);
1339 | 	V = (double*) malloc(sizeof(double) * N*N);
1340 | 	q = (double*) malloc(sizeof(double) * N);
1341 | 	C = (double*) malloc(sizeof(double) * N);
1342 | 
1343 | 	retcode = 0;
1344 | 	eps = macheps();
1345 | 
1346 | 	retcode = svd(Ai,M,N,U,V,q);;
1347 | 	if (retcode != 0) {
1348 | 		return retcode;
1349 | 	}
1350 | 
1351 | 	mmult(bi,U,C,1,M,N);
1352 | 
1353 | 	for(i = 0; i < N;++i) {
1354 | 		t = i *N;
1355 | 		for(j = 0; j < N;++j) {
1356 | 			if (fabs(q[j]) > eps) {
1357 | 				V[t+j] /= q[j];
1358 | 			} else {
1359 | 				V[t+j] = 0.0;
1360 | 			}
1361 | 		}
1362 | 	}
1363 | 
1364 | 	mmult(V,C,xo,N,N,1);
1365 | 
1366 | 	free(U);
1367 | 	free(V);
1368 | 	free(q);
1369 | 
1370 | 	return retcode;
1371 | }
1372 | 


--------------------------------------------------------------------------------
/src/nls.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * nls.c
   3 |  *
   4 |  *  Created on: May 21, 2014
   5 |  *      Author: Rafat Hussain
   6 |  */
   7 | 
   8 | #include "nls.h"
   9 | 
  10 | double enorm(double *x, int N) {
  11 | 	double enrm;
  12 | 	int i;
  13 |     double agiant,floatn,one,rdwarf,rgiant,s1,s2,s3,xabs,x1max,x3max,zero;
  14 | 
  15 | 	/*
  16 | 	 * C Translation of Fortran Code
  17 | 	 * Argonne national laboratory. minpack project. march 1980.
  18 |       burton s. garbow, kenneth e. hillstrom, jorge j. more
  19 | 	 */
  20 | 
  21 |     one = 1.0;
  22 |     zero = 0.0;
  23 |     rdwarf = 3.834e-20;
  24 |     rgiant = 1.304e19;
  25 |     s1 = s2 = s3 = x1max = x3max = zero;
  26 |     floatn = N;
  27 |     agiant = rgiant / floatn;
  28 | 
  29 |     for(i = 0; i < N;++i) {
  30 |     	xabs = fabs(x[i]);
  31 |     	if (xabs >= agiant) {
  32 |     		//sum for large components
  33 |     		if (xabs > x1max) {
  34 |     			s1 = one + s1*(x1max/xabs)*(x1max/xabs);
  35 |     			x1max = xabs;
  36 |     		} else {
  37 |     			s1 = s1 + (xabs/x1max)*(xabs/x1max);
  38 |     		}
  39 |     	} else if (xabs <= rdwarf) {
  40 |     		//sum for small components
  41 |     		if (xabs > x3max) {
  42 |                 s3 = one + s3*(x3max/xabs)*(x3max/xabs);
  43 |                 x3max = xabs;
  44 |     		} else {
  45 |     			if (xabs != zero) {
  46 |     				s3 = s3 + (xabs/x3max)*(xabs/x3max);
  47 |     			}
  48 |     		}
  49 | 
  50 |     	} else {
  51 |     		//sum for intermediate components.
  52 |     		s2 += xabs*xabs;
  53 |     	}
  54 |     }
  55 | 
  56 |     if (s1 != zero) {
  57 |     	enrm = x1max*sqrt(s1+(s2/x1max)/x1max);
  58 |     } else {
  59 |     	if (s2 != zero) {
  60 |     		if (s2 >= x3max) {
  61 |     			enrm = sqrt(s2*(one+(x3max/s2)*(x3max*s3)));
  62 |     		}
  63 |     		if (s2 < x3max) {
  64 |     			enrm = sqrt(x3max*((s2/x3max)+(x3max*s3)));
  65 |     		}
  66 |     	} else {
  67 |     		enrm = x3max*sqrt(s3);
  68 |     	}
  69 |     }
  70 | 
  71 | 	return enrm;
  72 | }
  73 | 
  74 | void qrfac(double *A, int M, int N, int lda, int pivot, int *ipvt, int lipvt,double *rdiag, double *acnorm,double eps) {
  75 | 	int i,j,jp1,k,kmax,minmn,t;
  76 |     double ajnorm,epsmch,one,p05,sum,temp,zero,temp2,pmaxval;
  77 |     double *AT,*wa,*wa2;
  78 | 
  79 |  /*
  80 |      * This routine is a C translation of Fortran Code by
  81 |      *     argonne national laboratory. minpack project. march 1980.
  82 |       burton s. garbow, kenneth e. hillstrom, jorge j. more
  83 |      *
  84 |      * M is a positive integer input variable set to the number
  85 |          of rows of a.
  86 | 
  87 |        N is a positive integer input variable set to the number
  88 |          of columns of a.
  89 | 
  90 |        A is an M by N array. on input a contains the matrix for
  91 |          which the qr factorization is to be computed. on output
  92 |          the strict upper trapezoidal part of a contains the strict
  93 |          upper trapezoidal part of r, and the lower trapezoidal
  94 |          part of a contains a factored form of q (the non-trivial
  95 |          elements of the u vectors described above).
  96 | 
  97 |        lda is a positive integer input variable not less than m
  98 |          which specifies the leading dimension of the array a.
  99 | 
 100 |        pivot is an integer input variable. if pivot is set to 1,
 101 |          then column pivoting is enforced. if pivot is set to 0,
 102 |          then no column pivoting is done.
 103 | 
 104 |        ipvt is an integer output array of length lipvt. ipvt
 105 |          defines the permutation matrix p such that a*p = q*r.
 106 |          column j of p is column ipvt(j) of the identity matrix.
 107 |          if pivot is false, ipvt is not referenced.
 108 | 
 109 |        lipvt is a positive integer input variable. if pivot is false,
 110 |          then lipvt may be as small as 1. if pivot is true, then
 111 |          lipvt must be at least n.
 112 | 
 113 |        rdiag is an output array of length N which contains the
 114 |          diagonal elements of r.
 115 | 
 116 |        acnorm is an output array of length N which contains the
 117 |          norms of the corresponding columns of the input matrix a.
 118 |          if this information is not needed, then acnorm can coincide
 119 |          with rdiag.
 120 |      *
 121 |      */
 122 | 
 123 | 
 124 |     if (pivot != 0 && pivot != 1) {
 125 |     	printf("Pivot only takes binary values 0 and 1 \n");
 126 |     	exit(-1);
 127 |     }
 128 | 
 129 |     AT = (double*) malloc(sizeof(double) *N*M);
 130 |     wa = (double*) malloc(sizeof(double) *N);
 131 |     wa2 = (double*) malloc(sizeof(double) *M);
 132 | 
 133 |     one = 1.0; zero = 0.0; p05 = 5.0e-02;
 134 |     epsmch = eps;
 135 | 
 136 |     mtranspose(A,M,N,AT);// AT is size NXM
 137 | 
 138 |     //compute the initial column norms and initialize several arrays.
 139 | 
 140 |     for(j = 0; j < N;++j) {
 141 |     	acnorm[j] = enorm(AT+j*M,M);
 142 |     	rdiag[j] = acnorm[j];
 143 |     	wa[j] = rdiag[j];
 144 |     	if (pivot == 1) {
 145 |     		ipvt[j] = j;
 146 |     	}
 147 |     }
 148 | 
 149 |     //reduce a to r with householder transformations.
 150 | 
 151 |     if (M < N) {
 152 |     	minmn = M;
 153 |     } else {
 154 |     	minmn = N;
 155 |     }
 156 | 
 157 |     for (j = 0; j < minmn;++j) {
 158 |     	if (pivot == 1) {
 159 |     		//bring the column of largest norm into the pivot position.
 160 |     		kmax = j;
 161 |     		for(k = j; k < N;++k) {
 162 |     			if (rdiag[k] > rdiag[kmax]) {
 163 |     				kmax = k;
 164 |     			}
 165 |     		}
 166 | 			if (kmax != j) {
 167 | 				for(i = 0; i < M;++i) {
 168 | 					t = i * N;
 169 | 					temp = A[t+j];
 170 | 					A[t+j] = A[t+kmax];
 171 | 					A[t+kmax] = temp;
 172 | 				}
 173 | 				rdiag[kmax] = rdiag[j];
 174 | 				wa[kmax] = wa[j];
 175 | 				k = ipvt[j];
 176 | 				ipvt[j] = ipvt[kmax];
 177 | 				ipvt[kmax] = k;
 178 | 			}
 179 |     	}
 180 |     	//        compute the householder transformation to reduce the
 181 |     	//       j-th column of a to a multiple of the j-th unit vector.
 182 |     	t = j * N + j;
 183 | 
 184 |     	for(i = 0; i < M-j;++i) {
 185 |     		wa2[i] = A[t+i*N];
 186 |     	}
 187 |     	ajnorm = enorm(wa2,M-j);
 188 |     	if (ajnorm != zero) {
 189 |     		if (A[t] < zero) {
 190 |     			ajnorm = - ajnorm;
 191 |     		}
 192 |     		for(i = j; i < M;++i) {
 193 |     			A[i*N+j] /= ajnorm;
 194 |     		}
 195 |     		A[t] += one;
 196 |     		//        apply the transformation to the remaining columns
 197 |     		//        and update the norms.
 198 | 
 199 |     		jp1 = j + 1; // Breakpoint
 200 |     		if (N >= jp1+1) {
 201 |     			for(k = jp1; k < N;++k) {
 202 |     				sum = zero;
 203 |     				for(i = j; i < M;++i) {
 204 |     					sum += (A[i*N+j] * A[i*N+k]);
 205 |     				}
 206 |     				temp = sum / A[t];
 207 |     				for(i = j; i < M;++i) {
 208 |     					A[i*N+k] -= (temp * A[i*N+j]);
 209 |     				}
 210 |     				// Breakpoint 2
 211 |     				if (pivot == 1 && rdiag[k] != zero) {
 212 |     					temp = A[j*N+k] / rdiag[k];
 213 | 						pmaxval = pmax(zero, one - temp*temp);
 214 |     					rdiag[k] = rdiag[k]*sqrt(pmaxval);
 215 |     					temp2 = (p05*(rdiag[k]/wa[k]));
 216 |     					temp2 = temp2 * temp2;
 217 |     					if (temp2 <= epsmch) {
 218 |     				    	for(i = 0; i < M-j-1;++i) {
 219 |     				    		wa2[i] = A[jp1*N+k+i*N];
 220 |     				    	}
 221 |     				    	rdiag[k] = enorm(wa2,M-j-1);
 222 |     				    	wa[k] = rdiag[k];
 223 |     					}
 224 |     				}
 225 |     			}
 226 |     		}
 227 |     	}
 228 |     	rdiag[j] = -ajnorm;
 229 |     }
 230 | 
 231 |     free(AT);
 232 |     free(wa);
 233 |     free(wa2);
 234 | }
 235 | 
 236 | void qrsolv(double *r,int ldr,int N,int *ipvt,double *diag,double *qtb,double *x,double *sdiag) {
 237 | 	int i,j,jp1,k,kp1,l,nsing,t;
 238 | 	double cos,cotan,p5,p25,qtbpj,sin,sum,tan,temp,zero;
 239 | 	double *wa;
 240 | 
 241 | 	/*
 242 | 	 *   This routine is a C translation of Fortran Code by
 243 |      *     argonne national laboratory. minpack project. march 1980.
 244 |       	  burton s. garbow, kenneth e. hillstrom, jorge j. more
 245 | 	 * N is a positive integer input variable set to the order of r.
 246 | 
 247 |        r is an N by N array. on input the full upper triangle
 248 |          must contain the full upper triangle of the matrix r.
 249 |          on output the full upper triangle is unaltered, and the
 250 |          strict lower triangle contains the strict upper triangle
 251 |          (transposed) of the upper triangular matrix s.
 252 | 
 253 |        ldr is a positive integer input variable not less than n
 254 |          which specifies the leading dimension of the array r.
 255 | 
 256 |        ipvt is an integer input array of length N which defines the
 257 |          permutation matrix p such that a*p = q*r. column j of p
 258 |          is column ipvt(j) of the identity matrix.
 259 | 
 260 |        diag is an input array of length N which must contain the
 261 |          diagonal elements of the matrix d.
 262 | 
 263 |        qtb is an input array of length N which must contain the first
 264 |          N elements of the vector (q transpose)*b.
 265 | 
 266 |        x is an output array of length N which contains the least
 267 |          squares solution of the system a*x = b, d*x = 0.
 268 | 
 269 |        sdiag is an output array of length N which contains the
 270 |          diagonal elements of the upper triangular matrix s.
 271 | 	 */
 272 | 
 273 | 	wa = (double*) malloc(sizeof(double) *N);
 274 | 
 275 | 	p5 = 5.0e-1;
 276 | 	p25 = 2.5e-1;
 277 | 	zero = 0.0;
 278 | 
 279 | 	//     copy r and (q transpose)*b to preserve input and initialize s.
 280 | 	//     in particular, save the diagonal elements of r in x.
 281 | 
 282 | 	for(j = 0; j < N;++j) {
 283 | 		for(i = j; i < N;++i) {
 284 | 			r[i*N+j] = r[j*N+i];
 285 | 		}
 286 | 		x[j] = r[j*N+j];
 287 | 		wa[j] = qtb[j];
 288 | 	}
 289 | 
 290 | 	//eliminate the diagonal matrix d using a givens rotation.
 291 | 
 292 | 	for(j = 0; j < N;++j) { //100
 293 | 
 294 | 		//        prepare the row of d to be eliminated, locating the
 295 | 		//        diagonal element using p from the qr factorization.
 296 | 
 297 | 		l = ipvt[j];
 298 | 
 299 | 		if (diag[l] != zero) { //90
 300 | 			for(k = j; k < N;++k) {
 301 | 				sdiag[k] = zero;
 302 | 			}
 303 | 
 304 | 			sdiag[j] = diag[l];
 305 | 
 306 | 			//        the transformations to eliminate the row of d
 307 | 			//        modify only a single element of (q transpose)*b
 308 | 			//        beyond the first n, which is initially zero.
 309 | 
 310 | 			qtbpj = zero;
 311 | 
 312 | 			for(k = j; k < N;++k) { //80
 313 | 				t = k * N;
 314 | 				//     determine a givens rotation which eliminates the
 315 | 				//     appropriate element in the current row of d.
 316 | 
 317 | 				if (sdiag[k] != zero) { //70
 318 | 					if (fabs(r[t+k]) < fabs(sdiag[k])) {
 319 | 						cotan = r[t+k]/sdiag[k];
 320 | 			            sin = p5/sqrt(p25+p25*cotan*cotan);
 321 | 			            cos = sin*cotan;
 322 | 					} else {
 323 | 			            tan = sdiag[k]/r[t+k];
 324 | 			            cos = p5/sqrt(p25+p25*tan*tan);
 325 | 			            sin = cos*tan;
 326 | 					}
 327 | 					//           compute the modified diagonal element of r and
 328 | 					//           the modified element of ((q transpose)*b,0).
 329 | 		            r[t+k] = cos*r[t+k] + sin*sdiag[k];
 330 | 		            temp = cos*wa[k] + sin*qtbpj;
 331 | 		            qtbpj = -sin*wa[k] + cos*qtbpj;
 332 | 		            wa[k] = temp;
 333 | 
 334 | 		            //           accumulate the tranformation in the row of s.
 335 | 		            kp1 = k + 1;
 336 | 
 337 | 		            if (N >= kp1+1) {//71
 338 | 		            	for(i = kp1;i < N;++i) {
 339 | 		                    temp = cos*r[i*N+k] + sin*sdiag[i];
 340 | 		                    sdiag[i] = -sin*r[i*N+k] + cos*sdiag[i];
 341 | 		                    r[i*N+k] = temp;
 342 | 		            	}
 343 | 		            }//71
 344 | 				}//70
 345 | 			}//80
 346 | 		} //90
 347 | 		//        store the diagonal element of s and restore
 348 | 		//        the corresponding diagonal element of r.
 349 | 
 350 |         sdiag[j] = r[j*N+j];
 351 |         r[j*N+j] = x[j];
 352 | 
 353 | 	}//100
 354 | 
 355 | 	//     solve the triangular system for z. if the system is
 356 | 	//     singular, then obtain a least squares solution.
 357 | 
 358 | 	nsing = N;
 359 | 
 360 | 	for(j = 1; j <= N;++j) {
 361 | 		if (sdiag[j-1] == zero && nsing == N) {
 362 | 			nsing = j - 1;
 363 | 		}
 364 | 		if (nsing < N) {
 365 | 			wa[j-1] = zero;
 366 | 		}
 367 | 	}
 368 | 
 369 | 	if (nsing >= 1) {//150
 370 | 		for(k = 1; k <= nsing;++k) {
 371 | 	         j = nsing - k + 1;
 372 | 	         sum = zero;
 373 | 	         jp1 = j + 1;
 374 | 
 375 | 	         if (nsing >= jp1) {
 376 | 	        	 for (i = jp1; i <= nsing;++i) {
 377 | 	        		 sum = sum + r[(i-1)*N+j-1]*wa[i-1];
 378 | 	        	 }
 379 | 	         }
 380 | 	         wa[j-1] = (wa[j-1] - sum)/sdiag[j-1];
 381 | 		}
 382 | 	}//150
 383 | 
 384 | 	//     permute the components of z back to components of x.
 385 | 
 386 | 	for (j = 0; j < N;++j) {
 387 |         l = ipvt[j];
 388 |         x[l] = wa[j];
 389 | 	}
 390 | 
 391 | 
 392 | 	free(wa);
 393 | }
 394 | 
 395 | void fdjac2(custom_funcmult *funcmult, double *x, int M, int N, double *fvec, double *fjac, int ldfjac,
 396 | 		double epsfcn,double eps) {
 397 | 	int i,j;
 398 | 	double epsmch,h,temp,zero;
 399 | 	double *wa;
 400 | 
 401 | 	zero = 0.0;
 402 | 	epsmch = eps;
 403 | 	eps = sqrt(pmax(epsfcn,epsmch));
 404 | 
 405 | 	wa = (double*) malloc(sizeof(double) *M);
 406 | 
 407 | 	for(j = 0; j < N;++j) {
 408 |         temp = x[j];
 409 |         h = eps*fabs(temp);
 410 |         if (h == zero) {
 411 |         	h = eps;
 412 |         }
 413 |         x[j] = temp + h;
 414 |         FUNCMULT_EVAL(funcmult,x,M,N,wa);
 415 |         x[j] = temp;
 416 |         for(i = 0; i < M;++i) {
 417 |         	fjac[i*N+j] = (wa[i] - fvec[i])/h;
 418 |         }
 419 | 	}
 420 | 
 421 | 	free(wa);
 422 | 
 423 | }
 424 | 
 425 | void lmpar(double *r,int ldr,int N,int *ipvt,double *diag,double *qtb,double delta,double *par,double *x,double *sdiag) {
 426 | 	int i,iter,j,jm1,jp1,k,l,nsing;
 427 | 	double dxnorm,dwarf,fp,gnorm,parc,parl,paru,p1,p001,sum,temp,zero;
 428 | 	double *wa1,*wa2;
 429 | 	/*
 430 | 	*   This routine is a C translation of Fortran Code by
 431 |     *     argonne national laboratory. minpack project. march 1980.
 432 |      	  burton s. garbow, kenneth e. hillstrom, jorge j. more
 433 | 
 434 | 	 * N is a positive integer input variable set to the order of r.
 435 | 
 436 |        r is an N by N array. on input the full upper triangle
 437 |          must contain the full upper triangle of the matrix r.
 438 |          on output the full upper triangle is unaltered, and the
 439 |          strict lower triangle contains the strict upper triangle
 440 |          (transposed) of the upper triangular matrix s.
 441 | 
 442 |        ldr is a positive integer input variable not less than n
 443 |          which specifies the leading dimension of the array r.
 444 | 
 445 |        ipvt is an integer input array of length N which defines the
 446 |          permutation matrix p such that a*p = q*r. column j of p
 447 |          is column ipvt(j) of the identity matrix.
 448 | 
 449 |        diag is an input array of length N which must contain the
 450 |          diagonal elements of the matrix d.
 451 | 
 452 |        qtb is an input array of length N which must contain the first
 453 |          N elements of the vector (q transpose)*b.
 454 | 
 455 |        delta is a positive input variable which specifies an upper
 456 |          bound on the euclidean norm of d*x.
 457 | 
 458 |        par is a nonnegative variable. on input par contains an
 459 |          initial estimate of the levenberg-marquardt parameter.
 460 |          on output par contains the final estimate.
 461 | 
 462 |        x is an output array of length N which contains the least
 463 |          squares solution of the system a*x = b, sqrt(par)*d*x = 0,
 464 |          for the output par.
 465 | 
 466 |        sdiag is an output array of length N which contains the
 467 |          diagonal elements of the upper triangular matrix s.
 468 |     */
 469 | 
 470 | 	wa1 = (double*) malloc(sizeof(double) *N);
 471 | 	wa2 = (double*) malloc(sizeof(double) *N);
 472 | 
 473 | 	p1 = 1.0e-01;
 474 | 	p001 = 1.0e-03;
 475 | 	zero = 0.0;
 476 | 	dwarf = 2.22507385852e-308;
 477 | 
 478 | 	//     compute and store in x the gauss-newton direction. if the
 479 | 	//     jacobian is rank-deficient, obtain a least squares solution.
 480 | 
 481 | 	nsing = N;
 482 | 
 483 | 	for(j = 1; j <= N;++j) {
 484 | 		wa1[j-1] = qtb[j-1];
 485 | 		if (r[(j-1)*N+j-1] == zero && nsing == N) {
 486 | 			nsing = j - 1;
 487 | 		}
 488 | 		if (nsing < N) {
 489 | 			wa1[j-1] = zero;
 490 | 		}
 491 | 	}
 492 | 
 493 | 	if (nsing >= 1) {//50
 494 | 		for(k = 1; k <= nsing;++k) {
 495 | 	         j = nsing - k + 1;
 496 | 	         wa1[j-1] = wa1[j-1]/r[(j-1)*N+j-1];
 497 | 	         temp = wa1[j-1];
 498 | 	         jm1 = j - 1;
 499 | 	         if (jm1 >= 1) {
 500 | 	        	 for(i = 1; i <= jm1;++i) {
 501 | 	        		 wa1[i-1] = wa1[i-1] - r[(i-1)*N+j-1]*temp;
 502 | 	        	 }
 503 | 	         }
 504 | 		}
 505 | 	}//50
 506 | 
 507 | 	for (j = 0; j < N;++j) {
 508 |         l = ipvt[j];
 509 |         x[l] = wa1[j];
 510 | 	}
 511 | 
 512 | 	//     initialize the iteration counter.
 513 | 	//     evaluate the function at the origin, and test
 514 | 	//     for acceptance of the gauss-newton direction.
 515 | 
 516 | 	iter = 0;
 517 | 
 518 | 	for(j = 0; j < N;++j) {
 519 | 		wa2[j] = diag[j]*x[j];
 520 | 	}
 521 | 
 522 | 	dxnorm = enorm(wa2,N);
 523 | 	fp = dxnorm - delta;
 524 | 
 525 | 	if (fp > p1*delta) {//220
 526 | 		//     if the jacobian is not rank deficient, the newton
 527 | 		//    step provides a lower bound, parl, for the zero of
 528 | 		//     the function. otherwise set this bound to zero.
 529 | 		parl = zero;
 530 | 
 531 | 		if (nsing >= N) { //120 nsing only takes values upto N
 532 | 			for(j = 0; j < N;++j) {
 533 | 				l = ipvt[j];
 534 | 				wa1[j] = diag[l]*(wa2[l]/dxnorm);
 535 | 			}
 536 | 
 537 | 			for(j = 0; j < N;++j) {//110
 538 | 		         sum = zero;
 539 | 		         jm1 = j - 1;
 540 | 		         if (jm1 >= 0) {//100
 541 | 		        	 for(i = 0; i <= jm1;++i) {//check
 542 | 		        		 sum = sum + r[i*N+j]*wa1[i];
 543 | 		        	 }
 544 | 		         }//100
 545 | 		         wa1[j] = (wa1[j] - sum)/r[j*N+j];
 546 | 			}//110
 547 | 		      temp = enorm(wa1,N);
 548 | 		      parl = ((fp/delta)/temp)/temp;
 549 | 		}//120
 550 | 
 551 | 		//     calculate an upper bound, paru, for the zero of the function.
 552 | 
 553 | 		for(j = 0; j < N;++j) {//140
 554 | 			sum = zero;
 555 | 			for(i = 0; i <= j;++i) {//check
 556 | 				sum = sum + r[i*N+j]*qtb[i];
 557 | 			}
 558 | 	         l = ipvt[j];
 559 | 	         wa1[j] = sum/diag[l];
 560 | 		}//140
 561 | 		gnorm = enorm(wa1,N);
 562 | 		paru = gnorm/delta;
 563 | 
 564 | 		if (paru == zero) {
 565 | 			paru = dwarf/pmin(delta,p1);
 566 | 		}
 567 | 
 568 | 		//     if the input par lies outside of the interval (parl,paru),
 569 | 		//     set par to the closer endpoint.
 570 | 
 571 | 	      *par = pmax(*par,parl);
 572 | 	      *par = pmin(*par,paru);
 573 | 
 574 | 	      if (*par == zero) {
 575 | 	    	  *par = gnorm/dxnorm;
 576 | 	      }
 577 | 	      //Iteration begins
 578 | 	      while(1) {
 579 | 	    	  iter++;
 580 | 	    	  //        evaluate the function at the current value of par.
 581 | 	    	  if (*par == zero) {
 582 | 	    		  *par = pmax(dwarf,p001*paru);
 583 | 	    	  }
 584 | 	    	  temp = sqrt(*par);
 585 | 	    	  for(j = 0; j < N;++j) {
 586 | 	    		  wa1[j] = temp*diag[j];
 587 | 	    	  }
 588 | 
 589 | 	    	  qrsolv(r,ldr,N,ipvt,wa1,qtb,x,sdiag);
 590 | 	    	  for(j = 0; j < N;++j) {
 591 | 	              wa2[j] = diag[j]*x[j];
 592 | 	    	  }
 593 | 
 594 | 	          dxnorm = enorm(wa2,N);
 595 | 	          temp = fp;
 596 | 	          fp = dxnorm - delta;
 597 | 
 598 | 	          //        if the function is small enough, accept the current value
 599 | 	          //        of par. also test for the exceptional cases where parl
 600 | 	          //        is zero or the number of iterations has reached 10.
 601 | 	          if (fabs(fp) <= p1*delta) {
 602 | 	        	  break;
 603 | 	          }
 604 | 
 605 | 	          if (iter == 10) {
 606 | 	        	  break;
 607 | 	          }
 608 | 
 609 | 	          if (parl == zero && fp <= temp && temp < zero) {
 610 | 	        	  break;
 611 | 	          }
 612 | 
 613 | 	          //        compute the newton correction.
 614 | 
 615 | 	          for(j = 0;j < N;++j) {//180
 616 | 	              l = ipvt[j];
 617 | 	              wa1[j] = diag[l]*(wa2[l]/dxnorm);
 618 | 	          }//180
 619 | 
 620 | 	          for(j = 0; j < N;++j) {//210
 621 | 	              wa1[j] = wa1[j]/sdiag[j];
 622 | 	              temp = wa1[j];
 623 | 	              jp1 = j + 1;
 624 | 	              if (N >= jp1+1) {
 625 | 	            	  for(i = jp1; i < N;++i) {
 626 | 	            		  wa1[i] = wa1[i] - r[i*N+j]*temp;
 627 | 	            	  }
 628 | 	              }
 629 | 	          }//210
 630 | 	          temp = enorm(wa1,N);
 631 | 	          parc = ((fp/delta)/temp)/temp;
 632 | 	          //        depending on the sign of the function, update parl or paru.
 633 | 
 634 | 	          if (fp > zero) {
 635 | 	        	  parl = pmax(parl,*par);
 636 | 	          }
 637 | 	          if (fp < zero) {
 638 | 	        	  paru = pmin(paru,*par);
 639 | 	          }
 640 | 
 641 | 	          //       compute an improved estimate for par.
 642 | 	          *par = pmax(parl,*par+parc);
 643 | 
 644 | 	      }
 645 | 
 646 | 	}//220
 647 | 
 648 | 	if (iter == 0) {
 649 | 		*par = zero;
 650 | 	}
 651 | 
 652 | 	free(wa1);
 653 | 	free(wa2);
 654 | }
 655 | 
 656 | int lmder(custom_funcmult *funcmult, custom_jacobian *jacobian, double *x, int M, int N,
 657 | 		double *fvec,double *fjac,int ldfjac,int maxfev,double *diag,int mode,double factor,int nprint,
 658 | 		double eps,double ftol,double gtol,double xtol,int *nfev,int *njev,int *ipvt, double *qtf) {
 659 | 	int info;
 660 | 	int i,j,l,iter;
 661 |     double actred,delta,dirder,epsmch,fnorm,fnorm1,gnorm,one,par,pnorm,prered,p1,p5,p25,p75,p0001,ratio,
 662 |     sum,temp,temp1,temp2,xnorm,zero;
 663 |     double *wa1,*wa2,*wa3,*wa4;
 664 | 
 665 |     /*
 666 |      * 	*   This routine is a C translation of Fortran Code by
 667 |     *     argonne national laboratory. minpack project. march 1980.
 668 |      	  burton s. garbow, kenneth e. hillstrom, jorge j. more
 669 |      *  M is a positive integer input variable set to the number
 670 | c         of functions.
 671 | c
 672 | c       N is a positive integer input variable set to the number
 673 | c         of variables. N must not exceed M.
 674 | c
 675 | c       x is an array of length N. on input x must contain
 676 | c         an initial estimate of the solution vector. on output x
 677 | c         contains the final estimate of the solution vector.
 678 | c
 679 | c       fvec is an output array of length M which contains
 680 | c         the functions evaluated at the output x.
 681 | c
 682 | c       fjac is an output M by N array. the upper N by N submatrix
 683 | c         of fjac contains an upper triangular matrix r with
 684 | c         diagonal elements of nonincreasing magnitude such that
 685 | c
 686 | c                t     t           t
 687 | c               p *(jac *jac)*p = r *r,
 688 | c
 689 | c         where p is a permutation matrix and jac is the final
 690 | c         calculated jacobian. column j of p is column ipvt(j)
 691 | c         (see below) of the identity matrix. the lower trapezoidal
 692 | c         part of fjac contains information generated during
 693 | c         the computation of r.
 694 | c
 695 | c       ldfjac is a positive integer input variable not less than M
 696 | c         which specifies the leading dimension of the array fjac.
 697 | c
 698 | c       ftol is a nonnegative input variable. termination
 699 | c         occurs when both the actual and predicted relative
 700 | c         reductions in the sum of squares are at most ftol.
 701 | c         therefore, ftol measures the relative error desired
 702 | c         in the sum of squares.
 703 | c
 704 | c       xtol is a nonnegative input variable. termination
 705 | c         occurs when the relative error between two consecutive
 706 | c         iterates is at most xtol. therefore, xtol measures the
 707 | c         relative error desired in the approximate solution.
 708 | c
 709 | c       gtol is a nonnegative input variable. termination
 710 | c         occurs when the cosine of the angle between fvec and
 711 | c         any column of the jacobian is at most gtol in absolute
 712 | c         value. therefore, gtol measures the orthogonality
 713 | c         desired between the function vector and the columns
 714 | c         of the jacobian.
 715 | c
 716 | c       maxfev is a positive integer input variable. termination
 717 | c         occurs when the number of calls to fcn with iflag = 1
 718 | c         has reached maxfev.
 719 | c
 720 | c       diag is an array of length N. if mode = 1 (see
 721 | c         below), diag is internally set. if mode = 2, diag
 722 | c         must contain positive entries that serve as
 723 | c         multiplicative scale factors for the variables.
 724 | c
 725 | c       mode is an integer input variable. if mode = 1, the
 726 | c         variables will be scaled internally. if mode = 2,
 727 | c         the scaling is specified by the input diag. other
 728 | c         values of mode are equivalent to mode = 1.
 729 | c
 730 | c       factor is a positive input variable used in determining the
 731 | c         initial step bound. this bound is set to the product of
 732 | c         factor and the euclidean norm of diag*x if nonzero, or else
 733 | c         to factor itself. in most cases factor should lie in the
 734 | c         interval (.1,100.).100. is a generally recommended value.
 735 | c
 736 | c       nprint is an integer input variable that enables controlled
 737 | c         printing of iterates if it is positive. in this case,
 738 | c         fcn is called with iflag = 0 at the beginning of the first
 739 | c         iteration and every nprint iterations thereafter and
 740 | c         immediately prior to return, with x, fvec, and fjac
 741 | c         available for printing. fvec and fjac should not be
 742 | c         altered. if nprint is not positive, no special calls
 743 | c         of fcn with iflag = 0 are made.
 744 | c
 745 | c       info is an integer output variable. if the user has
 746 | c         terminated execution, info is set to the (negative)
 747 | c         value of iflag. see description of fcn. otherwise,
 748 | c         info is set as follows.
 749 | c
 750 | c         info = 0  improper input parameters.
 751 | c
 752 | c         info = 1  both actual and predicted relative reductions
 753 | c                   in the sum of squares are at most ftol.
 754 | c
 755 | c         info = 2  relative error between two consecutive iterates
 756 | c                   is at most xtol.
 757 | c
 758 | c         info = 3  conditions for info = 1 and info = 2 both hold.
 759 | c
 760 | c         info = 4  the cosine of the angle between fvec and any
 761 | c                   column of the jacobian is at most gtol in
 762 | c                   absolute value.
 763 | c
 764 | c         info = 5  number of calls to fcn with iflag = 1 has
 765 | c                   reached maxfev.
 766 | c
 767 | c         info = 6  ftol is too small. no further reduction in
 768 | c                   the sum of squares is possible.
 769 | c
 770 | c         info = 7  xtol is too small. no further improvement in
 771 | c                   the approximate solution x is possible.
 772 | c
 773 | c         info = 8  gtol is too small. fvec is orthogonal to the
 774 | c                   columns of the jacobian to machine precision.
 775 | c
 776 | c       nfev is an integer output variable set to the number of
 777 | c         calls to fcn with iflag = 1.
 778 | c
 779 | c       njev is an integer output variable set to the number of
 780 | c         calls to fcn with iflag = 2.
 781 | c
 782 | c       ipvt is an integer output array of length N. ipvt
 783 | c         defines a permutation matrix p such that jac*p = q*r,
 784 | c         where jac is the final calculated jacobian, q is
 785 | c         orthogonal (not stored), and r is upper triangular
 786 | c         with diagonal elements of nonincreasing magnitude.
 787 | c         column j of p is column ipvt(j) of the identity matrix.
 788 | c
 789 | c       qtf is an output array of length N which contains
 790 | c         the first n elements of the vector (q transpose)*fvec.
 791 |      */
 792 | 
 793 | 	wa1 = (double*) malloc(sizeof(double) *N);
 794 | 	wa2 = (double*) malloc(sizeof(double) *N);
 795 | 	wa3 = (double*) malloc(sizeof(double) *N);
 796 | 	wa4 = (double*) malloc(sizeof(double) *M);
 797 | 
 798 |     one = 1.0;
 799 |     zero = 0.0;
 800 |     p1 = 1.0e-1; p5 = 5.0e-1; p25 = 2.5e-1; p75 = 7.5e-1; p0001 = 1.0e-4;
 801 |     epsmch = eps;
 802 | 
 803 |     info = 0;
 804 |     *nfev = 0;
 805 |     *njev = 0;
 806 | 
 807 |     if (N <= 0 || M < N || ldfjac < M || ftol < zero || xtol < zero || gtol < zero || maxfev <= 0 || factor <= zero) {
 808 |     	return info;
 809 |     }
 810 |     if (mode == 2) {
 811 | 		for(j = 0; j < N;++j) {
 812 | 			if (diag[j] <= 0.0) {
 813 | 				return info;
 814 | 			}
 815 | 		}
 816 |     }
 817 | 
 818 |     //     evaluate the function at the starting point
 819 |     //     and calculate its norm.
 820 | 
 821 |     FUNCMULT_EVAL(funcmult,x,M,N,fvec);
 822 |     *nfev= 1;
 823 |     fnorm = enorm(fvec,M);
 824 | 
 825 |     //     initialize levenberg-marquardt parameter and iteration counter.
 826 |     par = zero;
 827 |     iter = 1;
 828 |     ratio = zero;
 829 | 
 830 |     //     beginning of the outer loop.
 831 | 
 832 |     while(1) {
 833 |     	//        calculate the jacobian matrix.
 834 |     	ratio = zero;
 835 |     	JACOBIAN_EVAL(jacobian,x,M,N,fjac);
 836 |     	*njev = *njev +1;
 837 | 
 838 |     	//        compute the qr factorization of the jacobian.
 839 | 
 840 |     	qrfac(fjac,M,N,ldfjac,1,ipvt,N,wa1,wa2,eps);
 841 | 
 842 |     	//        on the first iteration and if mode is 1, scale according
 843 |     	//        to the norms of the columns of the initial jacobian.
 844 | 
 845 |     	if (iter == 1) {//80
 846 |     		if (mode != 2) {//60
 847 |     			for(j = 0;j < N;++j) {
 848 |     				diag[j] = wa2[j];
 849 |     				if (wa2[j] == zero) {
 850 |     					diag[j] = one;
 851 |     				}
 852 |     			}
 853 |     		}//60
 854 | 
 855 |     		//        on the first iteration, calculate the norm of the scaled x
 856 |     		//        and initialize the step bound delta.
 857 | 
 858 |     		for(j = 0; j < N;++j) {
 859 |     			wa3[j] = diag[j]*x[j];
 860 |     		}
 861 |             xnorm = enorm(wa3,N);
 862 |             delta = factor*xnorm;
 863 | 
 864 |             if (delta == zero) {
 865 |             	delta = factor;
 866 |             }
 867 | 
 868 |     	}//80
 869 | 
 870 |         //        form (q transpose)*fvec and store the first n components in
 871 |         //        qtf.
 872 | 
 873 |     	for(i = 0; i < M;++i) {
 874 |     		wa4[i] = fvec[i];
 875 |     	}
 876 | 
 877 |     	for(j = 0; j < N;++j) {//130
 878 |     		if (fjac[j*N+j] != zero) {//120
 879 |     			sum = zero;
 880 |     			for(i = j; i < M;++i) {//100
 881 |     				sum = sum + fjac[i*N+j]*wa4[i];
 882 |     			}//100
 883 |     			temp = -sum/fjac[j*N+j];
 884 |     			for(i = j; i < M;++i) {//110
 885 |     				wa4[i] = wa4[i] + fjac[i*N+j]*temp;
 886 |     			}//110
 887 |     		}//120
 888 |             fjac[j*N+j] = wa1[j];
 889 |             qtf[j] = wa4[j];
 890 |     	}//130
 891 | 
 892 |     	//        compute the norm of the scaled gradient.
 893 |     	gnorm = zero;
 894 | 
 895 |     	if (fnorm != zero) {//170
 896 |     		for(j = 0; j < N;++j) {//160
 897 |     			l = ipvt[j];
 898 |     			if (wa2[l] != zero) {//150
 899 |     				sum = zero;
 900 |     				for(i = 0; i <= j;++i) { //140
 901 |     					sum = sum + fjac[i*N+j]*(qtf[i]/fnorm);
 902 |     				}//140
 903 |     				gnorm = pmax(gnorm,fabs(sum/wa2[l]));
 904 |     			}//150
 905 |     		}//160
 906 |     	}//170
 907 | 
 908 |     	//        test for convergence of the gradient norm.
 909 |     	if (gnorm <= gtol) {
 910 |     		info = 4;
 911 |     	}
 912 |     	if (info != 0) {
 913 |     		break;
 914 |     	}
 915 | 
 916 |     	//        rescale if necessary.
 917 |     	if (mode != 2) { //190
 918 |     		for(j = 0; j < N;++j) {
 919 |     			diag[j] = pmax(diag[j],wa2[j]);
 920 |     		}
 921 |     	}//190
 922 | 
 923 |     	//        beginning of the inner loop.
 924 | 
 925 |     	while(ratio < p0001) {
 926 |     		//           determine the levenberg-marquardt parameter.
 927 |     		lmpar(fjac,ldfjac,N,ipvt,diag,qtf,delta,&par,wa1,wa2);
 928 |     		//           store the direction p and x + p. calculate the norm of p.
 929 |     		for(j = 0; j < N;++j) {
 930 |                 wa1[j] = -wa1[j];
 931 |                 wa2[j] = x[j] + wa1[j];
 932 |                 wa3[j] = diag[j]*wa1[j];
 933 |     		}
 934 |     		pnorm = enorm(wa3,N);
 935 |     		//           on the first iteration, adjust the initial step bound.
 936 |     		if (iter == 1) {
 937 |     			delta = pmin(delta,pnorm);
 938 |     		}
 939 |     		//           evaluate the function at x + p and calculate its norm.
 940 | 
 941 |     		FUNCMULT_EVAL(funcmult,wa2,M,N,wa4);
 942 |     		*nfev = *nfev + 1;
 943 |     		fnorm1 = enorm(wa4,M);
 944 | 
 945 |     		//           compute the scaled actual reduction.
 946 | 
 947 |             actred = -one;
 948 |             if (p1*fnorm1 < fnorm) {
 949 |             	actred = one - (fnorm1/fnorm)*(fnorm1/fnorm);
 950 |             }
 951 | 
 952 |             //           compute the scaled predicted reduction and
 953 |             //           the scaled directional derivative.
 954 | 
 955 |             for(j = 0; j < N;++j) {
 956 |                 wa3[j] = zero;
 957 |                 l = ipvt[j];
 958 |                 temp = wa1[l];
 959 |                 for(i = 0;i <= j;++i) {
 960 |                 	wa3[i] = wa3[i] + fjac[i*N+j]*temp;
 961 |                 }
 962 |             }
 963 | 
 964 |             temp1 = enorm(wa3,N);
 965 |             temp1 = temp1/fnorm;
 966 |             temp2 = (sqrt(par)*pnorm)/fnorm;
 967 |             prered = temp1*temp1 + temp2*temp2/p5;
 968 |             dirder = -(temp1*temp1 + temp2*temp2);
 969 |             //           compute the ratio of the actual to the predicted
 970 |             //           reduction.
 971 |             ratio = zero;
 972 |             if (prered != zero) {
 973 |             	ratio = actred/prered;
 974 |             }
 975 |             //           update the step bound.
 976 | 
 977 |             if (ratio <= p25) {//240
 978 |             	if (actred >= zero) {
 979 |             		temp = p5;
 980 |             	}
 981 |             	if (actred < zero) {
 982 |             		temp = p5*dirder/(dirder + p5*actred);
 983 |             	}
 984 |             	if (p1*fnorm1 >= fnorm || temp < p1) {
 985 |             		temp = p1;
 986 |             	}
 987 |                 delta = temp*pmin(delta,pnorm/p1);
 988 |                 par = par/temp;
 989 |             } else if (par == zero || ratio >= p75){//240 - 260
 990 |                 delta = pnorm/p5;
 991 |                 par = p5*par;
 992 |             }//260
 993 | 
 994 |             //           test for successful iteration.
 995 | 
 996 |             if (ratio >= p0001) {//290
 997 |             	//           successful iteration. update x, fvec, and their norms.
 998 |             	for(j = 0; j < N;++j) {
 999 |                     x[j] = wa2[j];
1000 |                     wa2[j] = diag[j]*x[j];
1001 |             	}
1002 |             	for(i = 0; i < M;++i) {
1003 |             		fvec[i] = wa4[i];
1004 |             	}
1005 |                 xnorm = enorm(wa2,N);
1006 |                 fnorm = fnorm1;
1007 |                 iter = iter + 1;
1008 |             }//290
1009 |             //           tests for convergence.
1010 |             if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one)) {
1011 |             	info = 1;
1012 |             }
1013 |             if (delta <= xtol*xnorm) {
1014 |             	info = 2;
1015 |             }
1016 |             if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one) && (info == 2)) {
1017 |             	info = 3;
1018 |             }
1019 |             if (info != 0) {
1020 |             	break;
1021 |             }
1022 | 
1023 |             //           tests for termination and stringent tolerances.
1024 |             if (*nfev >= maxfev) {
1025 |             	info = 5;
1026 |             }
1027 |             if ((fabs(actred) <= epsmch) && (prered <= epsmch) && (p5*ratio <= one)) {
1028 |             	info = 6;
1029 |             }
1030 |             if (delta <= epsmch*xnorm) {
1031 |             	info = 7;
1032 |             }
1033 |             if (gnorm <= epsmch) {
1034 |             	info = 8;
1035 |             }
1036 |             if (info != 0) {
1037 |             	break;
1038 |             }
1039 | 
1040 |     	}
1041 | 
1042 |         if (info != 0) {
1043 |         	break;
1044 |         }
1045 | 
1046 | 
1047 |     }
1048 | 
1049 | 
1050 |     free(wa1);
1051 |     free(wa2);
1052 |     free(wa3);
1053 |     free(wa4);
1054 | 
1055 | 	return info;
1056 | }
1057 | 
1058 | int lmdif(custom_funcmult *funcmult, double *x, int M, int N, double *fvec, double *fjac, int ldfjac,
1059 | 		int maxfev,double *diag,int mode,double factor,int nprint,double eps,double epsfcn,double ftol,double gtol,
1060 | 		double xtol,int *nfev,int *njev,int *ipvt, double *qtf) {
1061 | 	int info;
1062 | 	int i,j,l,iter;
1063 |     double actred,delta,dirder,epsmch,fnorm,fnorm1,gnorm,one,par,pnorm,prered,p1,p5,p25,p75,p0001,ratio,
1064 |     sum,temp,temp1,temp2,xnorm,zero;
1065 |     double *wa1,*wa2,*wa3,*wa4;
1066 | 
1067 |     /*
1068 |      * 	*   This routine is a C translation of Fortran Code by
1069 |     *     argonne national laboratory. minpack project. march 1980.
1070 |      	  burton s. garbow, kenneth e. hillstrom, jorge j. more
1071 |      *  M is a positive integer input variable set to the number
1072 | c         of functions.
1073 | c
1074 | c       N is a positive integer input variable set to the number
1075 | c         of variables. N must not exceed M.
1076 | c
1077 | c       x is an array of length N. on input x must contain
1078 | c         an initial estimate of the solution vector. on output x
1079 | c         contains the final estimate of the solution vector.
1080 | c
1081 | c       fvec is an output array of length M which contains
1082 | c         the functions evaluated at the output x.
1083 | c
1084 | c       fjac is an output M by N array. the upper N by N submatrix
1085 | c         of fjac contains an upper triangular matrix r with
1086 | c         diagonal elements of nonincreasing magnitude such that
1087 | c
1088 | c                t     t           t
1089 | c               p *(jac *jac)*p = r *r,
1090 | c
1091 | c         where p is a permutation matrix and jac is the final
1092 | c         calculated jacobian. column j of p is column ipvt(j)
1093 | c         (see below) of the identity matrix. the lower trapezoidal
1094 | c         part of fjac contains information generated during
1095 | c         the computation of r.
1096 | c
1097 | c       ldfjac is a positive integer input variable not less than M
1098 | c         which specifies the leading dimension of the array fjac.
1099 | c
1100 | c       ftol is a nonnegative input variable. termination
1101 | c         occurs when both the actual and predicted relative
1102 | c         reductions in the sum of squares are at most ftol.
1103 | c         therefore, ftol measures the relative error desired
1104 | c         in the sum of squares.
1105 | c
1106 | c       xtol is a nonnegative input variable. termination
1107 | c         occurs when the relative error between two consecutive
1108 | c         iterates is at most xtol. therefore, xtol measures the
1109 | c         relative error desired in the approximate solution.
1110 | c
1111 | c       gtol is a nonnegative input variable. termination
1112 | c         occurs when the cosine of the angle between fvec and
1113 | c         any column of the jacobian is at most gtol in absolute
1114 | c         value. therefore, gtol measures the orthogonality
1115 | c         desired between the function vector and the columns
1116 | c         of the jacobian.
1117 | c
1118 | c       maxfev is a positive integer input variable. termination
1119 | c         occurs when the number of calls to fcn with iflag = 1
1120 | c         has reached maxfev.
1121 | c
1122 | c       epsfcn is an input variable used in determining a suitable
1123 | c         step length for the forward-difference approximation. this
1124 | c         approximation assumes that the relative errors in the
1125 | c         functions are of the order of epsfcn. if epsfcn is less
1126 | c         than the machine precision, it is assumed that the relative
1127 | c         errors in the functions are of the order of the machine
1128 | c         precision.
1129 | c
1130 | c       diag is an array of length N. if mode = 1 (see
1131 | c         below), diag is internally set. if mode = 2, diag
1132 | c         must contain positive entries that serve as
1133 | c         multiplicative scale factors for the variables.
1134 | c
1135 | c       mode is an integer input variable. if mode = 1, the
1136 | c         variables will be scaled internally. if mode = 2,
1137 | c         the scaling is specified by the input diag. other
1138 | c         values of mode are equivalent to mode = 1.
1139 | c
1140 | c       factor is a positive input variable used in determining the
1141 | c         initial step bound. this bound is set to the product of
1142 | c         factor and the euclidean norm of diag*x if nonzero, or else
1143 | c         to factor itself. in most cases factor should lie in the
1144 | c         interval (.1,100.).100. is a generally recommended value.
1145 | c
1146 | c       nprint is an integer input variable that enables controlled
1147 | c         printing of iterates if it is positive. in this case,
1148 | c         fcn is called with iflag = 0 at the beginning of the first
1149 | c         iteration and every nprint iterations thereafter and
1150 | c         immediately prior to return, with x, fvec, and fjac
1151 | c         available for printing. fvec and fjac should not be
1152 | c         altered. if nprint is not positive, no special calls
1153 | c         of fcn with iflag = 0 are made.
1154 | c
1155 | c       info is an integer output variable. if the user has
1156 | c         terminated execution, info is set to the (negative)
1157 | c         value of iflag. see description of fcn. otherwise,
1158 | c         info is set as follows.
1159 | c
1160 | c         info = 0  improper input parameters.
1161 | c
1162 | c         info = 1  both actual and predicted relative reductions
1163 | c                   in the sum of squares are at most ftol.
1164 | c
1165 | c         info = 2  relative error between two consecutive iterates
1166 | c                   is at most xtol.
1167 | c
1168 | c         info = 3  conditions for info = 1 and info = 2 both hold.
1169 | c
1170 | c         info = 4  the cosine of the angle between fvec and any
1171 | c                   column of the jacobian is at most gtol in
1172 | c                   absolute value.
1173 | c
1174 | c         info = 5  number of calls to fcn with iflag = 1 has
1175 | c                   reached maxfev.
1176 | c
1177 | c         info = 6  ftol is too small. no further reduction in
1178 | c                   the sum of squares is possible.
1179 | c
1180 | c         info = 7  xtol is too small. no further improvement in
1181 | c                   the approximate solution x is possible.
1182 | c
1183 | c         info = 8  gtol is too small. fvec is orthogonal to the
1184 | c                   columns of the jacobian to machine precision.
1185 | c
1186 | c       nfev is an integer output variable set to the number of
1187 | c         calls to fcn with iflag = 1.
1188 | c
1189 | c       njev is an integer output variable set to the number of
1190 | c         calls to fcn with iflag = 2.
1191 | c
1192 | c       ipvt is an integer output array of length N. ipvt
1193 | c         defines a permutation matrix p such that jac*p = q*r,
1194 | c         where jac is the final calculated jacobian, q is
1195 | c         orthogonal (not stored), and r is upper triangular
1196 | c         with diagonal elements of nonincreasing magnitude.
1197 | c         column j of p is column ipvt(j) of the identity matrix.
1198 | c
1199 | c       qtf is an output array of length N which contains
1200 | c         the first n elements of the vector (q transpose)*fvec.
1201 |      */
1202 | 
1203 | 	wa1 = (double*) malloc(sizeof(double) *N);
1204 | 	wa2 = (double*) malloc(sizeof(double) *N);
1205 | 	wa3 = (double*) malloc(sizeof(double) *N);
1206 | 	wa4 = (double*) malloc(sizeof(double) *M);
1207 | 
1208 |     one = 1.0;
1209 |     zero = 0.0;
1210 |     p1 = 1.0e-1; p5 = 5.0e-1; p25 = 2.5e-1; p75 = 7.5e-1; p0001 = 1.0e-4;
1211 |     epsmch = eps;
1212 | 
1213 |     info = 0;
1214 |     *nfev = 0;
1215 |     *njev = 0;
1216 | 
1217 |     if (N <= 0 || M < N || ldfjac < M || ftol < zero || xtol < zero || gtol < zero || maxfev <= 0 || factor <= zero) {
1218 |     	return info;
1219 |     }
1220 | 
1221 |     if (mode == 2) {
1222 | 		for(j = 0; j < N;++j) {
1223 | 			if (diag[j] <= 0.0) {
1224 | 				return info;
1225 | 			}
1226 | 		}
1227 |     }
1228 | 
1229 |     //     evaluate the function at the starting point
1230 |     //     and calculate its norm.
1231 | 
1232 |     FUNCMULT_EVAL(funcmult,x,M,N,fvec);
1233 |     *nfev= 1;
1234 |     fnorm = enorm(fvec,M);
1235 | 
1236 |     //     initialize levenberg-marquardt parameter and iteration counter.
1237 |     par = zero;
1238 |     iter = 1;
1239 |     ratio = zero;
1240 | 
1241 |     //     beginning of the outer loop.
1242 | 
1243 |     while(1) {
1244 |     	//        calculate the jacobian matrix.
1245 |     	ratio = zero;
1246 |     	fdjac2(funcmult,x,M,N,fvec,fjac,ldfjac,epsfcn,epsmch);
1247 |     	*njev = *njev + N;
1248 | 
1249 |     	//        compute the qr factorization of the jacobian.
1250 | 
1251 |     	qrfac(fjac,M,N,ldfjac,1,ipvt,N,wa1,wa2,eps);
1252 | 
1253 |     	//        on the first iteration and if mode is 1, scale according
1254 |     	//        to the norms of the columns of the initial jacobian.
1255 | 
1256 |     	if (iter == 1) {//80
1257 |     		if (mode != 2) {//60
1258 |     			for(j = 0;j < N;++j) {
1259 |     				diag[j] = wa2[j];
1260 |     				if (wa2[j] == zero) {
1261 |     					diag[j] = one;
1262 |     				}
1263 |     			}
1264 |     		}//60
1265 | 
1266 |     		//        on the first iteration, calculate the norm of the scaled x
1267 |     		//        and initialize the step bound delta.
1268 | 
1269 |     		for(j = 0; j < N;++j) {
1270 |     			wa3[j] = diag[j]*x[j];
1271 |     		}
1272 |             xnorm = enorm(wa3,N);
1273 |             delta = factor*xnorm;
1274 | 
1275 |             if (delta == zero) {
1276 |             	delta = factor;
1277 |             }
1278 | 
1279 |     	}//80
1280 | 
1281 |         //        form (q transpose)*fvec and store the first n components in
1282 |         //        qtf.
1283 | 
1284 |     	for(i = 0; i < M;++i) {
1285 |     		wa4[i] = fvec[i];
1286 |     	}
1287 | 
1288 |     	for(j = 0; j < N;++j) {//130
1289 |     		if (fjac[j*N+j] != zero) {//120
1290 |     			sum = zero;
1291 |     			for(i = j; i < M;++i) {//100
1292 |     				sum = sum + fjac[i*N+j]*wa4[i];
1293 |     			}//100
1294 |     			temp = -sum/fjac[j*N+j];
1295 |     			for(i = j; i < M;++i) {//110
1296 |     				wa4[i] = wa4[i] + fjac[i*N+j]*temp;
1297 |     			}//110
1298 |     		}//120
1299 |             fjac[j*N+j] = wa1[j];
1300 |             qtf[j] = wa4[j];
1301 |     	}//130
1302 | 
1303 |     	//        compute the norm of the scaled gradient.
1304 |     	gnorm = zero;
1305 | 
1306 |     	if (fnorm != zero) {//170
1307 |     		for(j = 0; j < N;++j) {//160
1308 |     			l = ipvt[j];
1309 |     			if (wa2[l] != zero) {//150
1310 |     				sum = zero;
1311 |     				for(i = 0; i <= j;++i) { //140
1312 |     					sum = sum + fjac[i*N+j]*(qtf[i]/fnorm);
1313 |     				}//140
1314 |     				gnorm = pmax(gnorm,fabs(sum/wa2[l]));
1315 |     			}//150
1316 |     		}//160
1317 |     	}//170
1318 | 
1319 |     	//        test for convergence of the gradient norm.
1320 |     	if (gnorm <= gtol) {
1321 |     		info = 4;
1322 |     	}
1323 |     	if (info != 0) {
1324 |     		break;
1325 |     	}
1326 | 
1327 |     	//        rescale if necessary.
1328 |     	if (mode != 2) { //190
1329 |     		for(j = 0; j < N;++j) {
1330 |     			diag[j] = pmax(diag[j],wa2[j]);
1331 |     		}
1332 |     	}//190
1333 | 
1334 |     	//        beginning of the inner loop.
1335 | 
1336 |     	while(ratio < p0001) {
1337 |     		//           determine the levenberg-marquardt parameter.
1338 |     		lmpar(fjac,ldfjac,N,ipvt,diag,qtf,delta,&par,wa1,wa2);
1339 |     		//           store the direction p and x + p. calculate the norm of p.
1340 |     		for(j = 0; j < N;++j) {
1341 |                 wa1[j] = -wa1[j];
1342 |                 wa2[j] = x[j] + wa1[j];
1343 |                 wa3[j] = diag[j]*wa1[j];
1344 |     		}
1345 | 			
1346 |     		pnorm = enorm(wa3,N);
1347 |     		//           on the first iteration, adjust the initial step bound.
1348 |     		if (iter == 1) {
1349 |     			delta = pmin(delta,pnorm);
1350 |     		}
1351 |     		//           evaluate the function at x + p and calculate its norm.
1352 | 
1353 |     		FUNCMULT_EVAL(funcmult,wa2,M,N,wa4);
1354 |     		*nfev = *nfev + 1;
1355 |     		fnorm1 = enorm(wa4,M);
1356 | 
1357 |     		//           compute the scaled actual reduction.
1358 | 
1359 |             actred = -one;
1360 |             if (p1*fnorm1 < fnorm) {
1361 |             	actred = one - (fnorm1/fnorm)*(fnorm1/fnorm);
1362 |             }
1363 | 
1364 |             //           compute the scaled predicted reduction and
1365 |             //           the scaled directional derivative.
1366 | 
1367 |             for(j = 0; j < N;++j) {
1368 |                 wa3[j] = zero;
1369 |                 l = ipvt[j];
1370 |                 temp = wa1[l];
1371 |                 for(i = 0;i <= j;++i) {
1372 |                 	wa3[i] = wa3[i] + fjac[i*N+j]*temp;
1373 |                 }
1374 |             }
1375 | 
1376 |             temp1 = enorm(wa3,N);
1377 |             temp1 = temp1/fnorm;
1378 |             temp2 = (sqrt(par)*pnorm)/fnorm;
1379 |             prered = temp1*temp1 + temp2*temp2/p5;
1380 |             dirder = -(temp1*temp1 + temp2*temp2);
1381 |             //           compute the ratio of the actual to the predicted
1382 |             //           reduction.
1383 |             ratio = zero;
1384 |             if (prered != zero) {
1385 |             	ratio = actred/prered;
1386 |             }
1387 |             //           update the step bound.
1388 | 			
1389 | 
1390 |             if (ratio <= p25) {//240
1391 |             	if (actred >= zero) {
1392 |             		temp = p5;
1393 |             	}
1394 |             	if (actred < zero) {
1395 |             		temp = p5*dirder/(dirder + p5*actred);
1396 |             	}
1397 |             	if (p1*fnorm1 >= fnorm || temp < p1) {
1398 |             		temp = p1;
1399 |             	}
1400 |                 delta = temp*pmin(delta,pnorm/p1);
1401 |                 par = par/temp;
1402 |             } else if (par == zero || ratio >= p75){//240 - 260
1403 |                 delta = pnorm/p5;
1404 |                 par = p5*par;
1405 |             }//260
1406 | 
1407 |             //           test for successful iteration.
1408 | 
1409 |             if (ratio >= p0001) {//290
1410 |             	//           successful iteration. update x, fvec, and their norms.
1411 |             	for(j = 0; j < N;++j) {
1412 |                     x[j] = wa2[j];
1413 |                     wa2[j] = diag[j]*x[j];
1414 |             	}
1415 |             	for(i = 0; i < M;++i) {
1416 |             		fvec[i] = wa4[i];
1417 |             	}
1418 |                 xnorm = enorm(wa2,N);
1419 |                 fnorm = fnorm1;
1420 |                 iter = iter + 1;
1421 |             }//290
1422 |             //           tests for convergence.
1423 |             if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one)) {
1424 |             	info = 1;
1425 |             }
1426 |             if (delta <= xtol*xnorm) {
1427 |             	info = 2;
1428 |             }
1429 |             if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one) && (info == 2)) {
1430 |             	info = 3;
1431 |             }
1432 |             if (info != 0) {
1433 |             	break;
1434 |             }
1435 | 
1436 |             //           tests for termination and stringent tolerances.
1437 |             if (*nfev >= maxfev) {
1438 |             	info = 5;
1439 |             }
1440 |             if ((fabs(actred) <= epsmch) && (prered <= epsmch) && (p5*ratio <= one)) {
1441 |             	info = 6;
1442 |             }
1443 |             if (delta <= epsmch*xnorm) {
1444 |             	info = 7;
1445 |             }
1446 |             if (gnorm <= epsmch) {
1447 |             	info = 8;
1448 |             }
1449 |             if (info != 0) {
1450 |             	break;
1451 |             }
1452 | 
1453 |     	}
1454 | 
1455 |         if (info != 0) {
1456 |         	break;
1457 |         }
1458 | 
1459 | 
1460 |     }
1461 | 
1462 | 
1463 |     free(wa1);
1464 |     free(wa2);
1465 |     free(wa3);
1466 |     free(wa4);
1467 | 
1468 | 	return info;
1469 | }
1470 | 
1471 | 
1472 | 
1473 | 
1474 | 
1475 | 


--------------------------------------------------------------------------------