├── README.md
├── LICENSE
├── CMakeLists.txt
├── include
    └── MCL
    │   ├── Newton.hpp
    │   ├── NonLinearCG.hpp
    │   ├── WolfeBisection.hpp
    │   ├── Minimizer.hpp
    │   ├── LBFGS.hpp
    │   ├── Problem.hpp
    │   ├── Backtracking.hpp
    │   ├── TrustRegion.hpp
    │   └── MoreThuente.hpp
├── cmake
    └── FindEigen3.cmake
└── test
    ├── TestProblem.hpp
    └── testSolvers.cpp


/README.md:
--------------------------------------------------------------------------------
 1 | # mcloptlib
 2 | 
 3 | By Matt Overby  
 4 | [http://www.mattoverby.net](http://www.mattoverby.net)
 5 | 
mcloptlib is a header-only optimization library for C++ that uses Eigen and is geared towards low-dimensional graphics problems.
It was originally a fork of [Patrick Wieschollek's CppOptimizationLibrary](https://github.com/PatWie/CppNumericalSolvers), but has since diverged considerably.
 8 | 
 9 | ## Contents:
10 | 
11 | Optimization algorithms:
12 | - Newton's
13 | - Non-linear conjugate gradient
14 | - L-BFGS
15 | - Trust Region with
16 |   - Cauchy Point
17 |   - Dog Leg
18 | 
19 | Linesearch methods:
20 | - Backtracking (Armijo)
21 | - Backtracking with cubic interpolation
22 | - Bisection
23 | - MoreThuente
24 | 
25 | ## To-do:
26 | 
27 | - Option of std::function for value/gradient instead of Problem class
28 | - Sparse Hessians
29 | - Auto-diff
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2017 Matt Overby
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # The MIT License (MIT)
 2 | # Copyright (c) 2017 Matt Overby
 3 | # 
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | # of this software and associated documentation files (the "Software"), to deal
 6 | # in the Software without restriction, including without limitation the rights
 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | # copies of the Software, and to permit persons to whom the Software is
 9 | # furnished to do so, subject to the following conditions:
10 | # 
11 | # The above copyright notice and this permission notice shall be included in all
12 | # copies or substantial portions of the Software.
13 | # 
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | # SOFTWARE.
21 | 
# Build configuration for mcloptlib. The library is header-only, so the only
# build artifact is the `testSolvers` executable that backs the CTest tests.
#
# The "3.1...3.28" range keeps 3.1 as the minimum required version while
# opting in to newer policy behaviour when it is available; CMake releases
# older than 3.12 ignore the "...<max>" suffix.
cmake_minimum_required(VERSION 3.1...3.28)
project(mcloptlib C CXX)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})

# Default to a Debug build, but only for single-configuration generators and
# only when no build type was requested (e.g. -DCMAKE_BUILD_TYPE=Release must
# not be overridden).
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()

# Uses cmake/FindEigen3.cmake (found through CMAKE_MODULE_PATH above), which
# provides EIGEN3_INCLUDE_DIR.
find_package(Eigen3 REQUIRED)

enable_testing()
add_executable(testSolvers test/testSolvers.cpp)

# All build settings are attached to the test target instead of the directory.
target_compile_definitions(testSolvers PRIVATE MCL_DEBUG=1)
# Eigen is included as a SYSTEM directory, as before.
target_include_directories(testSolvers SYSTEM PRIVATE "${EIGEN3_INCLUDE_DIR}")
target_include_directories(testSolvers PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  target_compile_options(testSolvers PRIVATE -Wall -Wextra -Wno-long-long)
endif()

# One CTest entry per solver; the solver is selected by the command-line
# argument. The NAME/COMMAND signature resolves the target name to the built
# executable for every build configuration.
add_test(NAME testLBFGS COMMAND testSolvers lbfgs)
add_test(NAME testCG COMMAND testSolvers cg)
add_test(NAME testNewton COMMAND testSolvers newton)
43 | 


--------------------------------------------------------------------------------
/include/MCL/Newton.hpp:
--------------------------------------------------------------------------------
 1 | // The MIT License (MIT)
 2 | // Copyright (c) 2017 Matt Overby
 3 | // 
 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | // of this software and associated documentation files (the "Software"), to deal
 6 | // in the Software without restriction, including without limitation the rights
 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | // copies of the Software, and to permit persons to whom the Software is
 9 | // furnished to do so, subject to the following conditions:
10 | // 
11 | // The above copyright notice and this permission notice shall be included in all
12 | // copies or substantial portions of the Software.
13 | // 
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | // SOFTWARE.
21 | 
22 | #ifndef MCL_NEWTON_H
23 | #define MCL_NEWTON_H
24 | 
25 | #include "Minimizer.hpp"
26 | 
27 | namespace mcl {
28 | namespace optlib {
29 | 
30 | template<typename Scalar, int DIM>
31 | class Newton : public Minimizer<Scalar,DIM> {
32 | private:
33 | 	typedef Eigen::Matrix<Scalar,DIM,1> VectorX;
34 | 	typedef Eigen::Matrix<Scalar,DIM,DIM> MatrixX;
35 | 
36 | public:
37 | 	Newton() {
38 | 		this->m_settings.max_iters = 20;
39 | 	}
40 | 
41 | 	int minimize(Problem<Scalar,DIM> &problem, VectorX &x){
42 | 
43 | 		VectorX grad, delta_x, x_last;
44 | 		if( DIM  == Eigen::Dynamic ){
45 | 			int dim = x.rows();
46 | 			x_last.resize(dim);
47 | 			grad.resize(dim);
48 | 			delta_x.resize(dim);
49 | 		}
50 | 
51 | 		int verbose = this->m_settings.verbose;
52 | 		int max_iters = this->m_settings.max_iters;
53 | 		int iter = 0;
54 | 		for( ; iter < max_iters; ++iter ){
55 | 
56 | 			problem.gradient(x,grad);
57 | 			problem.solve_hessian(x,grad,delta_x);
58 | 
59 | 			Scalar rate = this->linesearch(x, delta_x, problem, 1.0);
60 | 
61 | 			if( rate <= 0 ){
62 | 				if( verbose > 0 ){ printf("Newton::minimize: Failure in linesearch\n"); }
63 | 				return Minimizer<Scalar,DIM>::FAILURE;
64 | 			}
65 | 
66 | 			x_last = x;
67 | 			x += rate * delta_x;
68 | 			if( problem.converged(x_last,x,grad) ){ break; }
69 | 		}
70 | 
71 | 		return iter;
72 | 	}
73 | 
74 | };
75 | 
76 | } // ns optlib
77 | } // ns mcl
78 | 
79 | #endif
80 | 


--------------------------------------------------------------------------------
/include/MCL/NonLinearCG.hpp:
--------------------------------------------------------------------------------
 1 | // The MIT License (MIT)
 2 | // Copyright (c) 2017 Matt Overby
 3 | // 
 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | // of this software and associated documentation files (the "Software"), to deal
 6 | // in the Software without restriction, including without limitation the rights
 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | // copies of the Software, and to permit persons to whom the Software is
 9 | // furnished to do so, subject to the following conditions:
10 | // 
11 | // The above copyright notice and this permission notice shall be included in all
12 | // copies or substantial portions of the Software.
13 | // 
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | // SOFTWARE.
21 | 
22 | #ifndef MCL_NONLINEARCG_H
23 | #define MCL_NONLINEARCG_H
24 | 
25 | #include "Minimizer.hpp"
26 | 
27 | namespace mcl {
28 | namespace optlib {
29 | 
30 | template<typename Scalar, int DIM>
31 | class NonLinearCG : public Minimizer<Scalar,DIM> {
32 | private:
33 | 	typedef Eigen::Matrix<Scalar,DIM,1> VectorX;
34 | 	typedef Eigen::Matrix<Scalar,DIM,DIM> MatrixX;
35 | 
36 | public:
37 | 	NonLinearCG() {
38 | 		this->m_settings.max_iters = 100;
39 | 	}
40 | 
41 | 	int minimize(Problem<Scalar,DIM> &problem, VectorX &x){
42 | 
43 | 		VectorX grad, grad_old, p, x_last;
44 | 		if( DIM == Eigen::Dynamic ){
45 | 			int dim = x.rows();
46 | 			x_last.setZero(dim);
47 | 			grad.resize(dim);
48 | 			grad_old.resize(dim);
49 | 			p.resize(dim);
50 | 		}
51 | 
52 | 		int verbose = this->m_settings.verbose;
53 | 		int max_iters = this->m_settings.max_iters;
54 | 		int iter=0;
55 | 		for( ; iter<max_iters; ++iter ){
56 | 
57 | 			problem.gradient(x, grad);
58 | 
59 | 			if( iter==0 ){ p = -grad; }
60 | 			else {
61 | 				Scalar beta = grad.dot(grad) / (grad_old.dot(grad_old));
62 | 				p = -grad + beta*p;
63 | 			}
64 | 
65 | 			Scalar rate = this->linesearch(x, p, problem, 1.0);
66 | 
67 | 			if( rate <= 0 ){
68 | 				if( verbose > 0 ){ printf("NonLinearCG::minimize: Failure in linesearch\n"); }
69 | 				return Minimizer<Scalar,DIM>::FAILURE;
70 | 			}
71 | 
72 | 			x_last = x;
73 | 			x += rate*p;
74 | 			grad_old = grad;
75 | 
76 | 			if( problem.converged(x_last,x,grad) ){ break; }
77 | 		}
78 | 		return iter;
79 | 	} // end minimize
80 | 
81 | };
82 | 
83 | } // ns optlib
84 | } // ns mcl
85 | 
86 | #endif
87 | 


--------------------------------------------------------------------------------
/cmake/FindEigen3.cmake:
--------------------------------------------------------------------------------
 1 | # - Try to find Eigen3 lib
 2 | #
 3 | # This module supports requiring a minimum version, e.g. you can do
 4 | #   find_package(Eigen3 3.1.2)
 5 | # to require version 3.1.2 or newer of Eigen3.
 6 | #
 7 | # Once done this will define
 8 | #
 9 | #  EIGEN3_FOUND - system has eigen lib with correct version
10 | #  EIGEN3_INCLUDE_DIR - the eigen include directory
11 | #  EIGEN3_VERSION - eigen version
12 | 
13 | # Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
14 | # Copyright (c) 2008, 2009 Gael Guennebaud, <g.gael@free.fr>
15 | # Copyright (c) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
16 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license.
17 | 
# When the caller requested no version, assemble a default minimum version.
# Each component that is unset (or zero) falls back to its default, which
# yields 2.91.0 when nothing was given.
if(NOT Eigen3_FIND_VERSION)
  if(NOT Eigen3_FIND_VERSION_MAJOR)
    set(Eigen3_FIND_VERSION_MAJOR 2)
  endif()

  if(NOT Eigen3_FIND_VERSION_MINOR)
    set(Eigen3_FIND_VERSION_MINOR 91)
  endif()

  if(NOT Eigen3_FIND_VERSION_PATCH)
    set(Eigen3_FIND_VERSION_PATCH 0)
  endif()

  set(Eigen3_FIND_VERSION
      "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}")
endif()
31 | 
# _eigen3_check_version()
#
# Parses the Eigen version out of
# "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" and compares it with
# Eigen3_FIND_VERSION. Because this is a macro, the following variables are
# set in the caller's scope:
#   EIGEN3_WORLD_VERSION, EIGEN3_MAJOR_VERSION, EIGEN3_MINOR_VERSION
#   EIGEN3_VERSION    - "<world>.<major>.<minor>"
#   EIGEN3_VERSION_OK - FALSE when EIGEN3_VERSION is less than
#                       Eigen3_FIND_VERSION, TRUE otherwise
macro(_eigen3_check_version)
  file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header)

  # Each component is the number following the matching #define
  string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}")
  set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}")

  string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}")
  set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}")

  string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}")
  set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}")

  set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION})

  # Assume the version is acceptable unless it is older than the requested one
  set(EIGEN3_VERSION_OK TRUE)
  if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
    set(EIGEN3_VERSION_OK FALSE)
    message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, "
                   "but at least version ${Eigen3_FIND_VERSION} is required")
  endif()
endmacro()
55 | 
# Locate the Eigen3 headers, check their version and report the result.
#
# The search is skipped when EIGEN3_INCLUDE_DIR is already set (for example
# from the cache or the command line).
if(NOT EIGEN3_INCLUDE_DIR)
  find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library
      PATHS
      ${CMAKE_INSTALL_PREFIX}/include
      ${KDE4_INCLUDE_DIR}
      PATH_SUFFIXES eigen3 eigen
    )
endif()

# Sets EIGEN3_VERSION and EIGEN3_VERSION_OK
if(EIGEN3_INCLUDE_DIR)
  _eigen3_check_version()
endif()

# Always report through find_package_handle_standard_args, including when
# EIGEN3_INCLUDE_DIR was preset. This sets EIGEN3_FOUND on every path and makes
# find_package(Eigen3 REQUIRED) fail when the preset directory holds a version
# that is too old, instead of silently continuing with EIGEN3_FOUND=FALSE.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK)

mark_as_advanced(EIGEN3_INCLUDE_DIR)
81 | 
82 | 


--------------------------------------------------------------------------------
/include/MCL/WolfeBisection.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // Copyright (c) 2018 Matt Overby
  3 | // 
  4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | // of this software and associated documentation files (the "Software"), to deal
  6 | // in the Software without restriction, including without limitation the rights
  7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | // copies of the Software, and to permit persons to whom the Software is
  9 | // furnished to do so, subject to the following conditions:
 10 | // 
 11 | // The above copyright notice and this permission notice shall be included in all
 12 | // copies or substantial portions of the Software.
 13 | // 
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 | // SOFTWARE.
 21 | 
 22 | #ifndef MCL_WOLFEBISECTION_H
 23 | #define MCL_WOLFEBISECTION_H
 24 | 
 25 | #include "Problem.hpp"
 26 | 
 27 | namespace mcl {
 28 | namespace optlib {
 29 | 
 30 | // Bisection method for Weak Wolfe conditions
 31 | template<typename Scalar, int DIM>
 32 | class WolfeBisection {
 33 | private:
 34 | 	// Strong wolfe conditions: the armijo rule and a stronger curvature condition
 35 | 	// alpha = step length
 36 | 	// fx_ap = f(x + alpha p)
 37 | 	// fx = f(x)
 38 | 	// pT_gx = p^T ( grad f(x) )
 39 | 	// pT_gx_ap = p^T ( grad f(x + alpha p) )
 40 | 	static inline bool strong_wolfe( Scalar alpha,
 41 | 		Scalar fx, Scalar pT_gx, 
 42 | 		Scalar fx_ap, Scalar pT_gx_ap,
 43 | 		Scalar wolfe_c1, Scalar wolfe_c2 ){
 44 | 		if( !(fx_ap <= fx + wolfe_c1 * alpha * pT_gx) ){ return false; } // armijo rule
 45 | 		return std::abs( pT_gx_ap ) <= wolfe_c2 * std::abs( pT_gx );
 46 | 	}
 47 | 
 48 | public:
 49 | 	typedef Eigen::Matrix<Scalar,DIM,1> VecX;
 50 | 	typedef Eigen::Matrix<Scalar,DIM,DIM> MatX;
 51 | 
 52 | 	static inline Scalar search(int verbose, int max_iters, const VecX &x, const VecX &p, Problem<Scalar,DIM> &problem, Scalar alpha0) {
 53 | 
 54 | 		const Scalar t_eps = std::numeric_limits<Scalar>::epsilon();
 55 | 		const Scalar wolfe_c1 = 0.0001;
 56 | 		const Scalar wolfe_c2 = 0.8; // should be 0.1 for CG!
 57 | 		const int dim = x.rows();
 58 | 		double alpha = alpha0;
 59 | 		double alpha_min = 1e-8;
 60 | 		double alpha_max = 1;
 61 | 
 62 | 		VecX grad0, grad_new;
 63 | 		if( DIM == Eigen::Dynamic ){
 64 | 			grad0 = VecX::Zero(dim);
 65 | 			grad_new = VecX::Zero(dim);
 66 | 		}
 67 | 		Scalar fx0 = problem.gradient(x, grad0);
 68 | 		const Scalar gtp = grad0.dot(p);
 69 | 		bool min_set = false;
 70 | 
 71 | 		int iter = 0;
 72 | 		for( ; iter < max_iters; ++iter ){
 73 | 
 74 | 			// Should we stop iterating?
 75 | 			if( std::abs(alpha_max-alpha_min) <= t_eps ){ break; }
 76 | 
 77 | 			// Step halfway
 78 | 			alpha = ( alpha_max + alpha_min ) * 0.5;
 79 | 			grad_new.setZero();
 80 | 			Scalar fx_ap = problem.gradient(x + alpha*p, grad_new);
 81 | 			Scalar gt_ap = grad_new.dot( p );
 82 | 
 83 | 			// Check the wolfe conditions
 84 | 			bool happy_wolfe = strong_wolfe( alpha, fx0, gtp, fx_ap, gt_ap, wolfe_c1, wolfe_c2 );
 85 | 			if( happy_wolfe ){
 86 | 				alpha_min = alpha;
 87 | 				min_set = true;
 88 | 			}
 89 | 			else { alpha_max = alpha; }
 90 | 
 91 | 		} // end bs iters
 92 | 
 93 | 		if( iter == max_iters ){
 94 | 			if( verbose > 0 ){ printf("WolfeBisection::linesearch Error: Reached max_iters\n"); }
 95 | 			return -1;
 96 | 		}
 97 | 		if( !min_set ){
 98 | 			if( verbose > 0 ){ printf("WolfeBisection::linesearch Error: LS blocked\n"); }
 99 | 			return -1;
100 | 		}
101 | 
102 | 		return alpha_min;
103 | 
104 | 	}
105 | };
106 | 
107 | }
108 | }
109 | 
110 | #endif
111 | 


--------------------------------------------------------------------------------
/include/MCL/Minimizer.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // Copyright (c) 2017 Matt Overby
  3 | // 
  4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | // of this software and associated documentation files (the "Software"), to deal
  6 | // in the Software without restriction, including without limitation the rights
  7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | // copies of the Software, and to permit persons to whom the Software is
  9 | // furnished to do so, subject to the following conditions:
 10 | // 
 11 | // The above copyright notice and this permission notice shall be included in all
 12 | // copies or substantial portions of the Software.
 13 | // 
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 | // SOFTWARE.
 21 | 
 22 | #ifndef MCL_MINIMIZER_H
 23 | #define MCL_MINIMIZER_H
 24 | 
 25 | #include "Problem.hpp"
 26 | #include "Backtracking.hpp"
 27 | #include "MoreThuente.hpp"
 28 | #include "WolfeBisection.hpp"
 29 | #include <memory>
 30 | 
 31 | namespace mcl {
 32 | namespace optlib {
 33 | 
 34 | // The different line search methods currently implemented
 35 | enum class LSMethod {
 36 | 	None = 0, // use step length = 1, not recommended
 37 | 	MoreThuente, // TODO test this one for correctness
 38 | 	Backtracking, // basic backtracking with sufficient decrease
 39 | 	BacktrackingCubic, // backtracking with cubic interpolation
 40 | 	WeakWolfeBisection // slow
 41 | };
 42 | 
 43 | // Trust region subproblem method (see TrustRegion.hpp)
 44 | enum class TRMethod {
 45 | 	CauchyPoint,
 46 | 	DogLeg
 47 | };
 48 | 
 49 | //
 50 | // Base class for optimization algs
 51 | //
 52 | template<typename Scalar, int DIM>
 53 | class Minimizer {
 54 | public:
 55 | 	typedef Eigen::Matrix<Scalar,DIM,1> VecX;
 56 | 	static const int FAILURE = -1; // returned by minimize if an error is encountered
 57 | 
 58 | 	struct Settings {
 59 | 		int verbose; // higher = more printouts
 60 | 		int max_iters; // usually changed by derived constructors
 61 | 		int ls_max_iters; // max line search iters
 62 | 		Scalar ls_decrease; // sufficient decrease param
 63 | 		LSMethod ls_method; // see LSMethod (above)
 64 | 		TRMethod tr_method; // see TRMethod (above)
 65 | 
 66 | 		Settings() : verbose(0), max_iters(100),
 67 | 			ls_max_iters(100000), ls_decrease(1e-4),
 68 | 			ls_method(LSMethod::BacktrackingCubic),
 69 | 			tr_method(TRMethod::DogLeg)
 70 | 			{}
 71 | 	} m_settings;
 72 | 
 73 | 	//
 74 | 	// Performs optimization
 75 | 	//
 76 | 	virtual int minimize(Problem<Scalar,DIM> &problem, VecX &x) = 0;
 77 | 
 78 | 
 79 | protected:
 80 | 
 81 | 	// Line search method/options can be changed through m_settings.
 82 | 	Scalar linesearch(const VecX &x, const VecX &p, Problem<Scalar,DIM> &prob, double alpha0) const {
 83 | 		double alpha = alpha0;
 84 | 		int mi = m_settings.ls_max_iters;
 85 | 		int v = m_settings.verbose;
 86 | 		Scalar sd = m_settings.ls_decrease;
 87 | 		switch( m_settings.ls_method ){
 88 | 			default:{
 89 | 				alpha = Backtracking<Scalar,DIM>::search(v, mi, sd, x, p, prob, alpha0);
 90 | 			} break;
 91 | 			case LSMethod::None: { alpha = 1.0; } break;
 92 | 			case LSMethod::MoreThuente: {
 93 | 				alpha = MoreThuente<Scalar,DIM>::search(x, p, prob, alpha0);
 94 | 			} break;
 95 | 			case LSMethod::Backtracking: {
 96 | 				alpha = Backtracking<Scalar,DIM>::search(v, mi, sd, x, p, prob, alpha0);
 97 | 			} break;
 98 | 			case LSMethod::BacktrackingCubic: {
 99 | 				alpha = BacktrackingCubic<Scalar,DIM>::search(v, mi, sd, x, p, prob, alpha0);
100 | 			} break;
101 | 			case LSMethod::WeakWolfeBisection: {
102 | 				alpha = WolfeBisection<Scalar,DIM>::search(v, mi, x, p, prob, alpha0);
103 | 			} break;
104 | 		}
105 | 		return alpha;
106 | 	} // end do linesearch
107 | 
108 | }; // class minimizer
109 | 
110 | } // ns optlib
111 | } // ns mcl
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/test/TestProblem.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // Copyright (c) 2017 Matt Overby
  3 | // 
  4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | // of this software and associated documentation files (the "Software"), to deal
  6 | // in the Software without restriction, including without limitation the rights
  7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | // copies of the Software, and to permit persons to whom the Software is
  9 | // furnished to do so, subject to the following conditions:
 10 | // 
 11 | // The above copyright notice and this permission notice shall be included in all
 12 | // copies or substantial portions of the Software.
 13 | // 
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 | // SOFTWARE.
 21 | 
 22 | #include "MCL/Problem.hpp"
 23 | 
 24 | // min |Ax-b|
 25 | class DynProblem : public mcl::optlib::Problem<double,Eigen::Dynamic> {
 26 | public:
 27 | 	typedef Eigen::Matrix<double,Eigen::Dynamic,1> VectorX;
 28 | 	typedef Eigen::Matrix<double,Eigen::Dynamic,Eigen::Dynamic> MatrixX;
 29 | 
 30 | 	MatrixX A;
 31 | 	VectorX b;
 32 | 	DynProblem( int dim_ ){
 33 | 
 34 | 		// Test on random SPD
 35 | 		A = MatrixX::Random(dim_,dim_);
 36 | 		A = A.transpose() * A;
 37 | 		A = A + MatrixX::Identity(dim_,dim_);
 38 | 
 39 | 		b = VectorX::Random(dim_);
 40 | 	}
 41 | 
 42 | 	int dim() const { return b.rows(); }
 43 | 	bool converged(const VectorX &x0, const VectorX &x1, const VectorX &grad){
 44 | 
 45 | 		// Check sizes of input
 46 | 		int m_dim = dim();
 47 | 		if( x0.rows() != m_dim ){
 48 | 			throw std::runtime_error("Error in Problem::converged: x0 wrong dimension");
 49 | 		}
 50 | 		if( x1.rows() != m_dim ){
 51 | 			throw std::runtime_error("Error in Problem::converged: x1 wrong dimension");
 52 | 		}
 53 | 		if( grad.rows() != m_dim ){
 54 | 			throw std::runtime_error("Error in Problem::converged: gradient wrong dimension");
 55 | 		}
 56 | 
 57 | 		return grad.norm() < 1e-10 || (x0-x1).norm() < 1e-10;
 58 | 	}
 59 | 
 60 | 	double value(const VectorX &x){
 61 | 
 62 | 		// Check sizes of input
 63 | 		int m_dim = dim();
 64 | 		if( x.rows() != m_dim ){
 65 | 			throw std::runtime_error("Error in Problem::value: x wrong dimension");
 66 | 		}
 67 | 
 68 | 		return (A*x-b).norm();
 69 | 	}
 70 | 
 71 | 	double gradient(const VectorX &x, VectorX &grad){
 72 | 
 73 | 		// Check sizes of input
 74 | 		int m_dim = dim();
 75 | 		if( x.rows() != m_dim ){
 76 | 			throw std::runtime_error("Error in Problem::gradient: x wrong dimension");
 77 | 		}
 78 | 		if( grad.rows() != m_dim ){
 79 | 			throw std::runtime_error("Error in Problem::gradient: gradient wrong dimension");
 80 | 		}
 81 | 
 82 | 		grad = A*x-b; return value(x);
 83 | 	}
 84 | 	void hessian(const VectorX &x, MatrixX &hess){
 85 | 
 86 | 		// Check sizes of input
 87 | 		int m_dim = dim();
 88 | 		if( x.rows() != m_dim ){
 89 | 			throw std::runtime_error("Error in Problem::hessian: x wrong dimension");
 90 | 		}
 91 | 		if( hess.rows() != m_dim || hess.cols() != m_dim ){
 92 | 			throw std::runtime_error("Error in Problem::hessian: hessian wrong dimension");
 93 | 		}
 94 | 		hess = A;
 95 | 	}
 96 | 
 97 | 	void solve_hessian(const VectorX &x, const VectorX &grad, VectorX &dx){
 98 | 
 99 | 		// Check sizes of input
100 | 		int m_dim = dim();
101 | 		if( x.rows() != m_dim ){
102 | 			throw std::runtime_error("Error in Problem::solve_hessian: x wrong dimension");
103 | 		}
104 | 		if( dx.rows() != m_dim ){
105 | 			throw std::runtime_error("Error in Problem::solve_hessian: dx wrong dimension");
106 | 		}
107 | 		if( grad.rows() != m_dim ){
108 | 			throw std::runtime_error("Error in Problem::solve_hessian: gradient wrong dimension");
109 | 		}
110 | 
111 | 		// Check to make sure base class function works as expected
112 | 		Problem::solve_hessian(x,grad,dx);
113 | 	}
114 | };
115 | 
116 | class Rosenbrock : public mcl::optlib::Problem<double,2> {
117 | public:
118 | 	typedef Eigen::Matrix<double,2,1> VectorX;
119 | 	bool converged(const VectorX &x0, const VectorX &x1, const VectorX &grad){
120 | 		(void)(x1); (void)(x0);
121 | 		return grad.norm() < 1e-10;
122 | 	}
123 | 	double value(const VectorX &x){
124 | 		double a = 1.0 - x[0];
125 | 		double b = x[1] - x[0]*x[0];
126 | 		return a*a + b*b*100.0;
127 | 	}
128 | 	// Test finite diff as well I guess
129 | };
130 | 
131 | 


--------------------------------------------------------------------------------
/include/MCL/LBFGS.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // Copyright (c) 2017 University of Minnesota
  3 | // 
  4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | // of this software and associated documentation files (the "Software"), to deal
  6 | // in the Software without restriction, including without limitation the rights
  7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | // copies of the Software, and to permit persons to whom the Software is
  9 | // furnished to do so, subject to the following conditions:
 10 | // 
 11 | // The above copyright notice and this permission notice shall be included in all
 12 | // copies or substantial portions of the Software.
 13 | // 
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 | // SOFTWARE.
 21 | 
 22 | #ifndef MCL_LBFGS_H
 23 | #define MCL_LBFGS_H
 24 | 
 25 | #include "Minimizer.hpp"
 26 | 
 27 | namespace mcl {
 28 | namespace optlib {
 29 | 
// L-BFGS implementation based on Nocedal & Wright Numerical Optimization book (Section 7.2)
// DIM = dimension of the problem
// M = history window (number of stored s/y pairs)
//
// Original Author: Ioannis Karamouzas
//
template<typename Scalar, int DIM, int M=8>
class LBFGS : public Minimizer<Scalar,DIM> {
private:
	typedef Eigen::Matrix<Scalar,DIM,1> VecX;
	typedef Eigen::Matrix<Scalar,DIM,M> MatM; // one history vector per column
	typedef Eigen::Matrix<Scalar,M,1> VecM; // one scalar per history entry

public:
	bool show_denom_warning; // Print out warning for zero denominators

	// Sets the iteration cap to 50. show_denom_warning is derived from
	// m_settings.verbose as it is at construction time; it is not updated if
	// verbose is changed afterwards, so set it directly if needed.
	LBFGS() : show_denom_warning(false) {
		this->m_settings.max_iters = 50;
		show_denom_warning = this->m_settings.verbose > 0 ? true : false;
	}

	// Minimizes problem starting from x, which is updated in place.
	// Returns number of iterations used (the number of times the main loop
	// body was entered), or FAILURE if the line search returns a step <= 0.
	int minimize(Problem<Scalar,DIM> &problem, VecX &x){

		MatM s, y; // history of position differences (s) and gradient differences (y)
		VecM alpha, rho; // per-entry scalars of the two-loop recursion
		VecX grad, q, grad_old, x_old, x_last;

		// Dynamic-size storage has to be sized by hand
		if( DIM==Eigen::Dynamic ){
			int dim = x.rows();
			s = MatM::Zero(dim,M);
			y = MatM::Zero(dim,M);
			alpha = VecM::Zero(M);
			rho = VecM::Zero(M);
			grad = VecX::Zero(dim);
			q = VecX::Zero(dim);
			grad_old = VecX::Zero(dim);
			x_old = VecX::Zero(dim);
			x_last = VecX::Zero(dim);
		}

		problem.gradient(x, grad);

		Scalar gamma_k = 1.0; // scaling applied between the two recursion loops
		Scalar alpha_init = 1.0; // initial step length handed to the line search

		int global_iter = 0; // total iterations; unlike k it is never reset
		int max_iters = this->m_settings.max_iters;
		int verbose = this->m_settings.verbose;

		for( int k=0; k<max_iters; ++k ){

			x_old = x;
			grad_old = grad;
			q = grad;
			global_iter++;
	
			// L-BFGS first - loop recursion (newest history entry to oldest)
			int iter = std::min(M, k); // number of valid history columns
			for(int i = iter - 1; i >= 0; --i){
				// NOTE(review): no guard against s.col(i).dot(y.col(i)) == 0 here
				rho(i) = 1.0 / ((s.col(i)).dot(y.col(i)));
				alpha(i) = rho(i)*(s.col(i)).dot(q);
				q = q - alpha(i)*y.col(i);
			}

			// L-BFGS second - loop recursion (oldest history entry to newest)
			q = gamma_k*q;
			for(int i = 0; i < iter; ++i){
				Scalar beta = rho(i)*q.dot(y.col(i));
				q = q + (alpha(i) - beta)*s.col(i);
			}

			// is there a descent
			// The step taken is -q, so q.dot(grad) <= 0 means -q does not descend.
			// In that case fall back to the plain gradient and restart: the
			// remaining budget shrinks by the iterations already spent and k is
			// reset to 0, so the history columns are refilled from column 0.
			Scalar dir = q.dot(grad);
			if(dir <= 0 ){
				q = grad;
				max_iters -= k;
				k = 0;
				alpha_init = std::min(1.0, 1.0 / grad.template lpNorm<Eigen::Infinity>() );
			}

			Scalar rate = this->linesearch(x, -q, problem, alpha_init);

			if( rate <= 0 ){
				if( verbose > 0 ){ printf("LBFGS::minimize: Failure in linesearch\n"); }
				return Minimizer<Scalar,DIM>::FAILURE;
			}

			// x_last holds the same point as x_old (x has not changed since the
			// top of the loop body). grad is still the gradient at that point
			// when converged() is called.
			x_last = x;
			x -= rate * q;
			if( problem.converged(x_last,x,grad) ){ break; }

			problem.gradient(x,grad);
			VecX s_temp = x - x_old;
			VecX y_temp = grad - grad_old;

			// update the history
			if(k < M){
				// Window not yet full: store in the next free column
				s.col(k) = s_temp;
				y.col(k) = y_temp;
			}
			else {
				// Window full: drop the oldest column (shift left) and append
				// the new pair in the last column
				s.leftCols(M - 1) = s.rightCols(M - 1).eval();
				s.rightCols(1) = s_temp;
				y.leftCols(M - 1) = y.rightCols(M - 1).eval();
				y.rightCols(1) = y_temp;
			}
		
			// The gradient did not change at all: stop instead of dividing by zero
			Scalar denom = y_temp.dot(y_temp);
			if( std::abs(denom) <= 0 ){
				if( show_denom_warning ){
					printf("LBFGS::minimize Warning: Encountered a zero denominator\n");
				}
				break;
			}
			gamma_k = s_temp.dot(y_temp) / denom;
			alpha_init = 1.0;

		}

		return global_iter;

	} // end minimize
};
154 | 
155 | }
156 | }
157 | 
158 | #endif
159 | 


--------------------------------------------------------------------------------
/include/MCL/Problem.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // 
  3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | // of this software and associated documentation files (the "Software"), to deal
  5 | // in the Software without restriction, including without limitation the rights
  6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7 | // copies of the Software, and to permit persons to whom the Software is
  8 | // furnished to do so, subject to the following conditions:
  9 | // 
 10 | // The above copyright notice and this permission notice shall be included in all
 11 | // copies or substantial portions of the Software.
 12 | // 
 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 19 | // SOFTWARE.
 20 | 
 21 | #ifndef MCL_PROBLEM_H
 22 | #define MCL_PROBLEM_H
 23 | 
 24 | #if MCL_DEBUG == 1
 25 | #include <iostream>
 26 | #endif
 27 | 
 28 | #include <Eigen/Dense>
 29 | 
 30 | namespace mcl {
 31 | namespace optlib {
 32 | // Base class for objectives: derive from it, implement value() and converged(), and optionally override the derivative methods.
 33 | template<typename Scalar, int DIM>
 34 | class Problem {
 35 | private:
 36 | 	typedef Eigen::Matrix<Scalar,DIM,1> VecX;
 37 | 	typedef Eigen::Matrix<Scalar,DIM,DIM> MatX;
 38 | 
 39 | public:
 40 | 	virtual ~Problem() {} // polymorphic base: allow safe deletion through a Problem pointer
 41 | 	// Returns true if the solver has converged. x0 is the previous iterate, x1 is
 42 | 	// the current iterate, and grad is the most recently computed gradient
 43 | 	// (callers differ on whether it was evaluated at x0 or at x1).
 44 | 	virtual bool converged(const VecX &x0, const VecX &x1, const VecX &grad) = 0;
 45 | 
 46 | 	// Compute just the value
 47 | 	virtual Scalar value(const VecX &x) = 0;
 48 | 
 49 | 	// Compute the objective value and the gradient (default: finite differences)
 50 | 	virtual Scalar gradient(const VecX &x, VecX &grad){
 51 | 		finiteGradient(x, grad);
 52 | 		return value(x);
 53 | 	}
 54 | 
 55 | 	// Compute hessian (default: finite differences)
 56 | 	virtual void hessian(const VecX &x, MatX &hessian){
 57 | 		finiteHessian(x, hessian);
 58 | 	}
 59 | 
 60 | 	// Solve dx = H^-1 -g (used by Newton's)
 61 | 	virtual void solve_hessian(const VecX &x, const VecX &grad, VecX &dx){
 62 | 		MatX hess;
 63 | 		if( DIM  == Eigen::Dynamic ){
 64 | 			int dim = x.rows();
 65 | 			hess = MatX::Zero(dim,dim);
 66 | 		}
 67 | 		hessian(x,hess); // hessian at x_n
 68 | 
 69 | 		// Going with high-accuracy, low-requirements as default factorization for lin-solve
 70 | 		// Copied from https://eigen.tuxfamily.org/dox/group__TutorialLinearAlgebra.html
 71 | 		//	Method			Requirements	Spd (sm)	Spd (lg)	Accuracy
 72 | 		//	partialPivLu()		Invertible	++		++		+
 73 | 		//	fullPivLu()		None		-		- -		+++
 74 | 		//	householderQr()		None		++		++		+
 75 | 		//	colPivHouseholderQr()	None		++		-		+++
 76 | 		//	fullPivHouseholderQr()	None		-		- -		+++
 77 | 		//	llt()			PD		+++		+++		+
 78 | 		//	ldlt()			P/N SD 		+++		+		++
 79 | 		if( DIM == Eigen::Dynamic || DIM > 4 ){
 80 | 			dx = hess.fullPivLu().solve(-grad);
 81 | 		}
 82 | 		else { // small fixed-size matrices: explicit inverse
 83 | 			dx = -hess.inverse()*grad;
 84 | 		}
 85 | 	}
 86 | 
 87 | 	// Gradient with finite differences (central difference for accuracy 0); grad is resized to match x if needed
 88 | 	inline void finiteGradient(const VecX &x, VecX &grad){
 89 | 		const int accuracy = 0; // accuracy can be 0, 1, 2, 3
 90 | 		const Scalar eps = 2.2204e-6;
 91 | 		static const Scalar coeff[4][8] = // stencil weights per accuracy level (unused entries are zero); built once, no heap use
 92 | 		{ {1, -1}, {1, -8, 8, -1}, {-1, 9, -45, 45, -9, 1}, {3, -32, 168, -672, 672, -168, 32, -3} };
 93 | 		static const Scalar coeff2[4][8] = // stencil offsets in units of eps
 94 | 		{ {1, -1}, {-2, -1, 1, 2}, {-3, -2, -1, 1, 2, 3}, {-4, -3, -2, -1, 1, 2, 3, 4} };
 95 | 		static const Scalar dd[4] = {2, 12, 60, 840}; // stencil denominators
 96 | 		int dim = x.rows();
 97 | 		if( grad.rows() != dim ){ grad = VecX::Zero(dim); }
 98 | 		else{ grad.setZero(); }
 99 | 		for(int d = 0; d < dim; ++d){
100 | 			for (int s = 0; s < 2*(accuracy+1); ++s){
101 | 				VecX xx = x.eval(); // fresh copy so only coordinate d is perturbed
102 | 				xx[d] += coeff2[accuracy][s]*eps;
103 | 				grad[d] += coeff[accuracy][s]*value(xx);
104 | 			}
105 | 			grad[d] /= (dd[accuracy]* eps);
106 | 		}
107 | 	} // end finite grad
108 | 
109 | 	// Hessian with (forward) finite differences; hess is resized to dim x dim if needed
110 | 	inline void finiteHessian(const VecX &x, MatX &hess){
111 | 		const Scalar eps = std::numeric_limits<Scalar>::epsilon()*10e7; // NOTE(review): ~2.2e-8 for double but ~12 for float -- confirm intended
112 | 		int dim = x.rows();
113 | 		if( hess.rows() != dim || hess.cols() != dim ){ hess = MatX::Zero(dim,dim); }
114 | 		const Scalar f4 = value(x); // f(x) is shared by every entry, so evaluate it once
115 | 		for(int i = 0; i < dim; ++i){
116 | 			for(int j = 0; j < dim; ++j){
117 | 				VecX xx = x;
118 | 				xx[i] += eps;
119 | 				xx[j] += eps;
120 | 				Scalar f1 = value(xx); // f(x + eps*e_i + eps*e_j)
121 | 				xx[j] -= eps;
122 | 				Scalar f2 = value(xx); // f(x + eps*e_i)
123 | 				xx[j] += eps;
124 | 				xx[i] -= eps;
125 | 				Scalar f3 = value(xx); // f(x + eps*e_j)
126 | 				hess(i, j) = (f1 - f2 - f3 + f4) / (eps * eps);
127 | 			}
128 | 		}
129 | 	} // end finite hess
130 | };
131 | 
132 | }
133 | }
134 | 
135 | #endif
136 | 


--------------------------------------------------------------------------------
/include/MCL/Backtracking.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // Copyright (c) 2017 Matt Overby
  3 | // 
  4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | // of this software and associated documentation files (the "Software"), to deal
  6 | // in the Software without restriction, including without limitation the rights
  7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | // copies of the Software, and to permit persons to whom the Software is
  9 | // furnished to do so, subject to the following conditions:
 10 | // 
 11 | // The above copyright notice and this permission notice shall be included in all
 12 | // copies or substantial portions of the Software.
 13 | // 
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 | // SOFTWARE.
 21 | 
 22 | #ifndef MCL_BACKTRACKING_H
 23 | #define MCL_BACKTRACKING_H
 24 | 
 25 | #include "Problem.hpp"
 26 | 
 27 | namespace mcl {
 28 | namespace optlib {
 29 | 
 30 | 
 31 | //
 32 | // Old reliable backtracking Armijo
 33 | //
 34 | template<typename Scalar, int DIM>
 35 | class Backtracking {
 36 | public:
 37 | 	typedef Eigen::Matrix<Scalar,DIM,1> VecX;
 38 | 
 39 | 	static inline Scalar search(int verbose, int max_iters, Scalar decrease, const VecX &x, const VecX &p, Problem<Scalar,DIM> &problem, Scalar alpha0) {
 40 | 		// Shrinks the step, starting from alpha0, by a fixed factor until the
 41 | 		// Armijo condition holds along direction p.
 42 | 		// Returns the accepted step, `decrease` when p is numerically zero,
 43 | 		// or -1 when max_iters trial steps were all rejected.
 44 | 
 45 | 		// A direction of (numerically) zero length cannot be searched along
 46 | 		if( p.norm() <= std::numeric_limits<Scalar>::epsilon() ){ return decrease; }
 47 | 
 48 | 		// Objective value and directional derivative at the starting point
 49 | 		VecX g;
 50 | 		if( DIM == Eigen::Dynamic ){ g = VecX::Zero(x.rows()); }
 51 | 		const Scalar f0 = problem.gradient(x, g);
 52 | 		const Scalar slope = g.dot(p);
 53 | 
 54 | 		const Scalar shrink = 0.7;
 55 | 		Scalar step = alpha0;
 56 | 		for( int trial = 0; trial < max_iters; ++trial ){
 57 | 			const Scalar f_trial = problem.value(x + step*p);
 58 | 			const Scalar armijo_bound = f0 + step*decrease*slope; // Armijo condition I
 59 | 			if( f_trial <= armijo_bound ){ return step; } // sufficient decrease
 60 | 			step *= shrink;
 61 | 		}
 62 | 
 63 | 		// Ran out of trials without satisfying the condition
 64 | 		if( verbose > 0 ){ printf("Backtracking::search Error: Reached max_iters\n"); }
 65 | 		return -1;
 66 | 	}
 67 | 
 68 | }; // end class Backtracking
 69 | 
 70 | 
 71 | //
 72 | // Backtracking Armijo with cubic interpolation
 73 | //
 74 | template<typename Scalar, int DIM>
 75 | class BacktrackingCubic {
 76 | public:
 77 | 	typedef Eigen::Matrix<Scalar,DIM,1> VecX;
 78 | 	// Returns the accepted step, `decrease` when p is numerically zero, or -1 when max_iters trial steps were all rejected.
 79 | 	static inline Scalar search(int verbose, int max_iters, Scalar decrease, const VecX &x, const VecX &p, Problem<Scalar,DIM> &problem, Scalar alpha0) {
 80 | 
 81 | 		// First things first, check descent norm
 82 | 		const Scalar t_eps = std::numeric_limits<Scalar>::epsilon();
 83 | 		if( p.norm() <= t_eps ){ return decrease; }
 84 | 
 85 | 		Scalar alpha = alpha0;
 86 | 		VecX grad;
 87 | 		if( DIM == Eigen::Dynamic ){ grad = VecX::Zero(x.rows()); }
 88 | 		Scalar fx0 = problem.gradient(x, grad);
 89 | 		Scalar gtp = grad.dot(p); // directional derivative at x along p
 90 | 		Scalar fxp = fx0; // value at the previous trial step (set before first use by cubic)
 91 | 		Scalar alphap = alpha; // previous trial step
 92 | 
 93 | 		int iter = 0;
 94 | 		for( ; iter < max_iters; ++iter ){
 95 | 			Scalar fxa = problem.value(x + alpha*p);
 96 | 			Scalar fx0_fxa = fx0 + alpha*decrease*gtp; // Armijo condition I
 97 | 			if( fxa <= fx0_fxa ){ break; } // sufficient decrease
 98 | 			// First rejection: minimizer of the quadratic through f(0), f'(0), f(alpha) -- valid for any alpha, not only alpha == 1
 99 | 			Scalar alpha_tmp = iter == 0 ?
100 | 				( gtp*alpha*alpha / (2.0 * (fx0 + gtp*alpha - fxa)) ) :
101 | 				cubic( fx0, gtp, fxa, alpha, fxp, alphap );
102 | 			fxp = fxa;
103 | 			alphap = alpha;
104 | 			alpha = range( alpha_tmp, 0.1*alpha, 0.5*alpha ); // safeguard: shrink by a factor in [0.1, 0.5]
105 | 		}
106 | 
107 | 		if( iter >= max_iters ){
108 | 			if( verbose > 0 ){ printf("BacktrackingCubic::search Error: Reached max_iters\n"); }
109 | 			return -1;
110 | 		}
111 | 
112 | 		return alpha;
113 | 	}
114 | 
115 | private:
116 | 	static inline Scalar range( Scalar alpha, Scalar low, Scalar high ){ // clamp alpha into [low, high]
117 | 		if( alpha < low ){ return low; }
118 | 		if( alpha <= high ){ return alpha; }
119 | 		return high; // alpha > high, or alpha is NaN (failed interpolation): fall back to the upper bound
120 | 	}
121 | 
122 | 	// Cubic interpolation: minimizer of the cubic through f(0), f'(0) and the two latest trial values. May return NaN (see discrim).
123 | 	// fx0 = f(x0)
124 | 	// gtp = f'(x0)^T p
125 | 	// fxa = f(x0 + alpha*p)
126 | 	// alpha = step length
127 | 	// fxp = previous fxa
128 | 	// alphap = previous alpha
129 | 	static inline Scalar cubic( Scalar fx0, Scalar gtp, Scalar fxa, Scalar alpha, Scalar fxp, Scalar alphap ){
130 | 		typedef Eigen::Matrix<Scalar,2,1> Vec2;
131 | 		typedef Eigen::Matrix<Scalar,2,2> Mat2;
132 | 
133 | 		Scalar mult = 1.0 / ( alpha*alpha * alphap*alphap * (alpha-alphap) );
134 | 		Mat2 A;
135 | 		A(0,0) = alphap*alphap;		A(0,1) = -alpha*alpha;
136 | 		A(1,0) = -alphap*alphap*alphap;	A(1,1) = alpha*alpha*alpha;	
137 | 		Vec2 B;
138 | 		B[0] = fxa - fx0 - alpha*gtp; B[1] = fxp - fx0 - alphap*gtp;
139 | 		Vec2 r = mult * A * B; // r[0], r[1]: cubic and quadratic coefficients of the interpolant
140 | 		if( std::abs(r[0]) <= 0.0 ){ return -gtp / (2.0*r[1]); } // if quadratic
141 | 		Scalar d = std::sqrt( r[1]*r[1] - 3.0*r[0]*gtp ); // discrim; NaN when negative, which range() maps to its upper bound
142 | 		return (-r[1] + d) / (3.0*r[0]);
143 | 	}
144 | 
145 | }; // end class BacktrackingCubic
146 | 
147 | } // ns optlib
148 | } // ns mcl
149 | 
150 | #endif
151 | 


--------------------------------------------------------------------------------
/include/MCL/TrustRegion.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // Copyright (c) 2017 Matt Overby
  3 | // 
  4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | // of this software and associated documentation files (the "Software"), to deal
  6 | // in the Software without restriction, including without limitation the rights
  7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | // copies of the Software, and to permit persons to whom the Software is
  9 | // furnished to do so, subject to the following conditions:
 10 | // 
 11 | // The above copyright notice and this permission notice shall be included in all
 12 | // copies or substantial portions of the Software.
 13 | // 
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 | // SOFTWARE.
 21 | 
 22 | #ifndef MCL_TRUSTREGION_H
 23 | #define MCL_TRUSTREGION_H
 24 | 
 25 | #include "Minimizer.hpp"
 26 | 
 27 | namespace mcl {
 28 | namespace optlib {
 29 | // Trust region minimizer; the subproblem is solved with the method selected by m_settings.tr_method.
 30 | template<typename Scalar, int DIM>
 31 | class TrustRegion : public Minimizer<Scalar,DIM> {
 32 | private:
 33 | 	typedef Eigen::Matrix<Scalar,DIM,1> VecX;
 34 | 	typedef Eigen::Matrix<Scalar,DIM,DIM> MatX;
 35 | 
 36 | public:
 37 | 	//
 38 | 	// The trust region method operates on an approximate hessian (B).
 39 | 	// To keep the interface simple, Problem::hessian is used to obtain
 40 | 	// this approximation. It shouldn't be much of a problem, since you
 41 | 	// can still use approximations for Newton's (the only other method
 42 | 	// implemented that requires a hessian evaluation).
 43 | 	//
 44 | 	// TODO Replace B with possibly sparse representation
 45 | 	//
 46 | 	TrustRegion() {
 47 | 		this->m_settings.max_iters = 100;
 48 | 	}
 49 | 
 50 | 	int minimize(Problem<Scalar,DIM> &problem, VecX &x){
 51 | 		// Updates x in place. Returns the number of iterations used, or Minimizer::FAILURE on a NaN reduction ratio.
 52 | 		Scalar delta_k = 2.0; // trust region radius
 53 | 		const Scalar delta_max = 8.0; // max trust region radius
 54 | 		const Scalar eta = 0.125; // min reduction ratio allowed (0<eta<0.25)
 55 | 		const TRMethod m = this->m_settings.tr_method;
 56 | 		int max_iters = this->m_settings.max_iters;
 57 | 		int verbose = this->m_settings.verbose;
 58 | 		const Scalar bnd_tol = std::sqrt(std::numeric_limits<Scalar>::epsilon()); // relative tolerance for "step lies on the boundary"
 59 | 		VecX grad, dx, dx_newton, x_last; // dx_newton: unconstrained Newton step at the current x
 60 | 		MatX B; // Approximate hessian
 61 | 		if( DIM  == Eigen::Dynamic ){
 62 | 			int dim = x.rows();
 63 | 			x_last.resize(dim); // last variable
 64 | 			grad.resize(dim); // grad at k
 65 | 			dx.resize(dim); dx_newton.resize(dim); // descent directions
 66 | 			B.resize(dim,dim);
 67 | 		}
 68 | 
 69 | 		// Init gradient and hessian
 70 | 		Scalar fxk = problem.gradient(x,grad); // gradient and objective
 71 | 		problem.hessian(x,B); // get hessian (or approximation)
 72 | 		problem.solve_hessian(x,grad,dx_newton); // attempt with newtons
 73 | 
 74 | 		int iter = 0;
 75 | 		for( ; iter < max_iters; ++iter ){
 76 | 			dx = dx_newton; // always start from the full Newton step, also after a rejected (clipped) step
 77 | 			// If it's outside the trust region, pick new descent
 78 | 			if( dx.norm() > delta_k ){
 79 | 				eval_subproblem(m, delta_k, grad, B, dx);
 80 | 			}
 81 | 			Scalar dx_norm = dx.norm();
 82 | 			if( dx_norm <= 0.0 ){ break; } // zero step: x cannot move, and the ratio below would be 0/0
 83 | 			Scalar fxdx = problem.value(x+dx);
 84 | 			// Compute reduction ratio
 85 | 			Scalar rho_k = eval_reduction(fxk, fxdx, dx, grad, B);
 86 | 			// A NaN rho_k fails every comparison below and is reported at the end of the iteration
 87 | 
 88 | 			// Update trust region radius
 89 | 			if( rho_k < 0.25 ){ delta_k = 0.25*delta_k; }
 90 | 
 91 | 			// Full step, good approximation (boundary test uses a tolerance: exact equality almost never holds)
 92 | 			else if( rho_k > 0.75 && std::abs(dx_norm-delta_k) <= bnd_tol*delta_k ){
 93 | 				delta_k = std::min<Scalar>( 2.0*delta_k, delta_max );
 94 | 			}
 95 | 
 96 | 			// Take a step, otherwise need to re-eval sub problem
 97 | 			if( rho_k > eta ){
 98 | 
 99 | 				x_last = x;
100 | 				x = x + dx;
101 | 
102 | 				// Only need to compute gradient and hessian
103 | 				// if x has actually changed.
104 | 				fxk = problem.gradient(x,grad); // gradient and objective
105 | 				if( problem.converged(x_last,x,grad) ){ break; }
106 | 
107 | 				// I think I should improve this as to not call both hessian
108 | 				// and solve_hessian, which likely causes redundant computation.
109 | 				problem.hessian(x,B); // get hessian (or approximation)
110 | 				problem.solve_hessian(x,grad,dx_newton); // attempt with newtons
111 | 			}
112 | 
113 | 			if( std::isnan(rho_k) ){
114 | 				if( verbose ){ printf("\n**TrustRegion Error: NaN reduction"); }
115 | 				return Minimizer<Scalar,DIM>::FAILURE;
116 | 			}
117 | 
118 | 		}
119 | 
120 | 		return iter;
121 | 
122 | 	} // end minimize
123 | 
124 | protected:
125 | 
126 | 	// Returns the largest real root of a*x^2 + b*x + c = 0. Throws std::runtime_error if the roots are complex. Assumes a != 0.
127 | 	static inline Scalar max_roots(Scalar a, Scalar b, Scalar c){
128 | 
129 | 		Scalar d = (b*b) - (4.0*a*c);
130 | 		if( d >= 0 ){ // d == 0 is a valid (double) real root
131 | 			Scalar sqrt_d = std::sqrt(d);
132 | 			Scalar r1 = (-b + sqrt_d) / (2.0*a);
133 | 			Scalar r2 = (-b - sqrt_d) / (2.0*a);
134 | 			return std::max(r1,r2);
135 | 		}
136 | 		// Complex roots (or NaN input). Open question from the original author:
137 | 		// should the real part -b/(2a) or the imaginary part sqrt(-d)/(2a)
138 | 		// be used here instead of failing?
139 | 		throw std::runtime_error("TrustRegion Error: Problem in quadratic roots");
140 | 		return 0; // not reached
141 | 	}
142 | 
143 | 	// fxk = objective at x_k
144 | 	// fxdx = objective at x_k + dx
145 | 	// dx = descent direction
146 | 	// grad_k = gradient at x_k 
147 | 	// B_k = hessian guess.
148 | 	static inline Scalar eval_reduction( Scalar fxk, Scalar fxdx, const VecX &dx,
149 | 		const VecX &grad_k, const MatX &B_k ){
150 | 		// rho = ( f(x) - f(x+dx) ) / ( model(0) - model(dx) )
151 | 		// with model = f(x) + dx^T grad + 0.5 dx^T B dx
152 | 		Scalar num = fxk - fxdx;
153 | 		Scalar denom = fxk - ( fxk + dx.dot(grad_k) + 0.5 * dx.dot( B_k * dx ) );
154 | 		return num/denom;
155 | 	}
156 | 	// Replaces dx with a step of length <= delta_k. For DogLeg, dx must hold the full Newton step on entry.
157 | 	static inline void eval_subproblem(
158 | 		const TRMethod &m, Scalar delta_k,
159 | 		const VecX &grad, const MatX &B,
160 | 		VecX &dx ){
161 | 		// Curvature of the model along the gradient direction
162 | 		Scalar gTBg = grad.dot(B*grad);
163 | 
164 | 		switch( m ){
165 | 
166 | 			// Generally requires a large number of outer solver iterations
167 | 			case TRMethod::CauchyPoint: {
168 | 
169 | 				Scalar grad_norm = grad.norm();
170 | 				Scalar tau = 1.0;
171 | 				if( gTBg > 0.0 ){ tau = std::min<Scalar>( 1.0, std::pow(grad_norm,3) / (delta_k*gTBg) ); }
172 | 				dx = ( -tau * delta_k / grad_norm ) * grad;
173 | 
174 | 			} break;
175 | 
176 | 			// Uses steepest descent if possible
177 | 			case TRMethod::DogLeg: {
178 | 				Scalar gTg = grad.dot(grad);
179 | 				if( gTBg <= 0.0 ){ dx = ( -delta_k / std::sqrt(gTg) ) * grad; break; } // non-positive curvature: steepest descent to the boundary
180 | 				VecX dx_U = ( -gTg / gTBg ) * grad; // unconstrained minimizer along steepest descent
181 | 				Scalar dx_U_norm = dx_U.norm();
182 | 
183 | 				// Use steepest descent
184 | 				if( dx_U_norm >= delta_k ){ dx = delta_k/dx_U_norm * dx_U; }
185 | 				else{
186 | 					// Compute tau and update descent: walk from dx_U towards the Newton step until the boundary
187 | 					VecX dx_C = dx - dx_U;
188 | 					Scalar dx_C_norm = dx_C.norm();
189 | 					Scalar tau = max_roots( // Ax^2 + Bx + c
190 | 						dx_C_norm*dx_C_norm,
191 | 						2.0*dx_C.dot(dx_U),
192 | 						dx_U_norm*dx_U_norm - delta_k*delta_k
193 | 					);
194 | 					dx = dx_U + tau*dx_C;
195 | 				}
196 | 
197 | 			} break;
198 | 
199 | 		} // end switch method
200 | 
201 | 	} // end eval sub problem
202 | 
203 | };
204 | 
205 | } // ns optlib
206 | } // ns mcl
207 | 
208 | #endif
209 | 


--------------------------------------------------------------------------------
/test/testSolvers.cpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // Copyright (c) 2017 Matt Overby
  3 | // 
  4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | // of this software and associated documentation files (the "Software"), to deal
  6 | // in the Software without restriction, including without limitation the rights
  7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | // copies of the Software, and to permit persons to whom the Software is
  9 | // furnished to do so, subject to the following conditions:
 10 | // 
 11 | // The above copyright notice and this permission notice shall be included in all
 12 | // copies or substantial portions of the Software.
 13 | // 
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 | // SOFTWARE.
 21 | 
 22 | #include <iostream>
 23 | #include "TestProblem.hpp"
 24 | #include "MCL/LBFGS.hpp"
 25 | #include "MCL/NonLinearCG.hpp"
 26 | #include "MCL/Newton.hpp"
 27 | #include "MCL/TrustRegion.hpp"
 28 | #include <memory>
 29 | #include <vector>
 30 | 
 31 | using namespace mcl::optlib;
 32 | typedef std::shared_ptr< Minimizer<double,2> > MinPtr2; // rb
 33 | typedef std::shared_ptr< Minimizer<double,Eigen::Dynamic> > MinPtrD; // linear
 34 | // Runs every solver on a DynProblem of several dimensions and checks the residual |Ax-b|. Returns true if all pass.
 35 | bool test_linear( std::vector<MinPtrD> &solvers, std::vector<std::string> &names ){
 36 | 
 37 | 	std::cout << "\nTest linear:" << std::endl;
 38 | 	typedef Eigen::Matrix<double,Eigen::Dynamic,1> VecX;
 39 | 	bool success = true;
 40 | 
 41 | 	// Using multiple dimensions in the linear case, which changes
 42 | 	// how Newton's performs a solve.
 43 | 	std::vector<int> test_dims = { 4, 16, 64 };
 44 | 
 45 | 	int n_test_dims = test_dims.size();
 46 | 	for( int t=0; t<n_test_dims; ++t ){
 47 | 
 48 | 		int dim = test_dims[t];
 49 | 
 50 | 		// High/Low dimensions, linear, and Eigen::Dynamic.
 51 | 		// The solvers should work, since the Vecs/Mats are resized at run time.
 52 | 		DynProblem cp(dim);
 53 | 
 54 | 		int n_solvers = solvers.size();
 55 | 		for( int s=0; s<n_solvers; ++s ){ // s indexes both solvers and names
 56 | 			bool curr_success = true;
 57 | 
 58 | 			// Special case for Newtons: should converge in 1 iter because its 2nd order
 59 | 			if( names[s] == "newton" ){ solvers[s]->m_settings.max_iters = 1; }
 60 | 			else { solvers[s]->m_settings.max_iters = 100; }
 61 | 			solvers[s]->m_settings.verbose = 1;
 62 | 			VecX x = VecX::Zero(dim);
 63 | 			solvers[s]->minimize( cp, x );
 64 | 			for( int c=0; c<dim; ++c ){ // distinct index: names must be looked up by solver, not by component
 65 | 				if( std::isnan(x[c]) || std::isinf(x[c]) ){
 66 | 					std::cerr << "(" << names[s] << ") Bad values in x: " << x[c] << std::endl;
 67 | 					curr_success = false;
 68 | 				}
 69 | 			}
 70 | 			VecX r = cp.A*x - cp.b;
 71 | 			double rn = r.norm(); // x should minimize |Ax-b|
 72 | 			if( rn > 1e-3 ){
 73 | 				std::cerr << "(" << names[s] << ") Failed to minimize: |Ax-b| = " << rn << std::endl;
 74 | 				curr_success = false;
 75 | 			}
 76 | 
 77 | 			if( curr_success ){ std::cout << "(" << names[s] << ") Linear (" << dim << "): Success" << std::endl; }
 78 | 			else{ success = false; }
 79 | 
 80 | 		} // end loop solvers
 81 | 
 82 | 	} // end loop test dims
 83 | 
 84 | 	return success;
 85 | 
 86 | }
 87 | 
 88 | // Runs every 2D solver on the Rosenbrock problem from x = 0 and checks the distance to (1,1). Returns true if all pass.
 89 | bool test_rb( std::vector<MinPtr2> &solvers, std::vector<std::string> &names ){
 90 | 
 91 | 	std::cout << "\nTest Rosenbrock:" << std::endl;
 92 | 	Rosenbrock rb; // Dim = 2, also tests finite gradient/hessian
 93 | 	bool success = true;
 94 | 
 95 | 	int n_solvers = solvers.size();
 96 | 	for( int s=0; s<n_solvers; ++s ){ // s indexes both solvers and names
 97 | 		bool curr_success = true;
 98 | 
 99 | 		solvers[s]->m_settings.max_iters = 1000;
100 | 		solvers[s]->m_settings.verbose = 1;
101 | 		Eigen::Vector2d x = Eigen::Vector2d::Zero();
102 | 		solvers[s]->minimize( rb, x );
103 | 
104 | 		for( int c=0; c<2; ++c ){ // distinct index: names must be looked up by solver, not by component
105 | 			if( std::isnan(x[c]) || std::isinf(x[c]) ){
106 | 				std::cerr << "(" << names[s] << ") Bad values in x: " << x[c] << std::endl;
107 | 				curr_success = false;
108 | 			}
109 | 		}
110 | 
111 | 		double rn = (Eigen::Vector2d(1,1) - x).norm();
112 | 		if( rn > 1e-8 ){
113 | 			std::cerr << "(" << names[s] << ") Failed to minimize: Rosenbrock = " << rn << std::endl;
114 | 			curr_success = false;
115 | 		}
116 | 
117 | 		if( curr_success ){ std::cout << "(" << names[s] << ") Rosenbrock: Success" << std::endl; }
118 | 		else{ success = false; }
119 | 	}
120 | 
121 | 	return success;
122 | }
123 | 
124 | // Test what happens when the energy is already minimized: x starts at A^-1 b and must still satisfy |Ax-b| ~ 0 afterwards
125 | bool test_zero( std::vector<MinPtrD> &solvers, std::vector<std::string> &names ){
126 | 
127 | 	std::cout << "\nTest zero energy:" << std::endl;
128 | 	typedef Eigen::Matrix<double,Eigen::Dynamic,1> VecX;
129 | 	bool success = true;
130 | 	int dim = 2;
131 | 
132 | 	// High/Low dimensions, linear, and Eigen::Dynamic.
133 | 	// The solvers should work, since the Vecs/Mats are resized at run time.
134 | 	DynProblem cp(dim);
135 | 
136 | 	int n_solvers = solvers.size();
137 | 	for( int s=0; s<n_solvers; ++s ){ // s indexes both solvers and names
138 | 		bool curr_success = true;
139 | 
140 | 		// Set max iterations to 1, should exit right away (at min)
141 | 		solvers[s]->m_settings.max_iters = 1;
142 | 		solvers[s]->m_settings.verbose = 1;
143 | 		VecX x = cp.A.inverse() * cp.b;
144 | 		solvers[s]->minimize( cp, x );
145 | 
146 | 		for( int c=0; c<dim; ++c ){ // distinct index: names must be looked up by solver, not by component
147 | 			if( std::isnan(x[c]) || std::isinf(x[c]) ){
148 | 				std::cerr << "(" << names[s] << ") Bad values in x: " << x[c] << std::endl;
149 | 				curr_success = false;
150 | 			}
151 | 		}
152 | 		VecX r = cp.A*x - cp.b;
153 | 		double rn = r.norm(); // x should minimize |Ax-b|
154 | 		if( rn > 1e-10 ){
155 | 			std::cerr << "(" << names[s] << ") Failed to minimize: |Ax-b| = " << rn << std::endl;
156 | 			curr_success = false;
157 | 		}
158 | 
159 | 		if( curr_success ){ std::cout << "(" << names[s] << ") Linear (" << dim << "): Success" << std::endl; }
160 | 		else{ success = false; }
161 | 
162 | 	} // end loop solvers
163 | 
164 | 	return success;
165 | }
166 | 
167 | // Usage: testSolvers [lbfgs|cg|newton|trustregion|all]   (default: all). Exit code is EXIT_SUCCESS only if every test passes.
168 | int main(int argc, char *argv[] ){
169 | 	srand(100); // fixed seed so runs are repeatable
170 | 	std::vector< std::string > names; // names[i] labels min2[i] and minD[i]
171 | 	std::vector< MinPtr2 > min2;
172 | 	std::vector< MinPtrD > minD;
173 | 
174 | 	std::string mode = "all";
175 | 	if( argc == 2 ){ mode = std::string(argv[1]); }
176 | 
177 | 	if( mode=="lbfgs" || mode=="all" ){
178 | 		min2.emplace_back( std::make_shared< LBFGS<double,2> >() );
179 | 		minD.emplace_back( std::make_shared< LBFGS<double,Eigen::Dynamic> >() );
180 | 		names.emplace_back( "lbfgs" );
181 | 	}
182 | 	if( mode=="cg" || mode=="all" ){
183 | 		min2.emplace_back( std::make_shared< NonLinearCG<double,2> >() );
184 | 		minD.emplace_back( std::make_shared< NonLinearCG<double,Eigen::Dynamic> >() );
185 | 		names.emplace_back( "cg" );
186 | 	}
187 | 	if( mode=="newton" || mode=="all" ){
188 | 		min2.emplace_back( std::make_shared< Newton<double,2> >() );
189 | 		minD.emplace_back( std::make_shared< Newton<double,Eigen::Dynamic> >() );
190 | 		names.emplace_back( "newton" );
191 | 	}
192 | 	if( mode=="trustregion" || mode=="all" ){
193 | 		min2.emplace_back( std::make_shared< TrustRegion<double,2> >() );
194 | 		minD.emplace_back( std::make_shared< TrustRegion<double,Eigen::Dynamic> >() );
195 | 		names.emplace_back( "trustregion" );
196 | 	}
197 | 	if( names.empty() ){ std::cerr << "Unknown mode: " << mode << std::endl; return EXIT_FAILURE; } // otherwise no test runs and SUCCESS is reported
198 | 	bool success = true;
199 | 	success &= test_linear( minD, names );
200 | 	success &= test_rb( min2, names );
201 | 	success &= test_zero( minD, names );
202 | 	if( success ){
203 | 		std::cout << "\nSUCCESS!" << std::endl;
204 | 		return EXIT_SUCCESS;
205 | 	}
206 | 	else{ std::cout << "\n**FAILURE!" << std::endl; }
207 | 	return EXIT_FAILURE;
208 | }
209 | 
210 | 
211 | 
212 | 


--------------------------------------------------------------------------------
/include/MCL/MoreThuente.hpp:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | //
  3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | // of this software and associated documentation files (the "Software"), to deal
  5 | // in the Software without restriction, including without limitation the rights
  6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7 | // copies of the Software, and to permit persons to whom the Software is
  8 | // furnished to do so, subject to the following conditions:
  9 | // 
 10 | // The above copyright notice and this permission notice shall be included in all
 11 | // copies or substantial portions of the Software.
 12 | // 
 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 19 | // SOFTWARE.
 20 | //
 21 | // From https://github.com/PatWie/CppNumericalSolvers
 22 | //
 23 | 
 24 | #ifndef MCL_MORETHUENTE_H
 25 | #define MCL_MORETHUENTE_H
 26 | 
 27 | #include "Problem.hpp"
 28 | 
 29 | namespace mcl {
 30 | namespace optlib {
 31 | 
 32 | template<typename Scalar, int DIM>
 33 | class MoreThuente {
 34 | private:
 35 | 	typedef Eigen::Matrix<Scalar,DIM,1> VectorX;
 36 | 
 37 | public:
 38 | 
 39 | 	static inline Scalar search(const VectorX &x, const VectorX &p, Problem<Scalar,DIM> &problem, Scalar alpha0){
 40 | 		Scalar alpha = alpha0;
 41 | 		cvsrch(problem, x, alpha, p);
 42 | 		return alpha;
 43 | 	}
 44 | 
 45 | 	static void cvsrch(Problem<Scalar,DIM> &problem, const VectorX &x0, Scalar &stp, const VectorX &s) {
 46 | 		int info           = 0;
 47 | 		int infoc          = 1;
 48 | 		const Scalar xtol   = 1e-15;
 49 | 		const Scalar ftol   = 1e-4;
 50 | 		const Scalar gtol   = 1e-2;
 51 | 		const Scalar stpmin = 1e-15;
 52 | 		const Scalar stpmax = 1e15;
 53 | 		const Scalar xtrapf = 4;
 54 | 		const int maxfev   = 20;
 55 | 		int nfev           = 0;
 56 | 		int dim = x0.rows();
 57 | 
 58 | 		VectorX g;
 59 | 		if( DIM == Eigen::Dynamic ){ g = VectorX::Zero(dim); }
 60 | 		else{ g.setZero(); }
 61 | 
 62 | 		Scalar f = problem.gradient(x0, g);
 63 | 		Scalar dginit = g.dot(s);
 64 | 		if (dginit >= 0.0) {
 65 | 			// no descent direction
 66 | 			return;
 67 | 		}
 68 | 
 69 | 		bool brackt      = false;
 70 | 		bool stage1      = true;
 71 | 
 72 | 		Scalar finit      = f;
 73 | 		Scalar dgtest     = ftol * dginit;
 74 | 		Scalar width      = stpmax - stpmin;
 75 | 		Scalar width1     = 2 * width;
 76 | 		VectorX x = x0.eval();
 77 | 
 78 | 		Scalar stx        = 0.0;
 79 | 		Scalar fx         = finit;
 80 | 		Scalar dgx        = dginit;
 81 | 		Scalar sty        = 0.0;
 82 | 		Scalar fy         = finit;
 83 | 		Scalar dgy        = dginit;
 84 | 
 85 | 		Scalar stmin = 0.0;
 86 | 		Scalar stmax = 0.0;
 87 | 
 88 | 		const int max_iters = 100000;
 89 | 		int iter = 0;
 90 | 		for( ; iter<max_iters; ++iter ){
 91 | 
 92 | 			// make sure we stay in the interval when setting min/max-step-width
 93 | 			if (brackt) {
 94 | 				stmin = std::min(stx, sty);
 95 | 				stmax = std::max(stx, sty);
 96 | 			} else {
 97 | 				stmin = stx;
 98 | 				stmax = stp + xtrapf * (stp - stx);
 99 | 			}
100 | 
101 | 			// Force the step to be within the bounds stpmax and stpmin.
102 | 			stp = std::max(stp, stpmin);
103 | 			stp = std::min(stp, stpmax);
104 | 
105 | 			// Oops, let us return the last reliable values
106 | 			if (
107 | 			(brackt && ((stp <= stmin) || (stp >= stmax)))
108 | 			|| (nfev >= maxfev - 1 ) || (infoc == 0)
109 | 			|| (brackt & (stmax - stmin <= xtol * stmax))) {
110 | 				stp = stx;
111 | 			}
112 | 
113 | 			// test new point
114 | 			x = x0 + stp * s;
115 | 			f = problem.gradient(x, g);
116 | 			nfev++;
117 | 			Scalar dg = g.dot(s);
118 | 			Scalar ftest1 = finit + stp * dgtest;
119 | 
120 | 			// all possible convergence tests
121 | 			if ((brackt & ((stp <= stmin) | (stp >= stmax))) | (infoc == 0))
122 | 				info = 6;
123 | 
124 | 			if ((stp == stpmax) & (f <= ftest1) & (dg <= dgtest))
125 | 				info = 5;
126 | 	
127 | 			if ((stp == stpmin) & ((f > ftest1) | (dg >= dgtest)))
128 | 				info = 4;
129 | 		
130 | 			if (nfev >= maxfev)
131 | 				info = 3;
132 | 	
133 | 			if (brackt & (stmax - stmin <= xtol * stmax))
134 | 				info = 2;
135 | 	
136 | 			if ((f <= ftest1) & (fabs(dg) <= gtol * (-dginit)))
137 | 				info = 1;
138 | 
139 | 			// terminate when convergence reached
140 | 			if (info != 0)
141 | 				return;
142 | 
143 | 			if (stage1 & (f <= ftest1) & (dg >= std::min(ftol, gtol)*dginit))
144 | 				stage1 = false;
145 | 
146 | 			if (stage1 & (f <= fx) & (f > ftest1)) {
147 | 				Scalar fm = f - stp * dgtest;
148 | 				Scalar fxm = fx - stx * dgtest;
149 | 				Scalar fym = fy - sty * dgtest;
150 | 				Scalar dgm = dg - dgtest;
151 | 				Scalar dgxm = dgx - dgtest;
152 | 				Scalar dgym = dgy - dgtest;
153 | 
154 | 				cstep( stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, brackt, stmin, stmax, infoc);
155 | 
156 | 				fx = fxm + stx * dgtest;
157 | 				fy = fym + sty * dgtest;
158 | 				dgx = dgxm + dgtest;
159 | 				dgy = dgym + dgtest;
160 | 			} else {
161 | 				// this is ugly and some variables should be moved to the class scope
162 | 				cstep( stx, fx, dgx, sty, fy, dgy, stp, f, dg, brackt, stmin, stmax, infoc);
163 | 			}
164 | 
165 | 			if (brackt) {
166 | 				if (fabs(sty - stx) >= 0.66 * width1)
167 | 					stp = stx + 0.5 * (sty - stx);
168 | 
169 | 				width1 = width;
170 | 				width = fabs(sty - stx);
171 | 			}
172 | 
173 | 		} // end while true
174 | 
175 | 		if( iter == max_iters ){
176 | 			throw std::runtime_error("MoreThuente::linesearch Error: Reached max_iter");
177 | 		}
178 | 
179 | 		return;
180 | 	}
181 | 
182 | 	static void cstep(Scalar& stx, Scalar& fx, Scalar& dx, Scalar& sty, Scalar& fy, Scalar& dy, Scalar& stp,
183 | 	Scalar& fp, Scalar& dp, bool& brackt, Scalar& stpmin, Scalar& stpmax, int& info) {
184 | 		info = 0;
185 | 		bool bound = false;
186 | 
187 | 		// Check the input parameters for errors.
188 | 		if ((brackt & ((stp <= std::min(stx, sty) ) | (stp >= std::max(stx, sty)))) | (dx * (stp - stx) >= 0.0)
189 | 		| (stpmax < stpmin)) {
190 | 			return;
191 | 		}
192 | 
193 | 		Scalar sgnd = dp * (dx / fabs(dx));
194 | 
195 | 		Scalar stpf = 0;
196 | 		Scalar stpc = 0;
197 | 		Scalar stpq = 0;
198 | 
199 | 		if (fp > fx) {
200 | 			info = 1;
201 | 			bound = true;
202 | 			Scalar theta = 3. * (fx - fp) / (stp - stx) + dx + dp;
203 | 			Scalar s = std::max(theta, std::max(dx, dp));
204 | 			Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s));
205 | 			if (stp < stx)
206 | 				gamma = -gamma;
207 | 
208 | 			Scalar p = (gamma - dx) + theta;
209 | 			Scalar q = ((gamma - dx) + gamma) + dp;
210 | 			Scalar r = p / q;
211 | 			stpc = stx + r * (stp - stx);
212 | 			stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / 2.) * (stp - stx);
213 | 			if (fabs(stpc - stx) < fabs(stpq - stx))
214 | 				stpf = stpc;
215 | 			else
216 | 				stpf = stpc + (stpq - stpc) / 2;
217 | 
218 | 			brackt = true;
219 | 		} else if (sgnd < 0.0) {
220 | 			info = 2;
221 | 			bound = false;
222 | 			Scalar theta = 3 * (fx - fp) / (stp - stx) + dx + dp;
223 | 			Scalar s = std::max(theta, std::max(dx, dp));
224 | 			Scalar gamma = s * sqrt((theta / s) * (theta / s)  - (dx / s) * (dp / s));
225 | 			if (stp > stx)
226 | 				gamma = -gamma;
227 | 
228 | 			Scalar p = (gamma - dp) + theta;
229 | 			Scalar q = ((gamma - dp) + gamma) + dx;
230 | 			Scalar r = p / q;
231 | 			stpc = stp + r * (stx - stp);
232 | 			stpq = stp + (dp / (dp - dx)) * (stx - stp);
233 | 			if (fabs(stpc - stp) > fabs(stpq - stp))
234 | 				stpf = stpc;
235 | 			else
236 | 				stpf = stpq;
237 | 
238 | 			brackt = true;
239 | 		} else if (fabs(dp) < fabs(dx)) {
240 | 			info = 3;
241 | 			bound = 1;
242 | 			Scalar theta = 3 * (fx - fp) / (stp - stx) + dx + dp;
243 | 			Scalar s = std::max(theta, std::max( dx, dp));
244 | 			Scalar gamma = s * sqrt(std::max(static_cast<Scalar>(0.), (theta / s) * (theta / s) - (dx / s) * (dp / s)));
245 | 			if (stp > stx)
246 | 				gamma = -gamma;
247 | 
248 | 			Scalar p = (gamma - dp) + theta;
249 | 			Scalar q = (gamma + (dx - dp)) + gamma;
250 | 			Scalar r = p / q;
251 | 			if ((r < 0.0) & (gamma != 0.0)) {
252 | 				stpc = stp + r * (stx - stp);
253 | 			} else if (stp > stx) {
254 | 				stpc = stpmax;
255 | 			} else {
256 | 				stpc = stpmin;
257 | 			}
258 | 			stpq = stp + (dp / (dp - dx)) * (stx - stp);
259 | 			if (brackt) {
260 | 				if (fabs(stp - stpc) < fabs(stp - stpq)) {
261 | 					stpf = stpc;
262 | 				} else {
263 | 					stpf = stpq;
264 | 				}
265 | 			} else {
266 | 				if (fabs(stp - stpc) > fabs(stp - stpq)) {
267 | 					stpf = stpc;
268 | 				} else {
269 | 					stpf = stpq;
270 | 				}
271 | 			}
272 | 		} else {
273 | 			info = 4;
274 | 			bound = false;
275 | 			if (brackt) {
276 | 				Scalar theta = 3 * (fp - fy) / (sty - stp) + dy + dp;
277 | 				Scalar s = std::max(theta, std::max(dy, dp));
278 | 				Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dy / s) * (dp / s));
279 | 				if (stp > sty)
280 | 					gamma = -gamma;
281 | 
282 | 				Scalar p = (gamma - dp) + theta;
283 | 				Scalar q = ((gamma - dp) + gamma) + dy;
284 | 				Scalar r = p / q;
285 | 				stpc = stp + r * (sty - stp);
286 | 				stpf = stpc;
287 | 			} else if (stp > stx)
288 | 				stpf = stpmax;
289 | 			else {
290 | 				stpf = stpmin;
291 | 			}
292 | 		}
293 | 
294 | 		if (fp > fx) {
295 | 			sty = stp;
296 | 			fy = fp;
297 | 			dy = dp;
298 | 		} else {
299 | 			if (sgnd < 0.0) {
300 | 				sty = stx;
301 | 				fy = fx;
302 | 				dy = dx;
303 | 			}
304 | 			stx = stp;
305 | 			fx = fp;
306 | 			dx = dp;
307 | 		}
308 | 
309 | 		stpf = std::min(stpmax, stpf);
310 | 		stpf = std::max(stpmin, stpf);
311 | 		stp = stpf;
312 | 
313 | 		if (brackt & bound) {
314 | 			if (sty > stx) {
315 | 				stp = std::min(stx + static_cast<Scalar>(0.66) * (sty - stx), stp);
316 | 			} else {
317 | 				stp = std::max(stx + static_cast<Scalar>(0.66) * (sty - stx), stp);
318 | 			}
319 | 		}
320 | 
321 | 		return;
322 | 
323 | 	} // end cstep
324 | 
325 | };
326 | 
327 | }
328 | }
329 | 
330 | #endif
331 | 


--------------------------------------------------------------------------------