├── .gitignore
├── Alpha_GPs.cpp
├── Comparison_with_GPyTorch.py
├── Comparison_with_SciKit_Learn.py
├── GPs.cpp
├── GPs.h
├── LICENSE
├── Plot.py
├── Plot_Comparisons.py
├── README.md
├── include
│   ├── CppOptLib
│   │   ├── Experimental_lbfgsbsolver.h
│   │   ├── Experimental_morethuente.h
│   │   ├── Original_lbfgsbsolver.h
│   │   ├── Original_morethuente.h
│   │   ├── README.md
│   │   ├── lbfgsbsolver.h
│   │   └── morethuente.h
│   └── README.md
├── main.cpp
├── makefile
├── misc
│   ├── 1D_example.png
│   ├── 2D_Comparison.png
│   ├── Custom_SciKit_Learn_Comparison.py
│   ├── Notes
│   │   └── Kernel_Post.pdf
│   ├── TEST_CODE
│   │   ├── Cholesky_Time_Comparison
│   │   │   ├── GPs.cpp
│   │   │   ├── GPs.h
│   │   │   ├── Plot.py
│   │   │   ├── SciPy_Cholesky.py
│   │   │   ├── include
│   │   │   │   ├── README.md
│   │   │   │   ├── lbfgsbsolver.h
│   │   │   │   └── morethuente.h
│   │   │   ├── main.cpp
│   │   │   └── makefile
│   │   ├── Kronecker
│   │   │   └── Kronecker.py
│   │   ├── Modified_L_BFGS
│   │   │   ├── Custom_SciKit_Learn_Comparison.py
│   │   │   ├── GPs.cpp
│   │   │   ├── GPs.h
│   │   │   ├── LICENSE
│   │   │   ├── Plot.py
│   │   │   ├── README.md
│   │   │   ├── SciKit_Learn_Comparison.py
│   │   │   ├── lbfgsbsolver.h
│   │   │   ├── main.cpp
│   │   │   ├── makefile
│   │   │   └── morethuente.h
│   │   └── Two_Dimensional
│   │       ├── 1D_example.png
│   │       ├── 2D_Comparison.png
│   │       ├── Deterimine_Dimension.py
│   │       ├── GPs.cpp
│   │       ├── GPs.h
│   │       ├── POSSIBLE_main.cpp
│   │       ├── Plot.py
│   │       ├── SciKit_Learn_Comparison_2D.py
│   │       ├── error_bounds.png
│   │       ├── include
│   │       │   └── README.md
│   │       ├── main.cpp
│   │       └── makefile
│   ├── Verbose_SciKit_Learn_Comparison.py
│   ├── example_plot.png
│   ├── minimize.cpp
│   ├── minimize.h
│   ├── predictive_uncertainty.png
│   ├── profilier_output.png
│   ├── utils.cpp
│   └── utils.h
└── tests
    ├── 1D_example.cpp
    ├── 1D_low_noise
    ├── 1D_low_noise.cpp
    ├── 2D_example.cpp
    ├── 2D_multimodal.cpp
    ├── Comparison_with_GPyTorch.py
    ├── Comparison_with_SciKit_Learn.py
    ├── Plot_Comparisons.py
    └── Run_All_Tests.sh
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | .DS_Store 103 | .zip 104 | *.zip 105 | ~$* 106 | 107 | # Avoid bak files 108 | *~ 109 | *.aux 110 | *.bbl 111 | *.blg 112 | *.brf 113 | *.synctex.gz 114 | *.mp4 115 | *.swp 116 | params 117 | *.swo 118 | *.zip 119 | src_file 120 | 121 | Run 122 | *.csv 123 | *.o 124 | 125 | callgrind* 126 | perf* 127 | BAK* 128 | 129 | include/cppoptlib 130 | include/cppoptlib/* 131 | include/CppOptLib/cppoptlib 132 | include/CppOptLib/cppoptlib/* 133 | misc/TEST_CODE/include/cppoptlib 134 | misc/TEST_CODE/include/cppoptlib/* 135 | misc/TEST_CODE/Two_Dimensional/include/cppoptlib 136 | misc/TEST_CODE/Two_Dimensional/include/cppoptlib/* 137 | misc/TEST_CODE/Cholesky_Time_Comparison/include/cppoptlib 138 | misc/TEST_CODE/Cholesky_Time_Comparison/include/cppoptlib/* 139 | vgcore.* 140 | 141 | include/LBFGS++/* 142 | include/LBFGS++/LBFGS 143 | 144 | ORIGINAL_makefile 145 | TEST_makefile 146 | 147 | tests/1D_example 148 | tests/2D_example 149 | tests/2D_multimodal 150 | GPyTorch_Results 151 | GPyTorch_Results/* 152 | SciKit_Learn_Results 153 | SciKit_Learn_Results/* 154 | tests/GPyTorch_Results 155 | tests/GPyTorch_Results/* 156 | tests/SciKit_Learn_Results 157 | tests/SciKit_Learn_Results/* 158 | 159 | gpytorch_results 160 | gpytorch_results/* 161 | scikit_learn_results 162 | scikit_learn_results/* 163 | tests/gpytorch_results 164 | tests/gpytorch_results/* 165 | tests/scikit_learn_results 166 | tests/scikit_learn_results/* 167 | -------------------------------------------------------------------------------- /Comparison_with_SciKit_Learn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.gaussian_process import GaussianProcessRegressor 4 | from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel 5 | import os 6 | import csv 7 | import time 8 | 9 | # Option to verify the CppGPs NLML calculation using the SciKit Learn framework 10 | VERIFY_NLML = False 11 | 12 | # Evaluate SciKit Learn Gaussian Process Regressor on CppGPs training data 13 | def main(): 14 | 15 | # Get training data from CppGPs setup 16 | inputDim, obsX, obsY, inVals = getTrainingData() 17 | if inputDim == 1: 18 | sampleCount = getSampleCount() 19 | 20 | ### SCIKIT LEARN IMPLEMENTATION 21 | X = np.reshape(obsX, [-1, inputDim]) 22 | Y = np.reshape(obsY, [-1]) 23 | Xtest = np.reshape(inVals, [-1, inputDim]) 24 | 25 | # Model parameters 26 | n_restarts = 0 27 | 
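# (n_restarts gives the number of additional optimizer restarts from random initial hyperparameters; 0 runs a single optimization)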
normalize_y = False 28 | use_white_noise = True 29 | RBF_bounds = [0.01, 100.0] 30 | Noise_bounds = [0.00001, 20.0] 31 | jitter = 1e-7 32 | 33 | # Define kernel for SciKit Learn Gaussian process regression model 34 | kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=1.0, length_scale_bounds=(RBF_bounds[0],RBF_bounds[1])) + \ 35 | WhiteKernel(noise_level=1, noise_level_bounds=(Noise_bounds[0], Noise_bounds[1])) 36 | 37 | 38 | # Fit model to data 39 | start_time = time.time() 40 | model = GaussianProcessRegressor(kernel=kernel, alpha=jitter, optimizer='fmin_l_bfgs_b', 41 | normalize_y=normalize_y, n_restarts_optimizer=n_restarts).fit(X, Y) 42 | end_time = time.time() 43 | 44 | # Display computation time 45 | print('\nComputation Time: {:.5f} s \n'.format(end_time-start_time)) 46 | 47 | print("Optimized Kernel Parameters:") 48 | print(model.kernel_) 49 | print(" ") 50 | mean, std = model.predict(Xtest, return_std=True) 51 | 52 | if inputDim == 1: 53 | model_samples = model.sample_y(Xtest, sampleCount) 54 | 55 | NLML = -model.log_marginal_likelihood() 56 | print("NLML: {:.4f}\n".format(NLML)) 57 | 58 | 59 | 60 | # Test the NLML calculation from the CppGPs optimal parameter results 61 | if VERIFY_NLML: 62 | filename = "params.csv" 63 | params = [] 64 | with open(filename, "r") as csvfile: 65 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 66 | for row in csvreader: 67 | p = row 68 | params.append(np.array(p)) 69 | params = np.array(params[0]).astype(np.float64) 70 | theta = np.log(params) 71 | CppGPs_NLML = -model.log_marginal_likelihood(theta = theta) 72 | print("CppGPs NLML = {:.4f}\n".format(CppGPs_NLML)) 73 | 74 | 75 | 76 | # Save results to file 77 | SciKit_Learn_results_dir = "./scikit_learn_results/" 78 | 79 | if not os.path.exists(SciKit_Learn_results_dir): 80 | os.makedirs(SciKit_Learn_results_dir) 81 | 82 | # Save prediction data 83 | filename = os.path.join(SciKit_Learn_results_dir, "predMean.npy") 84 | np.save(filename, mean) 85 | 86 | filename = os.path.join(SciKit_Learn_results_dir, "predStd.npy") 87 | np.save(filename, std) 88 | 89 | if inputDim == 1: 90 | # Save posterior samples 91 | filename = os.path.join(SciKit_Learn_results_dir, "samples.npy") 92 | np.save(filename, model_samples) 93 | 94 | # Save NLML 95 | filename = os.path.join(SciKit_Learn_results_dir, "NLML.npy") 96 | np.save(filename, NLML) 97 | 98 | 99 | 100 | # Get training data from CppGPs setup 101 | def getTrainingData(): 102 | 103 | # First determine the dimension of the input values 104 | filename = "predictions.csv" 105 | with open(filename, "r") as csvfile: 106 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 107 | row = next(csvreader) 108 | nonInputLength = 3 109 | inputDim = len(row) - nonInputLength 110 | 111 | # Get prediction data 112 | filename = "predictions.csv" 113 | inVals = []; trueVals = []; predMean = []; predStd = [] 114 | with open(filename, "r") as csvfile: 115 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 116 | for row in csvreader: 117 | 118 | if inputDim == 2: 119 | i1, i2, t, m, v = row 120 | inVals.append([i1,i2]) 121 | else: 122 | i, t, m, v = row 123 | inVals.append(i) 124 | 125 | trueVals.append(t) 126 | predMean.append(m) 127 | predStd.append(v) 128 | inVals = np.array(inVals).astype(np.float64) 129 | trueVals = np.array(trueVals).astype(np.float64) 130 | predMean = np.array(predMean).astype(np.float64) 131 | predStd = np.array(predStd).astype(np.float64) 132 | 133 | ## Get observation 
data 134 | filename = "observations.csv" 135 | obsX = []; obsY = [] 136 | with open(filename, "r") as csvfile: 137 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 138 | for row in csvreader: 139 | if inputDim == 2: 140 | x1, x2, y = row 141 | obsX.append([x1,x2]) 142 | else: 143 | x, y = row 144 | obsX.append(x) 145 | obsY.append(y) 146 | obsX = np.array(obsX).astype(np.float64) 147 | obsY = np.array(obsY).astype(np.float64) 148 | return inputDim, obsX, obsY, inVals 149 | 150 | def getSampleCount(): 151 | # Get posterior samples 152 | filename = "samples.csv" 153 | samples = [] 154 | with open(filename, "r") as csvfile: 155 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 156 | for row in csvreader: 157 | vals = np.array(row) 158 | samples.append(vals) 159 | samples = np.array(samples).astype(np.float64) 160 | return samples.shape[0] 161 | 162 | 163 | # Run main() function when called directly 164 | if __name__ == '__main__': 165 | main() 166 | -------------------------------------------------------------------------------- /GPs.h: -------------------------------------------------------------------------------- 1 | #ifndef _GPS_H 2 | #define _GPS_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "./include/LBFGS++/LBFGS.h" 11 | 12 | 13 | // Declare namespace for Gaussian process definitions 14 | namespace GP { 15 | 16 | // Define PI using arctan function 17 | static const double PI = std::atan(1)*4; 18 | 19 | // Define aliases with using declarations 20 | using Matrix = Eigen::MatrixXd; 21 | using Vector = Eigen::VectorXd; 22 | 23 | // Define function for retrieving time from chrono 24 | float getTime(std::chrono::high_resolution_clock::time_point start, std::chrono::high_resolution_clock::time_point end); 25 | using time = std::chrono::high_resolution_clock::time_point; 26 | using std::chrono::high_resolution_clock; 27 | 28 | // Define linspace function for generating equally spaced points 29 | Matrix linspace(double a, double b, int N, int dim=1); 30 | 31 | // Define function for sampling uniform distribution on interval 32 | Matrix sampleUnif(double a=0.0, double b=1.0, int N=1, int dim=1); 33 | Vector sampleUnifVector(Vector lbs, Vector ubs); 34 | Matrix sampleNormal(int N=1); 35 | 36 | // Define utility functions for computing distance matrices 37 | void pdist(Matrix & Dv, Matrix & X1, Matrix & X2); 38 | void squareForm(Matrix & D, Matrix & Dv, int n, double diagVal=0.0); 39 | 40 | 41 | // Define abstract base class for covariance kernels 42 | class Kernel 43 | { 44 | public: 45 | 46 | // Constructor and destructor 47 | Kernel(Vector p, int c) : kernelParams(p) , paramCount(c) { }; 48 | virtual ~Kernel() = default; 49 | 50 | // Compute the covariance matrix provided input observations and kernel hyperparameters 51 | virtual void computeCov(Matrix & K, Matrix & obsX, Vector & params, std::vector & gradList, double jitter, bool evalGrad) =0; 52 | 53 | // Compute the (cross-)covariance matrix for specified input vectors X1 and X2 54 | virtual void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params) = 0; 55 | 56 | // Set the noise level which is to be added to the diagonal of the covariance matrix 57 | void setNoise(double noise) { noiseLevel = noise; fixedNoise = true; } 58 | 59 | // Set the scaling level which is to be multiplied into the non-noise terms of the covariance matrix 60 | void setScaling(double scaling) { scalingLevel = scaling; fixedScaling = true; } 61 | 62 | // Set method for 
specifying the kernel parameters 63 | void setParams(Vector params) { kernelParams = params; }; 64 | 65 | // Get methods for retrieving the kernel paramaters 66 | int getParamCount() { return paramCount; } ; 67 | Vector getParams() { return kernelParams; }; 68 | 69 | protected: 70 | Vector kernelParams; 71 | int paramCount; 72 | double noiseLevel; 73 | double scalingLevel; 74 | bool fixedNoise = false; 75 | bool fixedScaling = false; 76 | //double parseParams(const Vector & params, Vector & kernelParams); 77 | //std::vector parseParams(const Vector & params, Vector & kernelParams); 78 | void parseParams(const Vector & params, Vector & kernelParams, std::vector & nonKernelParams); 79 | //virtual double evalKernel(Matrix&, Matrix&, Vector&, int) = 0; 80 | virtual double evalDistKernel(double, Vector&, int) = 0; 81 | }; 82 | 83 | 84 | // Define class for radial basis function (RBF) covariance kernel 85 | class RBF : public Kernel 86 | { 87 | public: 88 | 89 | // Constructor 90 | RBF() : Kernel(Vector(1), 1) { kernelParams(0)=1.0; }; 91 | 92 | // Compute the covariance matrix provided input observations and kernel hyperparameters 93 | void computeCov(Matrix & K, Matrix & obsX, Vector & params, std::vector & gradList, double jitter, bool evalGrad); 94 | // Compute the (cross-)covariance matrix for specified input vectors X1 and X2 95 | void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params); 96 | 97 | private: 98 | 99 | // Functions for evaluating the kernel on a pair of points / a specified squared distance 100 | //double evalKernel(Matrix&, Matrix&, Vector&, int); 101 | double evalDistKernel(double, Vector&, int); 102 | 103 | }; 104 | 105 | 106 | 107 | 108 | // Define class for Gaussian processes 109 | class GaussianProcess 110 | { 111 | public: 112 | 113 | // Constructor 114 | GaussianProcess() { } 115 | 116 | // Copy Constructor 117 | GaussianProcess(const GaussianProcess & m) { std::cout << "\n [*] WARNING: copy constructor called by GaussianProcess\n"; } 118 | 119 | // Define LBFGS++ function call for optimization 120 | double operator()(const Eigen::VectorXd& p, Eigen::VectorXd& g) { return evalNLML(p, g, true); } 121 | 122 | // Set methods 123 | void setObs(Matrix & x, Matrix & y) { obsX = x; obsY = y; } 124 | void setKernel(Kernel & k) { kernel = &k; } 125 | void setPred(Matrix & px) { predX = px; } 126 | void setNoise(double noise) { fixedNoise = true; noiseLevel = noise; } 127 | void setBounds(Vector & lbs, Vector & ubs) { lowerBounds = lbs; upperBounds = ubs; fixedBounds=true; } 128 | void setSolverIterations(int i) { solverIterations = i; }; 129 | void setSolverPrecision(double p) { solverPrecision = p; }; 130 | void setSolverRestarts(int n) { solverRestarts = n; }; 131 | 132 | // Compute methods 133 | void fitModel(); 134 | void predict(); 135 | double computeNLML(const Vector & p); 136 | double computeNLML(); 137 | 138 | // Get methods 139 | Matrix getPredMean() { return predMean; } 140 | Matrix getPredVar() { return predCov.diagonal() + noiseLevel*Eigen::VectorXd::Ones(predMean.size()); } 141 | Matrix getSamples(int count=10); 142 | Vector getParams() { return (*kernel).getParams(); } 143 | double getNoise() { return noiseLevel; } 144 | double getScaling() { return scalingLevel; } 145 | 146 | 147 | private: 148 | 149 | // Specify whether or not to display debugging and time diagnostic information 150 | bool VERBOSE = false; 151 | 152 | // Private member functions 153 | double evalNLML(const Vector & p); 154 | double evalNLML(const Vector & p, Vector & g, bool 
evalGrad=false); 155 | 156 | // Kernel and covariance matrix 157 | Kernel * kernel; 158 | double noiseLevel = 0.0; 159 | bool fixedNoise = false; 160 | double scalingLevel = 1.0; 161 | bool fixedScaling = false; 162 | double jitter = 1e-10; 163 | 164 | // Store Cholsky decomposition 165 | Eigen::LLT cholesky; 166 | 167 | // Hyperparameter bounds 168 | Vector lowerBounds; 169 | Vector upperBounds; 170 | bool fixedBounds = false; 171 | void parseBounds(Vector & lbs, Vector & ubs, int augParamCount); 172 | int solverIterations = 1000; 173 | //double solverPrecision = 1e8; 174 | double solverPrecision = 1e8; 175 | double solverRestarts = 0; 176 | 177 | // Store squared distance matrix and alpha for NLML/DNLML calculations 178 | Matrix alpha; 179 | 180 | // Observation data 181 | Matrix obsX; 182 | Matrix obsY; 183 | 184 | // Prediction data 185 | Matrix predX; 186 | Matrix predMean; 187 | Matrix predCov; 188 | double NLML = 0.0; 189 | 190 | 191 | // Utility function for determining "augmented" solver parameter count 192 | int getAugParamCount(int count); 193 | 194 | int paramCount; 195 | int augParamCount; 196 | 197 | // Need to find a way to avoid creating new gradList each time... 198 | std::vector gradList; 199 | 200 | // DEFINE TIMER VARIABLES 201 | ///* 202 | double time_computecov = 0.0; 203 | double time_cholesky_llt = 0.0; 204 | double time_alpha = 0.0; 205 | double time_NLML = 0.0; 206 | double time_term = 0.0; 207 | double time_grad = 0.0; 208 | double time_evaluation = 0.0; 209 | //*/ 210 | 211 | // Count gradient evaluations by optimizer 212 | int gradientEvals = 0; 213 | 214 | }; 215 | 216 | 217 | }; 218 | 219 | // ------------ CppOptLib Implementation ------------ // 220 | 221 | // Include CppOptLib files 222 | //#include "./include/cppoptlib/meta.h" 223 | //#include "./include/cppoptlib/boundedproblem.h" 224 | //#include "./include/cppoptlib/solver/lbfgsbsolver.h" 225 | 226 | // class GaussianProcess : public cppoptlib::BoundedProblem 227 | 228 | // Define alias for base class CppOptLib "BoundedProblem" for use in initialization list 229 | // using Superclass = cppoptlib::BoundedProblem; 230 | 231 | // Constructors [ Initialize lowerBounds / upperBounds of CppOptLib superclass to zero ] 232 | // GaussianProcess() : Superclass(Vector(0), Vector(0)) { } 233 | 234 | // Define CppOptLib methods 235 | // [ 'value' returns NLML and 'gradient' asigns the associated gradient vector to 'g' ] 236 | // double value(const Vector &p) { return evalNLML(p, cppOptLibgrad, true); } 237 | // void gradient(const Vector &p, Vector &g) { g = cppOptLibgrad; } 238 | // Vector cppOptLibgrad; 239 | 240 | 241 | #endif 242 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019- Nickolas Winovich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /Plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import csv 4 | import time 5 | 6 | import matplotlib as mpl 7 | from mpl_toolkits.mplot3d import Axes3D 8 | 9 | # Function for converting time to formatted string 10 | def convert_time(t): 11 | minutes = np.floor((t/3600.0) * 60) 12 | seconds = np.ceil(((t/3600.0) * 60 - minutes) * 60) 13 | if (minutes >= 1): 14 | minutes = np.floor(t/60.0) 15 | seconds = np.ceil((t/60.0 - minutes) * 60) 16 | t_str = str(int(minutes)).rjust(2) + 'm ' + \ 17 | str(int(seconds)).rjust(2) + 's' 18 | else: 19 | seconds = (t/60.0 - minutes) * 60 20 | t_str = str(seconds) + 's' 21 | return t_str 22 | 23 | # Define function for removing axes from MatPlotLib plots 24 | def remove_axes(ax): 25 | # make the panes transparent 26 | ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 27 | ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 28 | ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 29 | # make the grid lines transparent 30 | ax.xaxis._axinfo["grid"]['color'] = (1,1,1,0) 31 | ax.yaxis._axinfo["grid"]['color'] = (1,1,1,0) 32 | ax.zaxis._axinfo["grid"]['color'] = (1,1,1,0) 33 | # remove axes 34 | ax._axis3don = False 35 | 36 | 37 | # Evaluate SciKit Learn Gaussian Process Regressor and Plot Results 38 | def main(): 39 | 40 | 41 | # First determine the dimension of the input values 42 | filename = "predictions.csv" 43 | with open(filename, "r") as csvfile: 44 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 45 | row = next(csvreader) 46 | nonInputLength = 3 47 | inputDim = len(row) - nonInputLength 48 | 49 | # Get prediction data 50 | filename = "predictions.csv" 51 | inVals = []; trueVals = []; predMean = []; predStd = [] 52 | with open(filename, "r") as csvfile: 53 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 54 | for row in csvreader: 55 | 56 | if inputDim == 2: 57 | i1, i2, t, m, v = row 58 | inVals.append([i1,i2]) 59 | else: 60 | i, t, m, v = row 61 | inVals.append(i) 62 | 63 | trueVals.append(t) 64 | predMean.append(m) 65 | predStd.append(v) 66 | inVals = np.array(inVals).astype(np.float32) 67 | trueVals = np.array(trueVals).astype(np.float32) 68 | predMean = np.array(predMean).astype(np.float32) 69 | predStd = np.array(predStd).astype(np.float32) 70 | 71 | ## Get observation data 72 | filename = "observations.csv" 73 | obsX = []; obsY = [] 74 | with open(filename, "r") as csvfile: 75 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 76 | for row in csvreader: 77 | if inputDim == 2: 78 | x1, x2, y = row 79 | obsX.append([x1,x2]) 80 | else: 81 | x, y = row 82 | obsX.append(x) 83 | obsY.append(y) 84 | obsX = np.array(obsX).astype(np.float32) 85 | obsY = np.array(obsY).astype(np.float32) 86 | 87 | 88 | 89 | if inputDim == 1: 90 | # Get posterior samples 91 | filename = "samples.csv" 92 | samples = [] 93 | with open(filename, "r") as csvfile: 94 | csvreader = csv.reader(csvfile, 
delimiter=',', quotechar='|') 95 | for row in csvreader: 96 | vals = np.array(row) 97 | samples.append(vals) 98 | samples = np.array(samples).astype(np.float32) 99 | 100 | 101 | X = np.reshape(obsX, [-1, inputDim]) 102 | Y = np.reshape(obsY, [-1]) 103 | Xtest = np.reshape(inVals, [-1, inputDim]) 104 | 105 | 106 | 107 | ### ### 108 | ### PLOT RESULTS ### 109 | ### ### 110 | 111 | if inputDim == 1: 112 | 113 | """ ONE-DIMENSIONAL PLOTS """ 114 | 115 | ### C++ IMPLEMENTATION 116 | plt.figure() 117 | plt.plot(inVals, predMean, 'C0', linewidth=2.0) 118 | alpha = 0.075 119 | for k in [1,2,3]: 120 | plt.fill_between(inVals, predMean-k*predStd, predMean+k*predStd, where=1>=0, facecolor="C0", alpha=alpha, interpolate=True) 121 | plt.plot(inVals, trueVals, 'C1', linewidth=1.0, linestyle="dashed") 122 | alpha_scatter = 0.5 123 | plt.scatter(obsX, obsY, alpha=alpha_scatter) 124 | for i in range(0,samples.shape[0]): 125 | plt.plot(inVals, samples[i,:], 'C0', alpha=0.2, linewidth=1.0, linestyle="dashed") 126 | plt.suptitle("C++ Implementation") 127 | plt.show() 128 | 129 | 130 | elif inputDim == 2: 131 | 132 | """ TWO-DIMENSIONAL PLOTS """ 133 | 134 | # Flatten input values for compatibility with MatPlotLib's tri_surf 135 | plot_X_flat = []; plot_Y_flat = [] 136 | R = inVals.shape[0] 137 | for n in range(0,R): 138 | plot_X_flat.append(inVals[n,0]) 139 | plot_Y_flat.append(inVals[n,1]) 140 | 141 | tri_fig = plt.figure() 142 | tri_ax1 = tri_fig.add_subplot(111, projection='3d') 143 | linewidth = 0.1 144 | cmap = "Blues" 145 | 146 | # Plot CppGPs results 147 | tri_ax1.plot_trisurf(plot_X_flat,plot_Y_flat, predMean, cmap=cmap, linewidth=linewidth, antialiased=True) 148 | pred_title = "CppGPs" 149 | tri_ax1.set_title(pred_title, fontsize=24) 150 | 151 | # Remove axes from plots 152 | remove_axes(tri_ax1) 153 | 154 | # Bind axes for comparison 155 | def tri_on_move(event): 156 | if event.inaxes == tri_ax1: 157 | if tri_ax1.button_pressed in tri_ax1._rotate_btn: 158 | tri_ax2.view_init(elev=tri_ax1.elev, azim=tri_ax1.azim) 159 | elif tri_ax1.button_pressed in tri_ax1._zoom_btn: 160 | tri_ax2.set_xlim3d(tri_ax1.get_xlim3d()) 161 | tri_ax2.set_ylim3d(tri_ax1.get_ylim3d()) 162 | tri_ax2.set_zlim3d(tri_ax1.get_zlim3d()) 163 | elif event.inaxes == tri_ax2: 164 | if tri_ax2.button_pressed in tri_ax2._rotate_btn: 165 | tri_ax1.view_init(elev=tri_ax2.elev, azim=tri_ax2.azim) 166 | elif tri_ax2.button_pressed in tri_ax2._zoom_btn: 167 | tri_ax1.set_xlim3d(tri_ax2.get_xlim3d()) 168 | tri_ax1.set_ylim3d(tri_ax2.get_ylim3d()) 169 | tri_ax1.set_zlim3d(tri_ax2.get_zlim3d()) 170 | else: 171 | return 172 | tri_fig.canvas.draw_idle() 173 | tri_c1 = tri_fig.canvas.mpl_connect('motion_notify_event', tri_on_move) 174 | 175 | 176 | 177 | """ Zoom in to view predictive uncertainty """ 178 | plot_radius = 0.5 179 | plot_x_min = -0.125 180 | plot_y_min = -0.125 181 | plot_x_max = 0.25 182 | 183 | # Define conditions for including values associated with an input point (x,y) 184 | def include_conditions(x,y,delta=0.0): 185 | rad = np.sqrt(np.power(x,2) + np.power(y,2)) 186 | return (x-delta<=plot_x_max) and (x+delta>=plot_x_min) and (y+delta>=plot_y_min) and (rad-delta<=plot_radius) 187 | 188 | 189 | # Restrict plots to values corresponding to valid input points 190 | R = inVals.shape[0] 191 | plot_X_zoom = []; plot_Y_zoom = []; predMean_zoom = [] 192 | predMean_plus_std = []; predMean_minus_std = [] 193 | predMean_plus_std2 = []; predMean_minus_std2 = [] 194 | predMean_plus_std3 = []; predMean_minus_std3 = [] 195 | trueVals_zoom = [] 
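# Collect the predictive mean, the +/- 1/2/3 standard-deviation surfaces, and the true values for points inside the zoom region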
196 | for n in range(0,R): 197 | x = plot_X_flat[n] 198 | y = plot_Y_flat[n] 199 | if include_conditions(x,y,delta=0.025): 200 | plot_X_zoom.append(x) 201 | plot_Y_zoom.append(y) 202 | predMean_zoom.append(predMean[n]) 203 | predMean_plus_std.append( predMean[n] + 1 * predStd[n] ) 204 | predMean_minus_std.append( predMean[n] - 1 * predStd[n] ) 205 | predMean_plus_std2.append( predMean[n] + 2 * predStd[n] ) 206 | predMean_minus_std2.append( predMean[n] - 2 * predStd[n] ) 207 | predMean_plus_std3.append( predMean[n] + 3 * predStd[n] ) 208 | predMean_minus_std3.append( predMean[n] - 3 * predStd[n] ) 209 | trueVals_zoom.append(trueVals[n]) 210 | 211 | 212 | # Restrict observations to valid input points 213 | obsX_x_zoom = []; obsX_y_zoom = []; obsY_zoom = [] 214 | for n in range(0, obsX.shape[0]): 215 | x = obsX[n,0] 216 | y = obsX[n,1] 217 | if include_conditions(x,y): 218 | obsX_x_zoom.append(x) 219 | obsX_y_zoom.append(y) 220 | obsY_zoom.append(obsY[n]) 221 | 222 | 223 | # Initialize plot for assessing predictive uncertainty 224 | tri_fig2 = plt.figure() 225 | tri2_ax1 = tri_fig2.add_subplot(111, projection='3d') 226 | 227 | 228 | # Plot Predictive Mean 229 | linewidth = 0.1; alpha = 0.85 230 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_zoom, cmap=cmap, linewidth=linewidth, antialiased=True, alpha=alpha) 231 | 232 | 233 | # One Standard Deviation 234 | linewidth = 0.075; alpha = 0.2 235 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std, cmap=cmap, linewidth=linewidth, antialiased=True,alpha=alpha) 236 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 237 | 238 | # Two Standard Deviations 239 | linewidth = 0.05; alpha = 0.1 240 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std2, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 241 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std2, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 242 | 243 | # Three Standard Deviations 244 | linewidth = 0.01; alpha = 0.01 245 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std3, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 246 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std3, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 247 | 248 | # Scatter plot of training observations 249 | alpha = 0.4 250 | tri2_ax1.scatter(obsX_x_zoom, obsX_y_zoom, obsY_zoom, c='k', marker='o', s=15.0, alpha=alpha) 251 | 252 | # Add title to plot 253 | plt.suptitle("CppGPs Predictive Uncertainty", fontsize=24) 254 | 255 | # Remove axes from plot 256 | remove_axes(tri2_ax1) 257 | 258 | # Display plots 259 | plt.show() 260 | 261 | 262 | 263 | # Run main() function when called directly 264 | if __name__ == '__main__': 265 | main() 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CppGP - C++ Gaussian Process Library 2 | ==================== 3 | 4 | [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 5 | 6 | Implementation of Numerical Gaussian Processes in C++ 7 | 8 | ## Dependencies 9 | * [Eigen](https://eigen.tuxfamily.org/dox/GettingStarted.html) - High-level C++ library for linear algebra, matrix operations, and solvers (version 3.3.7) 10 | * [GCC](https://gcc.gnu.org/) - GNU compiler collection; more specifically the GCC C++ 
compiler is recommended 11 | * [LBFGS++](https://github.com/yixuan/LBFGSpp) - A header-only implementation of the L-BFGS algorithm in C++ 12 | 13 | 14 | #### Optional Dependencies for Plotting 15 | * [NumPy](http://www.numpy.org/) - Scientific computing package for Python 16 | * [csv](https://docs.python.org/3/library/csv.html) - Python module for working with comma separated value (csv) files 17 | * [MatPlotLib](https://matplotlib.org/) - Python plotting library 18 | 19 | #### Optional Dependencies for SciKit Learn / GPyTorch Comparisons 20 | * [SciKit Learn](https://scikit-learn.org/stable/) - Data analysis library for Python 21 | * [PyTorch](https://pytorch.org/) - Open source deep learning platform 22 | * [GPyTorch](https://github.com/cornellius-gp/gpytorch) - PyTorch implementation of Gaussian processes with extensive modeling capabilities 23 | 24 | 25 | ## Gaussian Process Regression 26 | 27 | ### Compiling and Running the Code 28 | The `main.cpp` file provides an example use of the CppGP code for Gaussian process regression. Before compilation, the following steps must be carried out: 29 | 30 | * Specify the path to the Eigen header files by editing the `EIGENPATH` variable in `makefile` 31 | * Download the `LBFGS++` code as instructed in the `include/README.md` file 32 | 34 | 35 | Once these steps are completed, the example code can be compiled and run as follows: 36 | ```console 37 | user@host $ make install 38 | g++ -c -Wall -std=c++17 -I/usr/include/eigen3 -g -march=native -fopenmp -O3 main.cpp -o main.o 39 | g++ -c -Wall -std=c++17 -I/usr/include/eigen3 -g -march=native -fopenmp -O3 GPs.cpp -o GPs.o 40 | g++ -std=c++17 -I/usr/include/eigen3 -g -march=native -fopenmp -O3 -o Run main.cpp GPs.cpp 41 | 42 | user@host $ ./Run 43 | 44 | Computation Time: 0.089068 s 45 | 46 | Optimized Hyperparameters: 47 | 0.066468 (Noise = 0.969875) 48 | 49 | NLML: 469.561 50 | 51 | ``` 52 | 53 | __Note:__ A slight reduction in the run time may be achieved by installing [gperftools](https://github.com/gperftools/gperftools) and prefacing the run statement with the `LD_PRELOAD` environment variable set to the path of the [TCMalloc](http://goog-perftools.sourceforge.net/doc/tcmalloc.html) shared object: 54 | ```console 55 | user@host $ LD_PRELOAD=/usr/lib/libtcmalloc.so.4 ./Run 56 | ``` 57 | 58 | ### Defining the Target Function and Training Data 59 | The `targetFunc` function defined at the beginning of the `main.cpp` file is used to generate artificial training data for the regression task: 60 | ```cpp 61 | // Specify the target function for Gaussian process regression 62 | double targetFunc(Eigen::MatrixXd X) 63 | { 64 | double oscillation = 30.0; 65 | double xshifted = 0.5*(X(0) + 1.0); 66 | return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0; 67 | } 68 | ``` 69 | The training data consists of a collection of input points `X` along with an associated collection of target values `y`. This data should be formatted so that `y(i) = targetFunc(X.row(i))` (with an optional additive noise term). A simple one-dimensional problem setup can be defined as follows: 70 | ```cpp 71 | int obsCount = 250; 72 | Matrix X = sampleUnif(-1.0, 1.0, obsCount); 73 | Matrix y; y.resize(obsCount, 1); 74 | ``` 75 | Noise can be added to the training target data `y` to better assess the fit of the model's predictive variance. 
The level of noise in the training data can be adjusted via the `noiseLevel` parameter and used to define the target data via: 76 | ```cpp 77 | auto noiseLevel = 1.0; 78 | auto noise = sampleNormal(obsCount) * noiseLevel; 79 | for ( auto i : boost::irange(0,obsCount) ) 80 | y(i) = targetFunc(X.row(i)) + noise(i); 81 | ``` 82 | 83 | ### Specifying and Fitting the Gaussian Process Model 84 | 85 | ```cpp 86 | // Initialize Gaussian process regression model 87 | GaussianProcess model; 88 | 89 | // Specify training observations for GP model 90 | model.setObs(X,y); 91 | 92 | // Initialize RBF kernel and assign it to the model 93 | RBF kernel; 94 | model.setKernel(kernel); 95 | 96 | // Fit model to the training data 97 | model.fitModel(); 98 | ``` 99 | 100 | ### Posterior Predictions and Sample Paths 101 | ```cpp 102 | // Define test mesh for GP model predictions 103 | int predCount = 100; 104 | auto testMesh = linspace(-1.0, 1.0, predCount); 105 | model.setPred(testMesh); 106 | 107 | // Compute predicted means and variances for the test points 108 | model.predict(); 109 | Matrix pmean = model.getPredMean(); 110 | Matrix pvar = model.getPredVar(); 111 | Matrix pstd = pvar.array().sqrt().matrix(); 112 | 113 | // Get sample paths from the posterior distribution of the model 114 | int sampleCount = 25; 115 | Matrix samples = model.getSamples(sampleCount); 116 | ``` 117 | 118 | ### Plotting Results of the Trained Gaussian Process Model 119 | The artificial observation data and corresponding predictions/samples are saved in the `observations.csv` and `predictions.csv`/`samples.csv` files, respectively. The trained model results can be plotted using the provided Python script `Plot.py`. 120 | 121 | 122 |
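For reference, the predicted means and variances computed by `predict()` correspond to the standard zero-mean Gaussian process posterior identities (see Rasmussen and Williams, Chapter 2). Writing $K = k(X,X) + \sigma_n^2 I$ for the training covariance matrix, $K_* = k(X, X_*)$ for the cross-covariance with the test points $X_*$, and $\mathbf{y}$ for the training targets,

$$
\bar{f}_* = K_*^\top K^{-1} \mathbf{y},
\qquad
\mathrm{cov}(f_*) = k(X_*, X_*) - K_*^\top K^{-1} K_* ,
$$

where `getPredVar()` additionally adds the fitted noise variance to the diagonal so that the reported uncertainty applies to noisy observations rather than to the latent function values.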

123 | ![Example regression plot in one dimension](misc/1D_example.png)
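For reference, the dashed curve in the figure above is the one-dimensional target function defined in `main.cpp`, which may be written as

$$
f(x) = 15 \, \sin\!\big(30\,(\tilde{x} - 0.1)\big)\,\big(0.5 - (\tilde{x} - 0.1)\big),
\qquad \tilde{x} = \tfrac{1}{2}(x + 1),
$$

an oscillatory, non-periodic function on the interval $[-1, 1]$.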

125 | 126 | 127 | For the two-dimensional case (i.e. when `inputDim=2` in the `main.cpp` file), an additional figure illustrating the predictive uncertainty of the model is provided; this plot shows a slice of the predictive mean, transparent standard-deviation bounds, and the observation data points used for training: 128 | 129 |

130 | ![Example plot of the predictive uncertainty](misc/predictive_uncertainty.png)
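In addition to the basic workflow shown above, the `GaussianProcess` class declared in `GPs.h` exposes a handful of optional controls for the noise model and the L-BFGS solver. A minimal sketch is given below; the specific values are illustrative rather than defaults:

```cpp
// Optional configuration of the GP model [ methods declared in GPs.h ]
GaussianProcess model;
model.setObs(X, y);               // training inputs / targets (Eigen matrices)
RBF kernel;
model.setKernel(kernel);

model.setNoise(0.33);             // fix the noise level rather than fitting it
model.setSolverIterations(500);   // cap the number of L-BFGS iterations
model.setSolverRestarts(2);       // retry from random initial hyperparameters

model.fitModel();
```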

132 | 133 | 134 | 135 | 136 | ### Comparison with SciKit Learn Implementation 137 | 138 | The results of the CppGP code and SciKit Learn `GaussianProcessRegressor` class can be compared using the `Comparison_with_SciKit_Learn.py` Python script. This code provides the estimated kernel/noise parameters and corresponding negative log marginal likelihood (NLML) calculations using the SciKit Learn framework. A qualitative comparison of the CppGP and SciKit Learn results can be plotted using the `Plot_Comparisons.py` script: 139 | 140 |

141 | ![Example regression plot in two dimensions](misc/2D_Comparison.png)
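Both implementations fit the hyperparameters $\theta$ by minimizing the same objective, the negative log marginal likelihood of the $n$ training targets,

$$
-\log p(\mathbf{y} \mid X, \theta) = \tfrac{1}{2}\, \mathbf{y}^\top K^{-1} \mathbf{y} + \tfrac{1}{2} \log \lvert K \rvert + \tfrac{n}{2} \log 2\pi ,
$$

where $K$ denotes the kernel matrix on the training inputs (including the noise term); closely matching NLML values therefore indicate that the two frameworks have converged to comparable models.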

143 | 144 | The `VERIFY_NLML` variable can also be set to `True` to validate the negative log marginal likelihood calculation computed by CppGP using the SciKit Learn framework. 145 | 146 | 147 | ### Comparison with GPyTorch Implementation 148 | 149 | The CppGP results can also be compared with two GPyTorch implementations of Gaussian processes: the standard GP model as well as a structured kernel interpolation (SKI) implementation. These results can be computed using the `Comparison_with_GPyTorch.py` script and can be plotted using the `Plot_Comparisons.py` script after setting the `USE_GPyTorch` variable to `True`. 150 | 151 | 152 | 153 | ## Profiling the CppGP Implementation 154 | 155 | ### [Update] 156 | The profiling steps below will not work with the current multi-threaded implementation. They have been included for reference, since these results provided the original motivation for incorporating multi-threading into certain parts of the current code. 157 | 158 | ### Requirements 159 | * [`valgrind`](http://valgrind.org/docs/manual/quick-start.html) - debugging/profiling tool suite 160 | * [`perf`](https://en.wikipedia.org/wiki/Perf_(Linux)) - performance analyzing tool for Linux 161 | * [`graphviz`](https://www.graphviz.org/) - open source graph visualization software 162 | * [`gprof2dot`](https://github.com/jrfonseca/gprof2dot) - Python script for converting profiler output to dot graphs 163 | 164 | Profiling data is produced via the `callgrind` tool in the `valgrind` suite: 165 | ``` 166 | valgrind --tool=callgrind --trace-children=yes ./Run 167 | ``` 168 | __Note:__ This will take _much_ more time to run than the standard execution time (e.g. 100x). 169 | 170 | 171 | A graph visualization of the node-wise execution times in the program can then be created via: 172 | ``` 173 | perf record -g -- ./Run 174 | perf script | c++filt | gprof2dot -s -n 5.25 -f perf | dot -Tpng -o misc/profilier_output.png 175 | ``` 176 | [//]: # (COMMENT: perf script | c++filt | python /usr/lib/python3.7/site-packages/gprof2dot.py -f perf | dot -Tpng -o output.png) 177 | 178 | 179 | __Note:__ The `-s` flag can also be removed from the `gprof2dot` call to show parameter types. The `-n` flag is used to specify the percentage threshold for omitting nodes. 180 | 181 | 182 | ### Example Profiler Graph 183 | An example graph generated using the procedures outlined above is provided below: 184 | 185 |

186 | ![Example profiler graph](misc/profilier_output.png)

188 | 189 | As can be seen from the figure, the majority of the computational demand of the CppGP implementation results from the `Eigen::LLT::solveInPlace` method. This corresponds to the term `cholesky.solve(Matrix::Identity(n,n))` in the `evalNLML()` function definition, which is used to compute the gradients of the NLML with respect to the hyperparameters. A simple multi-threaded implementation of this calculation has been incorporated into the code to achieve a considerable speed-up in the execution time. 190 | 191 | 192 | ## References 193 | __Gaussian Processes for Machine Learning__ is an extremely useful reference written by Carl Rasmussen and Christopher Williams; the book has also been made freely available on the [book webpage](http://www.gaussianprocess.org/gpml/). 194 | 195 | __Eigen__ provides a header-only library for linear algebra, matrix operations, and solving linear systems. The [Getting Started](https://eigen.tuxfamily.org/dox/GettingStarted.html) page of the Eigen documentation provides a basic introduction to the library. 196 | 197 | __LBFGS++__ is a header-only C++ implementation of the limited-memory BFGS algorithm (L-BFGS) for unconstrained minimization written by Yixuan Qiu. It is based on the libLBFGS C library developed by Naoaki Okazaki and also includes a detailed [API](http://yixuan.cos.name/LBFGSpp/doc/index.html). 198 | 199 | __CppOptimizationLibrary__ is an extensive header-only library written by Patrick Wieschollek with C++ implementations for a diverse collection of optimization algorithms; the library is available in the GitHub repository [PatWie/CppNumericalSolvers](https://github.com/PatWie/CppNumericalSolvers). 200 | 201 | __SciKit Learn__ provides a [Gaussian Process](https://scikit-learn.org/stable/modules/gaussian_process.html) Python module with an extensive collection of covariance kernels and options. 202 | -------------------------------------------------------------------------------- /include/CppOptLib/README.md: -------------------------------------------------------------------------------- 1 | # CppOptimizationLibrary 2 | 3 | The CppOptimizationLibrary can be downloaded from the GitHub repository [PatWie/CppNumericalSolvers](https://github.com/PatWie/CppNumericalSolvers). Alternatively, the repository can be cloned via: 4 | 5 | ```console 6 | user@host $ git clone git@github.com:PatWie/CppNumericalSolvers.git 7 | ``` 8 | 9 | The CppOptimizationLibrary's `cppoptlib` directory should then be copied to the CppGPs `include/` directory: 10 | 11 | ```console 12 | user@host $ cp -r CppNumericalSolvers/include/cppoptlib ./include/ 13 | ``` 14 | 15 | ## Update L-BFGS Code 16 | 17 | The original implementation of the L-BFGS algorithm provided by CppOptimizationLibrary needs to be slightly modified for compatibility with CppGPs.
In particular, the provided `lbfgsbsolver.h` file should be copied into the `cppoptlib/solver/` directory and the `morethuente.h` file should be copied into the `cppoptlib/linesearch/` directory (overwriting the original files): 18 | 19 | ```console 20 | user@host $ cp ./include/lbfgsbsolver.h ./include/cppoptlib/solver/ 21 | user@host $ cp ./include/morethuente.h ./include/cppoptlib/linesearch/ 22 | ``` 23 | -------------------------------------------------------------------------------- /include/README.md: -------------------------------------------------------------------------------- 1 | # LBFGS++ Library 2 | 3 | The LBFGS++ library can be downloaded from the GitHub repository [yixuan/LBFGSpp](https://github.com/yixuan/LBFGSpp). Alternatively, the repository can be cloned via: 4 | 5 | ```console 6 | user@host $ git clone git@github.com:yixuan/LBFGSpp.git 7 | ``` 8 | 9 | The contents of the LBFGS++ library's `include/` directory should then be copied to the CppGPs `include/LBFGS++/` directory: 10 | 11 | ```console 12 | user@host $ mkdir LBFGS++; cp -r LBFGSpp/include/* ./LBFGS++/ 13 | ``` 14 | 15 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | // main.cpp -- example use of CppGPs for Gaussian process regression 2 | #include <iostream> 3 | #include <cmath> 4 | #include <fstream> 5 | #include <iomanip> 6 | #include <string> 7 | #include <Eigen/Dense> 8 | #include <boost/range/irange.hpp> 9 | 10 | #include "GPs.h" 11 | 12 | // Specify the target function for Gaussian process regression 13 | double targetFunc(Eigen::MatrixXd X) 14 | { 15 | if ( X.size() == 1 ) 16 | { 17 | // Define the target function to be an oscillatory, non-periodic function 18 | double oscillation = 30.0; 19 | double xshifted = 0.5*(X(0) + 1.0); 20 | return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0; 21 | } 22 | else 23 | { 24 | // Define the target function to be the Marr wavelet (a.k.a "Mexican Hat" wavelet) 25 | // ( see https://en.wikipedia.org/wiki/Mexican_hat_wavelet ) 26 | double sigma = 0.25; 27 | double pi = std::atan(1)*4; 28 | double radialTerm = std::pow(X.squaredNorm()/sigma,2); 29 | return 2.0 / (std::sqrt(pi*sigma) * std::pow(pi,1.0/4.0)) * (1.0 - radialTerm) * std::exp(-radialTerm); 30 | } 31 | } 32 | 33 | 34 | // Example use of CppGPs code for Gaussian process regression 35 | int main(int argc, char const *argv[]) 36 | { 37 | 38 | // Inform Eigen of possible multi-threading 39 | Eigen::initParallel(); 40 | 41 | // Retrieve aliases from GP namespace 42 | using Matrix = Eigen::MatrixXd; 43 | 44 | // Convenience using-declarations 45 | using std::cout; 46 | using std::endl; 47 | using GP::GaussianProcess; 48 | using GP::linspace; 49 | using GP::sampleNormal; 50 | using GP::sampleUnif; 51 | using GP::RBF; 52 | 53 | // Used for timing code with chrono 54 | using GP::high_resolution_clock; 55 | using GP::time; 56 | using GP::getTime; 57 | 58 | // Set random seed based on system clock 59 | std::srand(static_cast<unsigned int>(high_resolution_clock::now().time_since_epoch().count())); 60 | 61 | // Fix random seed for debugging and testing 62 | //std::srand(static_cast<unsigned int>(0)); 63 | 64 | 65 | // 66 | // [ Define Training Data ] 67 | // 68 | 69 | // Specify the input dimensions 70 | //int inputDim = 1; 71 | int inputDim = 2; 72 | 73 | // Specify observation data count 74 | int obsCount; 75 | if ( inputDim == 1 ) 76 | obsCount = 250; 77 | else 78 | obsCount = 2000; 79 | 80 | // Specify observation noise level 81 | auto noiseLevel = 1.0; 82 | 83 | // Define random noise to add to
target observations 84 | Matrix noise; 85 | noise.noalias() = sampleNormal(obsCount) * noiseLevel; 86 | 87 | // Define observations by sampling random uniform distribution 88 | Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim); 89 | Matrix y; y.resize(obsCount, 1); 90 | 91 | // Define target observations 'y' by applying 'targetFunc' 92 | // to the input observations 'X' and adding a noise vector 93 | for ( auto i : boost::irange(0,obsCount) ) 94 | y(i) = targetFunc(X.row(i)) + noise(i); 95 | 96 | 97 | // 98 | // [ Construct Gaussian Process Model ] 99 | // 100 | 101 | // Initialize Gaussian process model 102 | GaussianProcess model; 103 | 104 | // Specify training observations for GP model 105 | model.setObs(X,y); 106 | 107 | // Fix noise level [ noise level is fit to the training data by default ] 108 | //model.setNoise(0.33); 109 | 110 | // Initialize a RBF covariance kernel and assign it to the model 111 | RBF kernel; 112 | model.setKernel(kernel); 113 | 114 | // Specify solver precision 115 | //if ( inputDim == 1 ) 116 | // model.setSolverPrecision(1e7); 117 | 118 | // Specify number of restarts for solver 119 | if ( inputDim == 1 ) 120 | model.setSolverRestarts(2); 121 | 122 | // Fit covariance kernel hyperparameters to the training data 123 | time start = high_resolution_clock::now(); 124 | model.fitModel(); 125 | time end = high_resolution_clock::now(); 126 | auto computationTime = getTime(start, end); 127 | 128 | // Display computation time required for fitting the GP model 129 | cout << "\nComputation Time: "; 130 | cout << computationTime << " s" << endl; 131 | 132 | // Retrieve the tuned/optimized kernel hyperparameters 133 | auto optParams = model.getParams(); 134 | auto noiseL = model.getNoise(); 135 | auto scalingL = model.getScaling(); 136 | cout << "\nOptimized Hyperparameters:" << endl << std::sqrt(scalingL) << "**2 * " << optParams.transpose() << " "; 137 | cout << "(Noise = " << noiseL << ")\n" << endl; 138 | 139 | // Display the negative log marginal likelihood (NLML) of the optimized model 140 | double NLML = model.computeNLML(); 141 | cout << "NLML: " << std::fixed << std::setprecision(4) << NLML << endl << endl; 142 | 143 | 144 | // 145 | // [ Posterior Predictions and Samples ] 146 | // 147 | 148 | // Define test mesh for GP model predictions 149 | int predCount; 150 | if ( inputDim == 1 ) 151 | predCount = 100; 152 | else 153 | predCount = 1000; 154 | int predRes = static_cast(std::pow(predCount,1.0/inputDim)); 155 | auto testMesh = linspace(-1.0, 1.0, predRes, inputDim); 156 | model.setPred(testMesh); 157 | 158 | // Reset predCount to account for rounding 159 | predCount = std::pow(predRes,inputDim); 160 | 161 | // Compute predicted means and variances for the test points 162 | model.predict(); 163 | Matrix pmean = model.getPredMean(); 164 | Matrix pvar = model.getPredVar(); 165 | Matrix pstd = pvar.array().sqrt().matrix(); 166 | 167 | // Get sample paths from the posterior distribution of the model 168 | int sampleCount; 169 | Matrix samples; 170 | if ( inputDim == 1 ) 171 | { 172 | sampleCount = 25; 173 | samples = model.getSamples(sampleCount); 174 | } 175 | 176 | 177 | // 178 | // [ Save Results for Plotting ] 179 | // 180 | 181 | // Save true and predicted means/variances to file 182 | std::string outputFile = "predictions.csv"; 183 | Matrix trueSoln(predCount,1); 184 | for ( auto i : boost::irange(0,predCount) ) 185 | trueSoln(i,0) = targetFunc(testMesh.row(i)); 186 | 187 | std::ofstream fout; 188 | fout.open(outputFile); 189 | for ( auto i : 
boost::irange(0,predCount) ) 190 | { 191 | for ( auto j : boost::irange(0,inputDim) ) 192 | fout << testMesh(i,j) << ","; 193 | 194 | fout << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n"; 195 | } 196 | fout.close(); 197 | 198 | // Save observations to file 199 | std::string outputObsFile = "observations.csv"; 200 | fout.open(outputObsFile); 201 | for ( auto i : boost::irange(0,obsCount) ) 202 | { 203 | for ( auto j : boost::irange(0,inputDim) ) 204 | fout << X(i,j) << ","; 205 | 206 | fout << y(i) << "\n"; 207 | } 208 | fout.close(); 209 | 210 | 211 | if ( inputDim == 1 ) 212 | { 213 | // Save samples to file 214 | std::string outputSampleFile = "samples.csv"; 215 | fout.open(outputSampleFile); 216 | for ( auto j : boost::irange(0,sampleCount) ) 217 | { 218 | for ( auto i : boost::irange(0,predCount) ) 219 | fout << samples(i,j) << ((i(optParams.size())) ) 235 | fout << optParams(i) << ","; 236 | fout << noiseL; 237 | fout.close(); 238 | 239 | return 0; 240 | 241 | } 242 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | # Define compiler and remove commands 2 | CXX=g++ 3 | RM=rm -f 4 | 5 | # Specify path to Eigen headers 6 | EIGENPATH=/usr/include/eigen3 7 | 8 | ### Optimize gcc compiler flags [ NO DEBUGGING ] 9 | CXXFLAGS=-std=c++17 -I${EIGENPATH} -DNDEBUG -march=native -fopenmp -O3 10 | 11 | ### Optimize gcc compiler flags [ DEBUGGING ] 12 | #CXXFLAGS=-std=c++17 -I${EIGENPATH} -g -march=native -fopenmp -O3 13 | 14 | CFLAGS=-c -Wall 15 | 16 | # Define all target list 17 | all: main.cpp GPs.cpp install tests 18 | 19 | # Install target list 20 | install: main.o GPs.o 21 | $(CXX) $(CXXFLAGS) -o Run main.cpp GPs.cpp 22 | 23 | # Test target list 24 | tests: test1 test2 test3 test4 25 | 26 | # Test targets 27 | test1: tests/1D_example.o GPs.o 28 | $(CXX) $(CXXFLAGS) -o tests/1D_example tests/1D_example.cpp GPs.cpp 29 | 30 | test2: tests/2D_example.o GPs.o 31 | $(CXX) $(CXXFLAGS) -o tests/2D_example tests/2D_example.cpp GPs.cpp 32 | 33 | test3: tests/2D_multimodal.o GPs.o 34 | $(CXX) $(CXXFLAGS) -o tests/2D_multimodal tests/2D_multimodal.cpp GPs.cpp 35 | 36 | test4: tests/1D_low_noise.o GPs.o 37 | $(CXX) $(CXXFLAGS) -o tests/1D_low_noise tests/1D_low_noise.cpp GPs.cpp 38 | 39 | # Object files 40 | main.o: main.cpp GPs.h 41 | $(CXX) $(CFLAGS) $(CXXFLAGS) $< -o $@ 42 | 43 | GPs.o: GPs.cpp GPs.h 44 | $(CXX) $(CFLAGS) $(CXXFLAGS) $< -o $@ 45 | 46 | # Test object files 47 | tests/1D_example.o: tests/1D_example.cpp GPs.h 48 | $(CXX) $(CFLAGS) $(CXXFLAGS) $< -o $@ 49 | 50 | tests/2D_example.o: tests/2D_example.cpp GPs.h 51 | $(CXX) $(CFLAGS) $(CXXFLAGS) $< -o $@ 52 | 53 | tests/2D_multimodal.o: tests/2D_multimodal.cpp GPs.h 54 | $(CXX) $(CFLAGS) $(CXXFLAGS) $< -o $@ 55 | 56 | tests/1D_low_noise.o: tests/1D_low_noise.cpp GPs.h 57 | $(CXX) $(CFLAGS) $(CXXFLAGS) $< -o $@ 58 | 59 | # Clean 60 | clean: 61 | rm GPs.o main.o tests/1D_example.o tests/2D_example.o tests/2D_multimodal.o tests/1D_example tests/2D_example tests/2D_multimodal tests/1D_low_noise.o tests/1D_low_noise 62 | -------------------------------------------------------------------------------- /misc/1D_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/1D_example.png -------------------------------------------------------------------------------- 
/misc/2D_Comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/2D_Comparison.png -------------------------------------------------------------------------------- /misc/Notes/Kernel_Post.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/Notes/Kernel_Post.pdf -------------------------------------------------------------------------------- /misc/TEST_CODE/Cholesky_Time_Comparison/GPs.h: -------------------------------------------------------------------------------- 1 | #ifndef _GPS_H 2 | #define _GPS_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Include CppOptLib files 11 | #include "./include/cppoptlib/meta.h" 12 | #include "./include/cppoptlib/boundedproblem.h" 13 | #include "./include/cppoptlib/solver/lbfgsbsolver.h" 14 | 15 | 16 | // Declare namespace for Gaussian process definitions 17 | namespace GP { 18 | 19 | // Define PI using arctan function 20 | static const double PI = std::atan(1)*4; 21 | 22 | // Define aliases with using declarations 23 | using Matrix = Eigen::MatrixXd; 24 | using Vector = Eigen::VectorXd; 25 | 26 | // Define function for retrieving time from chrono 27 | float getTime(std::chrono::high_resolution_clock::time_point start, std::chrono::high_resolution_clock::time_point end); 28 | using time = std::chrono::high_resolution_clock::time_point; 29 | using std::chrono::high_resolution_clock; 30 | 31 | // Define linspace function for generating equally spaced points 32 | Matrix linspace(double a, double b, int N, int dim=1); 33 | 34 | // Define function for sampling uniform distribution on interval 35 | Matrix sampleUnif(double a=0.0, double b=1.0, int N=1, int dim=1); 36 | Vector sampleUnifVector(Vector lbs, Vector ubs); 37 | Matrix sampleNormal(int N=1); 38 | 39 | // Define utility functions for computing distance matrices 40 | void pdist(Matrix & Dv, Matrix & X1, Matrix & X2); 41 | void squareForm(Matrix & D, Matrix & Dv, int n, double diagVal=0.0); 42 | 43 | 44 | // Define abstract base class for covariance kernels 45 | class Kernel 46 | { 47 | public: 48 | 49 | // Constructor and destructor 50 | Kernel(Vector p, int c) : kernelParams(p) , paramCount(c) { }; 51 | virtual ~Kernel() = default; 52 | 53 | // Compute the covariance matrix provided a "distance matrix" consisting of pairwise squared norms between points 54 | virtual void computeCov(Matrix & K, Matrix & D, Vector & params, std::vector & gradList, double jitter=0.0, bool evalGrad=false) = 0; 55 | 56 | // Compute the (cross-)covariance matrix for specified input vectors X1 and X2 57 | virtual void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params) = 0; 58 | 59 | // Set the noise level which is to be added to the diagonal of the covariance matrix 60 | void setNoise(double noise) { noiseLevel = noise; } 61 | 62 | // Set method for specifying the kernel parameters 63 | void setParams(Vector params) { kernelParams = params; }; 64 | 65 | // Get methods for retrieving the kernel paramaters 66 | int getParamCount() { return paramCount; } ; 67 | Vector getParams() { return kernelParams; }; 68 | 69 | protected: 70 | Vector kernelParams; 71 | int paramCount; 72 | double noiseLevel; 73 | //virtual double evalKernel(Matrix&, Matrix&, Vector&, int) = 0; 74 | virtual double evalDistKernel(double, 
Vector&, int) = 0; 75 | }; 76 | 77 | 78 | // Define class for radial basis function (RBF) covariance kernel 79 | class RBF : public Kernel 80 | { 81 | public: 82 | 83 | // Constructor 84 | RBF() : Kernel(Vector(1), 1) { kernelParams(0)=1.0; }; 85 | 86 | // Compute the covariance matrix provided a "distance matrix" consisting of pairwise squared norms between points 87 | void computeCov(Matrix & K, Matrix & D, Vector & params, std::vector & gradList, double jitter=0.0, bool evalGrad=false); 88 | 89 | // Compute the (cross-)covariance matrix for specified input vectors X1 and X2 90 | void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params); 91 | 92 | private: 93 | 94 | // Functions for evaluating the kernel on a pair of points / a specified squared distance 95 | //double evalKernel(Matrix&, Matrix&, Vector&, int); 96 | double evalDistKernel(double, Vector&, int); 97 | 98 | Matrix Kv; 99 | Matrix dK_i; 100 | Matrix dK_iv; 101 | }; 102 | 103 | 104 | 105 | 106 | // Define class for Gaussian processes 107 | class GaussianProcess : public cppoptlib::BoundedProblem 108 | { 109 | public: 110 | 111 | // Define alias for base class CppOptLib "BoundedProblem" for use in initialization list 112 | using Superclass = cppoptlib::BoundedProblem; 113 | 114 | // Constructors [ Initialize lowerBounds / upperBounds of CppOptLib superclass to zero ] 115 | GaussianProcess() : Superclass(Vector(0), Vector(0)) { } 116 | 117 | // Define CppOptLib methods 118 | // [ 'value' returns NLML and 'gradient' asigns the associated gradient vector to 'g' ] 119 | double value(const Vector &p) { return evalNLML(p, cppOptLibgrad, true); } 120 | void gradient(const Vector &p, Vector &g) { g = cppOptLibgrad; } 121 | Vector cppOptLibgrad; 122 | 123 | // Set methods 124 | void setObs(Matrix & x, Matrix & y) { obsX = x; obsY = y; N = static_cast(x.rows()); } 125 | void setKernel(Kernel & k) { kernel = &k; } 126 | void setPred(Matrix & px) { predX = px; } 127 | void setNoise(double noise) { fixedNoise = true; noiseLevel = noise; } 128 | void setBounds(Vector & lbs, Vector & ubs) { lowerBounds = lbs; upperBounds = ubs; fixedBounds=true; } 129 | void setSolverIterations(int i) { solverIterations = i; }; 130 | void setSolverPrecision(double p) { solverPrecision = p; }; 131 | 132 | // Compute methods 133 | void fitModel(); 134 | void predict(); 135 | double computeNLML(const Vector & p, double noise); 136 | double computeNLML(const Vector & p); 137 | double computeNLML(); 138 | 139 | // Get methods 140 | Matrix getPredMean() { return predMean; } 141 | Matrix getPredVar() { return predCov.diagonal() + noiseLevel*Eigen::VectorXd::Ones(predMean.size()); } 142 | Matrix getSamples(int count=10); 143 | Vector getParams() { return (*kernel).getParams(); } 144 | double getNoise() { return noiseLevel; } 145 | 146 | Matrix getK() { return K; } 147 | Matrix getAlpha() { return _alpha; } 148 | 149 | // Define method for superclass "GradientObj" used by 'cg_minimize' [only needed when using "./utils/minimize.h"] 150 | //void computeValueAndGradient(Vector X, double & val, Vector & D) { val = evalNLML(X,D,true); }; 151 | 152 | private: 153 | 154 | // Private member functions 155 | double evalNLML(const Vector & p); 156 | double evalNLML(const Vector & p, Vector & g, bool evalGrad=false); 157 | void computeDistMat(); 158 | 159 | // Status variables ( still needed ? 
) 160 | int N = 0; 161 | 162 | // Kernel and covariance matrix 163 | Kernel * kernel; 164 | double noiseLevel = 0.0; 165 | bool fixedNoise = false; 166 | //double jitter = 1e-7; 167 | double jitter = 1e-10; 168 | Matrix K; 169 | 170 | // Store Cholesky decomposition 171 | Eigen::LLT<Matrix> cholesky; 172 | 173 | // Hyperparameter bounds 174 | Vector lowerBounds; 175 | Vector upperBounds; 176 | bool fixedBounds = false; 177 | void parseBounds(Vector & lbs, Vector & ubs, int augParamCount); 178 | int solverIterations = 1000; 179 | double solverPrecision = 7.5e-5; 180 | 181 | // Store squared distance matrix and alpha for NLML/DNLML calculations 182 | Matrix distMatrix; 183 | Matrix _alpha; 184 | 185 | // Observation data 186 | Matrix obsX; 187 | Matrix obsY; 188 | 189 | // Prediction data 190 | Matrix predX; 191 | Matrix predMean; 192 | Matrix predCov; 193 | double NLML = 0.0; 194 | 195 | int paramCount; 196 | int augParamCount; 197 | Matrix term; 198 | 199 | // Need to find a way to avoid creating new gradList each time... 200 | std::vector<Matrix> gradList; 201 | 202 | // DEFINE TIMER VARIABLES 203 | ///* 204 | double time_computecov = 0.0; 205 | double time_cholesky_llt = 0.0; 206 | double time_NLML = 0.0; 207 | double time_term = 0.0; 208 | double time_grad = 0.0; 209 | //*/ 210 | 211 | // Count gradient evaluations by optimizer 212 | int gradientEvals = 0; 213 | 214 | // Track old noise/length values to check for redundant calculations in optimizer 215 | //double oldN = 0.0; 216 | //double oldL = 0.0; 217 | }; 218 | 219 | 220 | }; 221 | 222 | 223 | #endif 224 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Cholesky_Time_Comparison/Plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import csv 4 | import time 5 | 6 | import matplotlib as mpl 7 | from mpl_toolkits.mplot3d import Axes3D 8 | 9 | # Function for converting time to formatted string 10 | def convert_time(t): 11 | minutes = np.floor((t/3600.0) * 60) 12 | seconds = np.ceil(((t/3600.0) * 60 - minutes) * 60) 13 | if (minutes >= 1): 14 | minutes = np.floor(t/60.0) 15 | seconds = np.ceil((t/60.0 - minutes) * 60) 16 | t_str = str(int(minutes)).rjust(2) + 'm ' + \ 17 | str(int(seconds)).rjust(2) + 's' 18 | else: 19 | seconds = (t/60.0 - minutes) * 60 20 | t_str = str(seconds) + 's' 21 | return t_str 22 | 23 | # Define function for removing axes from MatPlotLib plots 24 | def remove_axes(ax): 25 | # make the panes transparent 26 | ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 27 | ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 28 | ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 29 | # make the grid lines transparent 30 | ax.xaxis._axinfo["grid"]['color'] = (1,1,1,0) 31 | ax.yaxis._axinfo["grid"]['color'] = (1,1,1,0) 32 | ax.zaxis._axinfo["grid"]['color'] = (1,1,1,0) 33 | # remove axes 34 | ax._axis3don = False 35 | 36 | 37 | # Plot results of the CppGPs Gaussian process regression 38 | def main(): 39 | 40 | 41 | # First determine the dimension of the input values 42 | filename = "predictions.csv" 43 | with open(filename, "r") as csvfile: 44 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 45 | row = next(csvreader) 46 | nonInputLength = 3 47 | inputDim = len(row) - nonInputLength 48 | 49 | # Get prediction data 50 | filename = "predictions.csv" 51 | inVals = []; trueVals = []; predMean = []; predStd = [] 52 | with open(filename, "r") as csvfile: 53 | csvreader =
csv.reader(csvfile, delimiter=',', quotechar='|') 54 | for row in csvreader: 55 | 56 | if inputDim == 2: 57 | i1, i2, t, m, v = row 58 | inVals.append([i1,i2]) 59 | else: 60 | i, t, m, v = row 61 | inVals.append(i) 62 | 63 | trueVals.append(t) 64 | predMean.append(m) 65 | predStd.append(v) 66 | inVals = np.array(inVals).astype(np.float32) 67 | trueVals = np.array(trueVals).astype(np.float32) 68 | predMean = np.array(predMean).astype(np.float32) 69 | predStd = np.array(predStd).astype(np.float32) 70 | 71 | ## Get observation data 72 | filename = "observations.csv" 73 | obsX = []; obsY = [] 74 | with open(filename, "r") as csvfile: 75 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 76 | for row in csvreader: 77 | if inputDim == 2: 78 | x1, x2, y = row 79 | obsX.append([x1,x2]) 80 | else: 81 | x, y = row 82 | obsX.append(x) 83 | obsY.append(y) 84 | obsX = np.array(obsX).astype(np.float32) 85 | obsY = np.array(obsY).astype(np.float32) 86 | 87 | 88 | 89 | if inputDim == 1: 90 | # Get posterior samples 91 | filename = "samples.csv" 92 | samples = [] 93 | with open(filename, "r") as csvfile: 94 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 95 | for row in csvreader: 96 | vals = np.array(row) 97 | samples.append(vals) 98 | samples = np.array(samples).astype(np.float32) 99 | 100 | 101 | X = np.reshape(obsX, [-1, inputDim]) 102 | Y = np.reshape(obsY, [-1]) 103 | Xtest = np.reshape(inVals, [-1, inputDim]) 104 | 105 | 106 | 107 | ### ### 108 | ### PLOT RESULTS ### 109 | ### ### 110 | 111 | if inputDim == 1: 112 | 113 | """ ONE-DIMENSIONAL PLOTS """ 114 | 115 | ### C++ IMPLEMENTATION 116 | plt.figure() 117 | plt.plot(inVals, predMean, 'C0', linewidth=2.0) 118 | alpha = 0.075 119 | for k in [1,2,3]: 120 | plt.fill_between(inVals, predMean-k*predStd, predMean+k*predStd, where=1>=0, facecolor="C0", alpha=alpha, interpolate=True) 121 | plt.plot(inVals, trueVals, 'C1', linewidth=1.0, linestyle="dashed") 122 | alpha_scatter = 0.5 123 | plt.scatter(obsX, obsY, alpha=alpha_scatter) 124 | for i in range(0,samples.shape[0]): 125 | plt.plot(inVals, samples[i,:], 'C0', alpha=0.2, linewidth=1.0, linestyle="dashed") 126 | plt.suptitle("C++ Implementation") 127 | plt.show() 128 | 129 | 130 | elif inputDim == 2: 131 | 132 | """ TWO-DIMENSIONAL PLOTS """ 133 | 134 | # Flatten input values for compatibility with MatPlotLib's tri_surf 135 | plot_X_flat = []; plot_Y_flat = [] 136 | R = inVals.shape[0] 137 | for n in range(0,R): 138 | plot_X_flat.append(inVals[n,0]) 139 | plot_Y_flat.append(inVals[n,1]) 140 | 141 | tri_fig = plt.figure() 142 | tri_ax1 = tri_fig.add_subplot(111, projection='3d') 143 | linewidth = 0.1 144 | cmap = "Blues" 145 | 146 | # Plot CppGPs results 147 | tri_ax1.plot_trisurf(plot_X_flat,plot_Y_flat, predMean, cmap=cmap, linewidth=linewidth, antialiased=True) 148 | pred_title = "CppGPs" 149 | tri_ax1.set_title(pred_title, fontsize=24) 150 | 151 | # Remove axes from plots 152 | remove_axes(tri_ax1) 153 | 154 | # Bind axes for comparison [ in the two-plot comparison scripts this handler 155 | # mirrors the view of a second axis 'tri_ax2'; only one 3D axis is created 156 | # in this script, so the handler simply triggers a redraw for that axis ] 157 | def tri_on_move(event): 158 | if event.inaxes != tri_ax1: 159 | return 160 | tri_fig.canvas.draw_idle() 161 | tri_c1 = tri_fig.canvas.mpl_connect('motion_notify_event', tri_on_move) 174 | 175 | 176 | 177 | """ Zoom in to view predictive uncertainty """ 178 | plot_radius = 0.5 179 | plot_x_min = -0.125 180 | plot_y_min = -0.125 181 | plot_x_max = 0.25 182 | 183 | # Define conditions for including values associated with an input point (x,y) 184 | def include_conditions(x,y,delta=0.0): 185 | rad = np.sqrt(np.power(x,2) + np.power(y,2)) 186 | return (x-delta<=plot_x_max) and (x+delta>=plot_x_min) and (y+delta>=plot_y_min) and (rad-delta<=plot_radius) 187 | 188 | 189 | # Restrict plots to values corresponding to valid input points 190 | R = inVals.shape[0] 191 | plot_X_zoom = []; plot_Y_zoom = []; predMean_zoom = [] 192 | predMean_plus_std = []; predMean_minus_std = [] 193 | predMean_plus_std2 = []; predMean_minus_std2 = [] 194 | predMean_plus_std3 = []; predMean_minus_std3 = [] 195 | trueVals_zoom = [] 196 | for n in range(0,R): 197 | x = plot_X_flat[n] 198 | y = plot_Y_flat[n] 199 | if include_conditions(x,y,delta=0.025): 200 | plot_X_zoom.append(x) 201 | plot_Y_zoom.append(y) 202 | predMean_zoom.append(predMean[n]) 203 | predMean_plus_std.append( predMean[n] + 1 * predStd[n] ) 204 | predMean_minus_std.append( predMean[n] - 1 * predStd[n] ) 205 | predMean_plus_std2.append( predMean[n] + 2 * predStd[n] ) 206 | predMean_minus_std2.append( predMean[n] - 2 * predStd[n] ) 207 | predMean_plus_std3.append( predMean[n] + 3 * predStd[n] ) 208 | predMean_minus_std3.append( predMean[n] - 3 * predStd[n] ) 209 | trueVals_zoom.append(trueVals[n]) 210 | 211 | 212 | # Restrict observations to valid input points 213 | obsX_x_zoom = []; obsX_y_zoom = []; obsY_zoom = [] 214 | for n in range(0, obsX.shape[0]): 215 | x = obsX[n,0] 216 | y = obsX[n,1] 217 | if include_conditions(x,y): 218 | obsX_x_zoom.append(x) 219 | obsX_y_zoom.append(y) 220 | obsY_zoom.append(obsY[n]) 221 | 222 | 223 | # Initialize plot for assessing predictive uncertainty 224 | tri_fig2 = plt.figure() 225 | tri2_ax1 = tri_fig2.add_subplot(111, projection='3d') 226 | 227 | 228 | # Plot Predictive Mean 229 | linewidth = 0.1; alpha = 0.85 230 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_zoom, cmap=cmap, linewidth=linewidth, antialiased=True, alpha=alpha) 231 | 232 | 233 | # One Standard Deviation 234 | linewidth = 0.075; alpha = 0.2 235 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std, cmap=cmap, linewidth=linewidth, antialiased=True,alpha=alpha) 236 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 237 | 238 | # Two Standard Deviations 239 | linewidth = 0.05; alpha = 0.1 240 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std2, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 241 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std2, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 242 | 243 | # Three Standard Deviations 244 | linewidth = 0.01; alpha = 0.01 245 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std3, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 246 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std3, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 247 | 248 | # Scatter
plot of training observations 249 | alpha = 0.4 250 | tri2_ax1.scatter(obsX_x_zoom, obsX_y_zoom, obsY_zoom, c='k', marker='o', s=15.0, alpha=alpha) 251 | 252 | # Add title to plot 253 | plt.suptitle("CppGPs Predictive Uncertainty", fontsize=24) 254 | 255 | # Remove axes from plot 256 | remove_axes(tri2_ax1) 257 | 258 | # Display plots 259 | plt.show() 260 | 261 | 262 | 263 | # Run main() function when called directly 264 | if __name__ == '__main__': 265 | main() 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Cholesky_Time_Comparison/SciPy_Cholesky.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import csv 3 | import time 4 | 5 | from scipy.linalg import cholesky, cho_solve, solve_triangular 6 | 7 | # Function for converting time to formatted string 8 | def convert_time(t): 9 | minutes = np.floor((t/3600.0) * 60) 10 | seconds = np.ceil(((t/3600.0) * 60 - minutes) * 60) 11 | if (minutes >= 1): 12 | minutes = np.floor(t/60.0) 13 | seconds = np.ceil((t/60.0 - minutes) * 60) 14 | t_str = str(int(minutes)).rjust(2) + 'm ' + \ 15 | str(int(seconds)).rjust(2) + 's' 16 | else: 17 | seconds = (t/60.0 - minutes) * 60 18 | t_str = str(seconds) + 's' 19 | return t_str 20 | 21 | 22 | 23 | # Compare SciPy Cholesky factorization timings with the CppGPs results 24 | def main(): 25 | 26 | 27 | # Get K matrix 28 | filename = "K.csv" 29 | K_vals = [] 30 | with open(filename, "r") as csvfile: 31 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 32 | for row in csvreader: 33 | K_vals.append(np.array(row)) 34 | obsCount = K_vals[0].size 35 | K_vals = np.array(K_vals).astype(np.float64) 36 | 37 | # Get term matrix 38 | filename = "term.csv" 39 | term_vals = [] 40 | with open(filename, "r") as csvfile: 41 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 42 | for row in csvreader: 43 | term_vals.append(np.array(row)) 44 | term_vals = np.array(term_vals).astype(np.float64) 45 | 46 | # Get alpha matrix 47 | filename = "alpha.csv" 48 | alpha_vals = [] 49 | with open(filename, "r") as csvfile: 50 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 51 | for row in csvreader: 52 | alpha_vals.append(np.array(row)) 53 | alpha_vals = np.array(alpha_vals).astype(np.float64) 54 | 55 | ### SCIKIT LEARN IMPLEMENTATION 56 | K = np.reshape(K_vals, [obsCount, obsCount]) 57 | term = np.reshape(term_vals, [obsCount, obsCount]) 58 | alpha = np.reshape(alpha_vals, [obsCount, 1]) 59 | 60 | 61 | # Compute Cholesky factor 62 | start_time = time.time() 63 | L = cholesky(K, lower=True) 64 | end_time = time.time() 65 | time_elapsed = convert_time(end_time-start_time) 66 | print('\n Cholesky Time: \t' + time_elapsed + '\n') 67 | 68 | # May be worth comparing this as well...
69 | #alpha = cho_solve((L, True), y_train) 70 | 71 | start_time = time.time() 72 | scipy_term = -np.einsum("ik,jk->ijk", alpha, alpha) # k: output-dimension 73 | scipy_term += cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis] 74 | end_time = time.time() 75 | time_elapsed = convert_time(end_time-start_time) 76 | print(' Computation Time: \t' + time_elapsed + '\n') 77 | 78 | scipy_term = scipy_term[:,:,0] 79 | scipy_norm = np.linalg.norm(scipy_term) 80 | scipy_term -= term 81 | 82 | print('\n Relative Error: \t {:.5e}\n'.format( np.linalg.norm(scipy_term)/scipy_norm ) ) 83 | 84 | # Run main() function when called directly 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Cholesky_Time_Comparison/include/README.md: -------------------------------------------------------------------------------- 1 | # CppOptimizationLibrary 2 | 3 | The CppOptimizationLibrary can be downloaded from the GitHub repository [PatWie/CppNumericalSolvers](https://github.com/PatWie/CppNumericalSolvers). Alternatively, the repository can be cloned via: 4 | 5 | ```console 6 | user@host $ git clone git@github.com:PatWie/CppNumericalSolvers.git 7 | ``` 8 | 9 | The `cppoptlib` directory should then be placed here (i.e. `include/cppoptlib`) before compiling the CppGPs code. 10 | 11 | 12 | ## Update L-BFGS Code 13 | 14 | The original implementation of the L-BFGS algorithm provided by CppOptimizationLibrary must be slightly modified for use with CppGPs. In particular, the provided `lbfgsbsolver.h` file in this directory should be copied into the `cppoptlib/solver/` directory, and the `morethuente.h` file in this directory should be copied into the `cppoptlib/linesearch/` directory (overwriting the original files).
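For reference, a minimal sketch of this copy step (assuming the commands are run from this `include/` directory, with the cloned `cppoptlib` folder already in place):

```console
user@host $ cp lbfgsbsolver.h cppoptlib/solver/lbfgsbsolver.h
user@host $ cp morethuente.h cppoptlib/linesearch/morethuente.h
```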
15 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Cholesky_Time_Comparison/include/morethuente.h: -------------------------------------------------------------------------------- 1 | // CppNumericalSolver 2 | #ifndef MORETHUENTE_H_ 3 | #define MORETHUENTE_H_ 4 | 5 | #include "../meta.h" 6 | #include <cmath> 7 | 8 | namespace cppoptlib { 9 | 10 | template<typename ProblemType, int Ord> 11 | class MoreThuente { 12 | 13 | public: 14 | using Scalar = typename ProblemType::Scalar; 15 | using TVector = typename ProblemType::TVector; 16 | 17 | /** 18 | * @brief use MoreThuente Rule for (strong) Wolfe conditions 19 | * @details [long description] 20 | * 21 | * @param searchDir search direction for next update step 22 | * @param objFunc handle to problem 23 | * 24 | * @return step-width 25 | */ 26 | 27 | /* 28 | static Scalar linesearch(const TVector &x, const TVector &searchDir, ProblemType &objFunc, const Scalar alpha_init = 1.0) { 29 | // assume step width 30 | Scalar ak = alpha_init; 31 | 32 | Scalar fval = objFunc.value(x); 33 | TVector g = x.eval(); 34 | objFunc.gradient(x, g); 35 | 36 | TVector s = searchDir.eval(); 37 | TVector xx = x.eval(); 38 | 39 | cvsrch(objFunc, xx, fval, g, ak, s); 40 | 41 | return ak; 42 | } 43 | */ 44 | 45 | //static Scalar linesearch(const TVector &x, const TVector &searchDir, ProblemType &objFunc, const Scalar alpha_init, Scalar fval, TVector gval, int &info) { 46 | static Scalar linesearch(const TVector &x, const TVector &searchDir, ProblemType &objFunc, const Scalar alpha_init, Scalar &fval, TVector & gval, int &info) { 47 | // assume step width 48 | Scalar ak = alpha_init; 49 | 50 | //Scalar fval = objFunc.value(x); 51 | //TVector g = x.eval(); 52 | //objFunc.gradient(x, g); 53 | 54 | TVector s = searchDir.eval(); 55 | TVector xx = x.eval(); 56 | 57 | //cvsrch(objFunc, xx, fval, g, ak, s); 58 | info = cvsrch(objFunc, xx, fval, gval, ak, s); 59 | 60 | return ak; 61 | } 62 | 63 | 64 | static int cvsrch(ProblemType &objFunc, TVector &x, Scalar &f, TVector &g, Scalar &stp, TVector &s) { 65 | //static int cvsrch(ProblemType &objFunc, TVector &x, Scalar f, TVector &g, Scalar &stp, TVector &s, Scalar &f_old) { 66 | //static int cvsrch(ProblemType &objFunc, TVector &x, Scalar f, TVector &g, Scalar &stp, TVector &s, Criteria<Scalar> &stop, Criteria<Scalar> &current, Status &status) { 67 | // we rewrite this from MIN-LAPACK and some MATLAB code 68 | int info = 0; 69 | int infoc = 1; 70 | const Scalar xtol = 1e-15; 71 | const Scalar ftol = 1e-4; 72 | const Scalar gtol = 1e-2; 73 | const Scalar stpmin = 1e-15; 74 | const Scalar stpmax = 1e15; 75 | const Scalar xtrapf = 4; 76 | const int maxfev = 20; 77 | int nfev = 0; 78 | 79 | 80 | Scalar dginit = g.dot(s); 81 | if (dginit >= 0.0) { 82 | // no descent direction 83 | // TODO: handle this case 84 | return -1; 85 | } 86 | 87 | bool brackt = false; 88 | bool stage1 = true; 89 | 90 | Scalar finit = f; 91 | Scalar dgtest = ftol * dginit; 92 | Scalar width = stpmax - stpmin; 93 | Scalar width1 = 2 * width; 94 | TVector wa = x.eval(); 95 | 96 | Scalar stx = 0.0; 97 | Scalar fx = finit; 98 | Scalar dgx = dginit; 99 | Scalar sty = 0.0; 100 | Scalar fy = finit; 101 | Scalar dgy = dginit; 102 | 103 | Scalar stmin; 104 | Scalar stmax; 105 | 106 | while (true) { 107 | 108 | // make sure we stay in the interval when setting min/max-step-width 109 | if (brackt) { 110 | stmin = std::min(stx, sty); 111 | stmax = std::max(stx, sty); 112 | } else { 113 | stmin = stx; 114 | stmax = stp + xtrapf * (stp - stx); 115 | } 116 | 117 | // Force the step to
be within the bounds stpmax and stpmin. 118 | stp = std::max(stp, stpmin); 119 | stp = std::min(stp, stpmax); 120 | 121 | // Oops, let us return the last reliable values 122 | if ( 123 | (brackt && ((stp <= stmin) || (stp >= stmax))) 124 | || (nfev >= maxfev - 1 ) || (infoc == 0) 125 | || (brackt && ((stmax - stmin) <= (xtol * stmax)))) { 126 | stp = stx; 127 | } 128 | 129 | // test new point 130 | x = wa + stp * s; 131 | f = objFunc.value(x); 132 | objFunc.gradient(x, g); 133 | 134 | // update stopping criteria 135 | //current.fDelta = std::abs(f-f_old)/(std::max(std::max(std::abs(f),std::abs(f_old)), 1.0)); 136 | nfev++; 137 | Scalar dg = g.dot(s); 138 | Scalar ftest1 = finit + stp * dgtest; 139 | 140 | // Check problem convergence criteria 141 | //status = checkConvergence(stop, current); 142 | //if ( !(status == Status::Continue) ) 143 | // std::cout << "\n[*] Break inside cvsrch()\t x = " << x.transpose() << std::endl; 144 | 145 | 146 | // all possible convergence tests 147 | if ((brackt & ((stp <= stmin) | (stp >= stmax))) | (infoc == 0)) 148 | info = 6; 149 | 150 | if ((stp == stpmax) & (f <= ftest1) & (dg <= dgtest)) 151 | info = 5; 152 | 153 | if ((stp == stpmin) & ((f > ftest1) | (dg >= dgtest))) 154 | info = 4; 155 | 156 | if (nfev >= maxfev) 157 | info = 3; 158 | 159 | if (brackt & (stmax - stmin <= xtol * stmax)) 160 | info = 2; 161 | 162 | if ((f <= ftest1) & (fabs(dg) <= gtol * (-dginit))) 163 | info = 1; 164 | 165 | // terminate when convergence reached 166 | if (info != 0) 167 | { 168 | //std::cout << "\n[*] Line Search Terminated\n"; 169 | //std::cout << "Function Evaluations: " << nfev << "\t INFO = " << info << "\t x = " << x.transpose() << std::endl; 170 | return info; 171 | //return -1; 172 | } 173 | 174 | if (stage1 & (f <= ftest1) & (dg >= std::min(ftol, gtol)*dginit)) 175 | stage1 = false; 176 | 177 | if (stage1 & (f <= fx) & (f > ftest1)) { 178 | Scalar fm = f - stp * dgtest; 179 | Scalar fxm = fx - stx * dgtest; 180 | Scalar fym = fy - sty * dgtest; 181 | Scalar dgm = dg - dgtest; 182 | Scalar dgxm = dgx - dgtest; 183 | Scalar dgym = dgy - dgtest; 184 | 185 | cstep( stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, brackt, stmin, stmax, infoc); 186 | 187 | fx = fxm + stx * dgtest; 188 | fy = fym + sty * dgtest; 189 | dgx = dgxm + dgtest; 190 | dgy = dgym + dgtest; 191 | } else { 192 | // this is ugly and some variables should be moved to the class scope 193 | cstep( stx, fx, dgx, sty, fy, dgy, stp, f, dg, brackt, stmin, stmax, infoc); 194 | } 195 | 196 | if (brackt) { 197 | if (fabs(sty - stx) >= 0.66 * width1) 198 | stp = stx + 0.5 * (sty - stx); 199 | width1 = width; 200 | width = fabs(sty - stx); 201 | } 202 | } 203 | 204 | return 0; 205 | } 206 | 207 | static int cstep(Scalar& stx, Scalar& fx, Scalar& dx, Scalar& sty, Scalar& fy, Scalar& dy, Scalar& stp, 208 | Scalar& fp, Scalar& dp, bool& brackt, Scalar& stpmin, Scalar& stpmax, int& info) { 209 | info = 0; 210 | bool bound = false; 211 | 212 | // Check the input parameters for errors. 213 | if ((brackt & ((stp <= std::min(stx, sty) ) | (stp >= std::max(stx, sty)))) | (dx * (stp - stx) >= 0.0) 214 | | (stpmax < stpmin)) { 215 | return -1; 216 | } 217 | 218 | Scalar sgnd = dp * (dx / fabs(dx)); 219 | 220 | Scalar stpf = 0; 221 | Scalar stpc = 0; 222 | Scalar stpq = 0; 223 | 224 | if (fp > fx) { 225 | info = 1; 226 | bound = true; 227 | Scalar theta = 3. 
* (fx - fp) / (stp - stx) + dx + dp; 228 | Scalar s = std::max(theta, std::max(dx, dp)); 229 | Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s)); 230 | if (stp < stx) 231 | gamma = -gamma; 232 | Scalar p = (gamma - dx) + theta; 233 | Scalar q = ((gamma - dx) + gamma) + dp; 234 | Scalar r = p / q; 235 | stpc = stx + r * (stp - stx); 236 | stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / 2.) * (stp - stx); 237 | if (fabs(stpc - stx) < fabs(stpq - stx)) 238 | stpf = stpc; 239 | else 240 | stpf = stpc + (stpq - stpc) / 2; 241 | brackt = true; 242 | } else if (sgnd < 0.0) { 243 | info = 2; 244 | bound = false; 245 | Scalar theta = 3 * (fx - fp) / (stp - stx) + dx + dp; 246 | Scalar s = std::max(theta, std::max(dx, dp)); 247 | Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s)); 248 | if (stp > stx) 249 | gamma = -gamma; 250 | 251 | Scalar p = (gamma - dp) + theta; 252 | Scalar q = ((gamma - dp) + gamma) + dx; 253 | Scalar r = p / q; 254 | stpc = stp + r * (stx - stp); 255 | stpq = stp + (dp / (dp - dx)) * (stx - stp); 256 | if (fabs(stpc - stp) > fabs(stpq - stp)) 257 | stpf = stpc; 258 | else 259 | stpf = stpq; 260 | brackt = true; 261 | } else if (fabs(dp) < fabs(dx)) { 262 | info = 3; 263 | bound = 1; 264 | Scalar theta = 3 * (fx - fp) / (stp - stx) + dx + dp; 265 | Scalar s = std::max(theta, std::max( dx, dp)); 266 | Scalar gamma = s * sqrt(std::max(static_cast<Scalar>(0.), (theta / s) * (theta / s) - (dx / s) * (dp / s))); 267 | if (stp > stx) 268 | gamma = -gamma; 269 | Scalar p = (gamma - dp) + theta; 270 | Scalar q = (gamma + (dx - dp)) + gamma; 271 | Scalar r = p / q; 272 | if ((r < 0.0) & (gamma != 0.0)) { 273 | stpc = stp + r * (stx - stp); 274 | } else if (stp > stx) { 275 | stpc = stpmax; 276 | } else { 277 | stpc = stpmin; 278 | } 279 | stpq = stp + (dp / (dp - dx)) * (stx - stp); 280 | if (brackt) { 281 | if (fabs(stp - stpc) < fabs(stp - stpq)) { 282 | stpf = stpc; 283 | } else { 284 | stpf = stpq; 285 | } 286 | } else { 287 | if (fabs(stp - stpc) > fabs(stp - stpq)) { 288 | stpf = stpc; 289 | } else { 290 | stpf = stpq; 291 | } 292 | 293 | } 294 | } else { 295 | info = 4; 296 | bound = false; 297 | if (brackt) { 298 | Scalar theta = 3 * (fp - fy) / (sty - stp) + dy + dp; 299 | Scalar s = std::max(theta, std::max(dy, dp)); 300 | Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dy / s) * (dp / s)); 301 | if (stp > sty) 302 | gamma = -gamma; 303 | 304 | Scalar p = (gamma - dp) + theta; 305 | Scalar q = ((gamma - dp) + gamma) + dy; 306 | Scalar r = p / q; 307 | stpc = stp + r * (sty - stp); 308 | stpf = stpc; 309 | } else if (stp > stx) 310 | stpf = stpmax; 311 | else { 312 | stpf = stpmin; 313 | } 314 | } 315 | 316 | if (fp > fx) { 317 | sty = stp; 318 | fy = fp; 319 | dy = dp; 320 | } else { 321 | if (sgnd < 0.0) { 322 | sty = stx; 323 | fy = fx; 324 | dy = dx; 325 | } 326 | 327 | stx = stp; 328 | fx = fp; 329 | dx = dp; 330 | } 331 | 332 | stpf = std::min(stpmax, stpf); 333 | stpf = std::max(stpmin, stpf); 334 | stp = stpf; 335 | 336 | if (brackt & bound) { 337 | if (sty > stx) { 338 | stp = std::min(stx + static_cast<Scalar>(0.66) * (sty - stx), stp); 339 | } else { 340 | stp = std::max(stx + static_cast<Scalar>(0.66) * (sty - stx), stp); 341 | } 342 | } 343 | 344 | return 0; 345 | 346 | } 347 | 348 | }; 349 | 350 | } 351 | 352 | #endif /* MORETHUENTE_H_ */ 353 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Cholesky_Time_Comparison/main.cpp:
-------------------------------------------------------------------------------- 1 | // main.cpp -- example use of CppGPs for Gaussian process regression 2 | #include <iostream> 3 | #include <cmath> 4 | #include <cstdlib> 5 | #include <fstream> 6 | #include <iomanip> 7 | #include <string> 8 | #include <vector> 9 | #include <chrono> 10 | #include <random> 11 | #include <Eigen/Dense> 12 | #include "GPs.h" 13 | 14 | #include <boost/range/irange.hpp> 15 | 16 | // Specify the target function for Gaussian process regression 17 | double targetFunc(Eigen::MatrixXd X) 18 | { 19 | 20 | if ( X.size() == 1 ) 21 | { 22 | // Define the target function to be an oscillatory, non-periodic function 23 | double oscillation = 30.0; 24 | double xshifted = 0.5*(X(0) + 1.0); 25 | return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0; 26 | } 27 | else 28 | { 29 | // Define the target function to be the Marr wavelet (a.k.a "Mexican Hat" wavelet) 30 | // ( see https://en.wikipedia.org/wiki/Mexican_hat_wavelet ) 31 | double sigma = 0.25; 32 | double pi = std::atan(1)*4; 33 | double radialTerm = std::pow(X.squaredNorm()/sigma,2); 34 | return 2.0 / (std::sqrt(pi*sigma) * std::pow(pi,1.0/4.0)) * (1.0 - radialTerm) * std::exp(-radialTerm); 35 | } 36 | } 37 | 38 | 39 | // Example use of CppGPs code for Gaussian process regression 40 | int main(int argc, char const *argv[]) 41 | { 42 | 43 | // Retrieve aliases from GP namespace 44 | using Matrix = Eigen::MatrixXd; 45 | using Vector = Eigen::VectorXd; 46 | 47 | // Convenience using-declarations 48 | using std::cout; 49 | using std::endl; 50 | using GP::GaussianProcess; 51 | using GP::linspace; 52 | using GP::sampleNormal; 53 | using GP::sampleUnif; 54 | using GP::RBF; 55 | 56 | // Used for timing code with chrono 57 | using GP::high_resolution_clock; 58 | using GP::time; 59 | using GP::getTime; 60 | 61 | // Set random seed based on system clock 62 | std::srand(static_cast<unsigned int>(high_resolution_clock::now().time_since_epoch().count())); 63 | 64 | // Fix random seed for debugging and testing 65 | //std::srand(static_cast<unsigned int>(0)); 66 | 67 | 68 | // 69 | // [ Define Training Data ] 70 | // 71 | 72 | // Specify the input dimensions 73 | //int inputDim = 1; 74 | int inputDim = 2; 75 | 76 | // Specify observation data count 77 | int obsCount = 2000; 78 | 79 | // Specify observation noise level 80 | auto noiseLevel = 1.0; 81 | 82 | // Define random noise to add to target observations 83 | //auto noise = sampleNormal(obsCount) * noiseLevel; 84 | Matrix noise; 85 | noise.noalias() = sampleNormal(obsCount) * noiseLevel; 86 | 87 | // Define observations by sampling random uniform distribution 88 | Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim); 89 | Matrix y; y.resize(obsCount, 1); 90 | 91 | // Define target observations 'y' by applying 'targetFunc' 92 | // to the input observations 'X' and adding a noise vector 93 | for ( auto i : boost::irange(0,obsCount) ) 94 | y(i) = targetFunc(X.row(i)) + noise(i); 95 | 96 | 97 | 98 | // 99 | // [ Construct Gaussian Process Model ] 100 | // 101 | 102 | // Initialize Gaussian process model 103 | GaussianProcess model; 104 | 105 | // Specify training observations for GP model 106 | model.setObs(X,y); 107 | 108 | // Fix noise level [ noise level is fit to the training data by default ] 109 | //model.setNoise(0.33); 110 | 111 | // Initialize a RBF covariance kernel and assign it to the model 112 | RBF kernel; 113 | model.setKernel(kernel); 114 | 115 | // Specify hyperparameter bounds [ including noise bounds (index 0) ] 116 | // Vector lbs(2); lbs << 0.0001 , 0.01; 117 | // Vector ubs(2); ubs << 5.0 , 100.0; 118 | // model.setBounds(lbs, ubs); 119 | 120 | //
Specify hyperparameter bounds [ excluding noise bounds ] 121 | Vector lbs(1); lbs << 0.01; 122 | Vector ubs(1); ubs << 100.0; 123 | model.setBounds(lbs, ubs); 124 | 125 | // Specify solver precision and iteration count 126 | model.setSolverPrecision(1e-3); 127 | model.setSolverIterations(1); 128 | 129 | // Fit covariance kernel hyperparameters to the training data 130 | model.fitModel(); 131 | 132 | 133 | Matrix K = model.getK(); 134 | Matrix alpha = model.getAlpha(); 135 | time start = high_resolution_clock::now(); 136 | auto cholesky = K.llt(); 137 | time end = high_resolution_clock::now(); 138 | auto choleskyTime = getTime(start, end); 139 | 140 | 141 | start = high_resolution_clock::now(); 142 | // Try forcing Eigen to solve in place 143 | Matrix term; 144 | term.noalias() = Matrix::Identity(obsCount,obsCount); 145 | cholesky.solveInPlace(term); 146 | term.noalias() -= alpha*alpha.transpose(); 147 | end = high_resolution_clock::now(); 148 | auto computationTime = getTime(start, end); 149 | 150 | 151 | cout << "\n Cholesky Time:\t\t" << choleskyTime << endl; 152 | cout << "\n Computation Time:\t" << computationTime << endl << endl; 153 | 154 | 155 | // 156 | // [ Save Results for Comparison ] 157 | // 158 | 159 | // Save K matrix 160 | std::string outputFile = "K.csv"; 161 | std::ofstream fout; 162 | fout << std::scientific << std::setprecision(64); 163 | fout.open(outputFile); 164 | for ( auto i : boost::irange(0,obsCount) ) 165 | { 166 | for ( auto j : boost::irange(0,obsCount) ) 167 | fout << K(i,j) << ((j [... source truncated ...] -------------------------------------------------------------------------------- /misc/TEST_CODE/Modified_L_BFGS/GPs.h: -------------------------------------------------------------------------------- 1 | #ifndef _GPS_H 2 | #define _GPS_H 3 | #include <Eigen/Dense> 4 | #include <vector> 5 | #include <iostream> 6 | #include <cmath> 7 | #include <chrono> 8 | #include <random> 9 | //#include "./utils/minimize.h" 10 | 11 | // Include CppOptLib files 12 | #include "./include/cppoptlib/meta.h" 13 | #include "./include/cppoptlib/boundedproblem.h" 14 | #include "./include/cppoptlib/solver/lbfgsbsolver.h" 15 | 16 | 17 | // Declare namespace for Gaussian process definitions 18 | namespace GP { 19 | 20 | // Define PI using arctan function 21 | static const double PI = std::atan(1)*4; 22 | 23 | // Define aliases with using declarations 24 | using Matrix = Eigen::MatrixXd; 25 | using Vector = Eigen::VectorXd; 26 | 27 | // Define function for retrieving time from chrono 28 | float getTime(std::chrono::high_resolution_clock::time_point start, std::chrono::high_resolution_clock::time_point end); 29 | using time = std::chrono::high_resolution_clock::time_point; 30 | using std::chrono::high_resolution_clock; 31 | 32 | // Define linspace function for generating equally spaced points 33 | Matrix linspace(double a, double b, int N, int dim=1); 34 | 35 | // Define function for sampling uniform distribution on interval 36 | Matrix sampleUnif(double a=0.0, double b=1.0, int N=1, int dim=1); 37 | Vector sampleUnifVector(Vector lbs, Vector ubs); 38 | Matrix sampleNormal(int N=1); 39 | 40 | // Define utility functions for computing distance matrices 41 | void pdist(Matrix & Dv, Matrix & X1, Matrix & X2); 42 | void squareForm(Matrix & D, Matrix & Dv, int n, double diagVal=0.0); 43 | 44 | 45 | // Define abstract base class for covariance kernels 46 | class Kernel 47 | { 48 | public: 49 | 50 | // Constructor and destructor 51 | Kernel(Vector p, int c) : kernelParams(p) , paramCount(c) { }; 52 | virtual ~Kernel() = default; 53 | 54 | // Compute the covariance matrix provided a "distance matrix" consisting of pairwise squared norms between points 55 | virtual std::vector<Matrix> computeCov(Matrix & K, Matrix & D, Vector & params, double jitter=0.0, bool evalGrad=false) = 0; 56 | 57 | // Compute the (cross-)covariance matrix
for specified input vectors X1 and X2 58 | virtual void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params) = 0; 59 | 60 | // Set the noise level which is to be added to the diagonal of the covariance matrix 61 | void setNoise(double noise) { noiseLevel = noise; } 62 | 63 | // Set method for specifying the kernel parameters 64 | void setParams(Vector params) { kernelParams = params; }; 65 | 66 | // Get methods for retrieving the kernel parameters 67 | int getParamCount() { return paramCount; } ; 68 | Vector getParams() { return kernelParams; }; 69 | 70 | protected: 71 | Vector kernelParams; 72 | int paramCount; 73 | double noiseLevel; 74 | virtual double evalKernel(Matrix&, Matrix&, Vector&, int) = 0; 75 | virtual double evalDistKernel(double, Vector&, int) = 0; 76 | }; 77 | 78 | 79 | // Define class for radial basis function (RBF) covariance kernel 80 | class RBF : public Kernel 81 | { 82 | public: 83 | 84 | // Constructor 85 | RBF() : Kernel(Vector(1), 1) { kernelParams(0)=1.0; }; 86 | 87 | // Compute the covariance matrix provided a "distance matrix" consisting of pairwise squared norms between points 88 | std::vector<Matrix> computeCov(Matrix & K, Matrix & D, Vector & params, double jitter=0.0, bool evalGrad=false); 89 | 90 | // Compute the (cross-)covariance matrix for specified input vectors X1 and X2 91 | void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params); 92 | 93 | private: 94 | 95 | // Functions for evaluating the kernel on a pair of points / a specified squared distance 96 | double evalKernel(Matrix&, Matrix&, Vector&, int); 97 | double evalDistKernel(double, Vector&, int); 98 | 99 | Matrix Kv; 100 | Matrix dK_i; 101 | Matrix dK_iv; 102 | }; 103 | 104 | 105 | 106 | 107 | // Define class for Gaussian processes 108 | class GaussianProcess : public cppoptlib::BoundedProblem<double> 109 | { 110 | public: 111 | 112 | // Define alias for base class CppOptLib "BoundedProblem" for use in initialization list 113 | using Superclass = cppoptlib::BoundedProblem<double>; 114 | 115 | // Constructors [ Initialize lowerBounds / upperBounds of CppOptLib superclass to zero ] 116 | GaussianProcess() : Superclass(Vector(0), Vector(0)) { } 117 | 118 | // Define CppOptLib methods 119 | // [ 'value' returns NLML and 'gradient' assigns the associated gradient vector to 'g' ] 120 | double value(const Vector &p) { return evalNLML(p, cppOptLibgrad, true); } 121 | void gradient(const Vector &p, Vector &g) { g = cppOptLibgrad; } 122 | Vector cppOptLibgrad; 123 | 124 | // Set methods 125 | void setObs(Matrix & x, Matrix & y) { obsX = x; obsY = y; N = static_cast<int>(x.rows()); } 126 | void setKernel(Kernel & k) { kernel = &k; } 127 | void setPred(Matrix & px) { predX = px; } 128 | void setNoise(double noise) { fixedNoise = true; noiseLevel = noise; } 129 | void setBounds(Vector & lbs, Vector & ubs) { lowerBounds = lbs; upperBounds = ubs; fixedBounds=true; } 130 | 131 | // Compute methods 132 | void fitModel(); 133 | void predict(); 134 | double computeNLML(const Vector & p, double noise); 135 | double computeNLML(const Vector & p); 136 | double computeNLML(); 137 | 138 | // Get methods 139 | Matrix getPredMean() { return predMean; } 140 | Matrix getPredVar() { return predCov.diagonal() + noiseLevel*Eigen::VectorXd::Ones(predMean.size()); } 141 | Matrix getSamples(int count=10); 142 | Vector getParams() { return (*kernel).getParams(); } 143 | double getNoise() { return noiseLevel; } 144 | 145 | 146 | // Define method for superclass "GradientObj" used by 'cg_minimize' [only needed when using
"./utils/minimize.h"] 147 | //void computeValueAndGradient(Vector X, double & val, Vector & D) { val = evalNLML(X,D,true); }; 148 | 149 | private: 150 | 151 | // Private member functions 152 | double evalNLML(const Vector & p); 153 | double evalNLML(const Vector & p, Vector & g, bool evalGrad=false); 154 | void computeDistMat(); 155 | 156 | // Status variables ( still needed ? ) 157 | int N = 0; 158 | 159 | // Kernel and covariance matrix 160 | Kernel * kernel; 161 | double noiseLevel = 0.0; 162 | bool fixedNoise = false; 163 | //double jitter = 1e-7; 164 | double jitter = 1e-10; 165 | Matrix K; 166 | 167 | // Store Cholsky decomposition 168 | Eigen::LLT cholesky; 169 | 170 | // Hyperparameter bounds 171 | Vector lowerBounds; 172 | Vector upperBounds; 173 | bool fixedBounds = false; 174 | void parseBounds(Vector & lbs, Vector & ubs, int augParamCount); 175 | 176 | // Store squared distance matrix and alpha for NLML/DNLML calculations 177 | Matrix distMatrix; 178 | Matrix _alpha; 179 | 180 | // Observation data 181 | Matrix obsX; 182 | Matrix obsY; 183 | 184 | // Prediction data 185 | Matrix predX; 186 | Matrix predMean; 187 | Matrix predCov; 188 | double NLML = 0.0; 189 | 190 | int paramCount; 191 | int augParamCount; 192 | Matrix term; 193 | //std::vector gradList; // Need to find a way to avoid creating new gradList each time... 194 | 195 | // DEFINE TIMER VARIABLES 196 | ///* 197 | double time_computecov = 0.0; 198 | double time_cholesky_llt = 0.0; 199 | double time_NLML = 0.0; 200 | double time_term = 0.0; 201 | double time_grad = 0.0; 202 | //*/ 203 | 204 | // Count gradient evaluations by optimizer 205 | int gradientEvals = 0; 206 | 207 | // Check parameter changes 208 | double oldNoise = 0.0; 209 | double oldLength = 0.0; 210 | double oldGradient = 0.0; 211 | }; 212 | 213 | // Define linspace function for generating 214 | // equally spaced points on an interval 215 | //template 216 | //Eigen::Matrix linspace(T a, T b, int N) 217 | //{ return Eigen::Array::LinSpaced(N, a, b); } 218 | 219 | 220 | }; 221 | 222 | 223 | #endif 224 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Modified_L_BFGS/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019- Nickolas Winovich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Modified_L_BFGS/Plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import csv 4 | import time 5 | 6 | import matplotlib as mpl 7 | from mpl_toolkits.mplot3d import Axes3D 8 | 9 | # Function for converting time to formatted string 10 | def convert_time(t): 11 | minutes = np.floor((t/3600.0) * 60) 12 | seconds = np.ceil(((t/3600.0) * 60 - minutes) * 60) 13 | if (minutes >= 1): 14 | minutes = np.floor(t/60.0) 15 | seconds = np.ceil((t/60.0 - minutes) * 60) 16 | t_str = str(int(minutes)).rjust(2) + 'm ' + \ 17 | str(int(seconds)).rjust(2) + 's' 18 | else: 19 | seconds = (t/60.0 - minutes) * 60 20 | t_str = str(seconds) + 's' 21 | return t_str 22 | 23 | # Define function for removing axes from MatPlotLib plots 24 | def remove_axes(ax): 25 | # make the panes transparent 26 | ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 27 | ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 28 | ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 29 | # make the grid lines transparent 30 | ax.xaxis._axinfo["grid"]['color'] = (1,1,1,0) 31 | ax.yaxis._axinfo["grid"]['color'] = (1,1,1,0) 32 | ax.zaxis._axinfo["grid"]['color'] = (1,1,1,0) 33 | # remove axes 34 | ax._axis3don = False 35 | 36 | 37 | # Evaluate SciKit Learn Gaussian Process Regressor and Plot Results 38 | def main(): 39 | 40 | 41 | # First determine the dimension of the input values 42 | filename = "predictions.csv" 43 | with open(filename, "r") as csvfile: 44 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 45 | row = next(csvreader) 46 | nonInputLength = 3 47 | inputDim = len(row) - nonInputLength 48 | 49 | # Get prediction data 50 | filename = "predictions.csv" 51 | inVals = []; trueVals = []; predMean = []; predStd = [] 52 | with open(filename, "r") as csvfile: 53 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 54 | for row in csvreader: 55 | 56 | if inputDim == 2: 57 | i1, i2, t, m, v = row 58 | inVals.append([i1,i2]) 59 | else: 60 | i, t, m, v = row 61 | inVals.append(i) 62 | 63 | trueVals.append(t) 64 | predMean.append(m) 65 | predStd.append(v) 66 | inVals = np.array(inVals).astype(np.float32) 67 | trueVals = np.array(trueVals).astype(np.float32) 68 | predMean = np.array(predMean).astype(np.float32) 69 | predStd = np.array(predStd).astype(np.float32) 70 | 71 | ## Get observation data 72 | filename = "observations.csv" 73 | obsX = []; obsY = [] 74 | with open(filename, "r") as csvfile: 75 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 76 | for row in csvreader: 77 | if inputDim == 2: 78 | x1, x2, y = row 79 | obsX.append([x1,x2]) 80 | else: 81 | x, y = row 82 | obsX.append(x) 83 | obsY.append(y) 84 | obsX = np.array(obsX).astype(np.float32) 85 | obsY = np.array(obsY).astype(np.float32) 86 | 87 | 88 | 89 | if inputDim == 1: 90 | # Get posterior samples 91 | filename = "samples.csv" 92 | samples = [] 93 | with open(filename, "r") as csvfile: 94 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 95 | for row in csvreader: 96 | vals = np.array(row) 97 | samples.append(vals) 98 | samples = np.array(samples).astype(np.float32) 99 | 100 | 101 | X = np.reshape(obsX, [-1, inputDim]) 102 | Y = np.reshape(obsY, [-1]) 103 | Xtest = np.reshape(inVals, [-1, inputDim]) 104 | 105 | 106 | 107 | ### ### 108 | ### PLOT RESULTS ### 109 | ### ### 110 | 111 | if inputDim == 1: 112 | 113 | """ ONE-DIMENSIONAL PLOTS """ 
114 | 115 | ### C++ IMPLEMENTATION 116 | plt.figure() 117 | plt.plot(inVals, predMean, 'C0', linewidth=2.0) 118 | alpha = 0.075 119 | for k in [1,2,3]: 120 | plt.fill_between(inVals, predMean-k*predStd, predMean+k*predStd, where=1>=0, facecolor="C0", alpha=alpha, interpolate=True) 121 | plt.plot(inVals, trueVals, 'C1', linewidth=1.0, linestyle="dashed") 122 | alpha_scatter = 0.5 123 | plt.scatter(obsX, obsY, alpha=alpha_scatter) 124 | for i in range(0,samples.shape[0]): 125 | plt.plot(inVals, samples[i,:], 'C0', alpha=0.2, linewidth=1.0, linestyle="dashed") 126 | plt.suptitle("C++ Implementation") 127 | plt.show() 128 | 129 | 130 | elif inputDim == 2: 131 | 132 | """ TWO-DIMENSIONAL PLOTS """ 133 | 134 | # Flatten input values for compatibility with MatPlotLib's tri_surf 135 | plot_X_flat = []; plot_Y_flat = [] 136 | R = inVals.shape[0] 137 | for n in range(0,R): 138 | plot_X_flat.append(inVals[n,0]) 139 | plot_Y_flat.append(inVals[n,1]) 140 | 141 | tri_fig = plt.figure() 142 | tri_ax1 = tri_fig.add_subplot(111, projection='3d') 143 | linewidth = 0.1 144 | cmap = "Blues" 145 | 146 | # Plot CppGPs results 147 | tri_ax1.plot_trisurf(plot_X_flat,plot_Y_flat, predMean, cmap=cmap, linewidth=linewidth, antialiased=True) 148 | pred_title = "CppGPs" 149 | tri_ax1.set_title(pred_title, fontsize=24) 150 | 151 | # Remove axes from plots 152 | remove_axes(tri_ax1) 153 | 154 | # Bind axes for comparison [ in the two-plot comparison scripts this handler 155 | # mirrors the view of a second axis 'tri_ax2'; only one 3D axis is created 156 | # in this script, so the handler simply triggers a redraw for that axis ] 157 | def tri_on_move(event): 158 | if event.inaxes != tri_ax1: 159 | return 160 | tri_fig.canvas.draw_idle() 161 | tri_c1 = tri_fig.canvas.mpl_connect('motion_notify_event', tri_on_move) 174 | 175 | 176 | 177 | """ Zoom in to view predictive uncertainty """ 178 | plot_radius = 0.5 179 | plot_x_min = -0.125 180 | plot_y_min = -0.125 181 | plot_x_max = 0.25 182 | 183 | # Define conditions for including values associated with an input point (x,y) 184 | def include_conditions(x,y,delta=0.0): 185 | rad = np.sqrt(np.power(x,2) + np.power(y,2)) 186 | return (x-delta<=plot_x_max) and (x+delta>=plot_x_min) and (y+delta>=plot_y_min) and (rad-delta<=plot_radius) 187 | 188 | 189 | # Restrict plots to values corresponding to valid input points 190 | R = inVals.shape[0] 191 | plot_X_zoom = []; plot_Y_zoom = []; predMean_zoom = [] 192 | predMean_plus_std = []; predMean_minus_std = [] 193 | predMean_plus_std2 = []; predMean_minus_std2 = [] 194 | predMean_plus_std3 = []; predMean_minus_std3 = [] 195 | trueVals_zoom = [] 196 | for n in range(0,R): 197 | x = plot_X_flat[n] 198 | y = plot_Y_flat[n] 199 | if include_conditions(x,y,delta=0.025): 200 | plot_X_zoom.append(x) 201 | plot_Y_zoom.append(y) 202 | predMean_zoom.append(predMean[n]) 203 | predMean_plus_std.append( predMean[n] + 1 * predStd[n] ) 204 | predMean_minus_std.append( predMean[n] - 1 * predStd[n] ) 205 | predMean_plus_std2.append( predMean[n] + 2 * predStd[n] ) 206 |
predMean_minus_std2.append( predMean[n] - 2 * predStd[n] ) 207 | predMean_plus_std3.append( predMean[n] + 3 * predStd[n] ) 208 | predMean_minus_std3.append( predMean[n] - 3 * predStd[n] ) 209 | trueVals_zoom.append(trueVals[n]) 210 | 211 | 212 | # Restrict observations to valid input points 213 | obsX_x_zoom = []; obsX_y_zoom = []; obsY_zoom = [] 214 | for n in range(0, obsX.shape[0]): 215 | x = obsX[n,0] 216 | y = obsX[n,1] 217 | if include_conditions(x,y): 218 | obsX_x_zoom.append(x) 219 | obsX_y_zoom.append(y) 220 | obsY_zoom.append(obsY[n]) 221 | 222 | 223 | # Initialize plot for assessing predictive uncertainty 224 | tri_fig2 = plt.figure() 225 | tri2_ax1 = tri_fig2.add_subplot(111, projection='3d') 226 | 227 | 228 | # Plot Predictive Mean 229 | linewidth = 0.1; alpha = 0.85 230 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_zoom, cmap=cmap, linewidth=linewidth, antialiased=True, alpha=alpha) 231 | 232 | 233 | # One Standard Deviation 234 | linewidth = 0.075; alpha = 0.2 235 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std, cmap=cmap, linewidth=linewidth, antialiased=True,alpha=alpha) 236 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 237 | 238 | # Two Standard Deviations 239 | linewidth = 0.05; alpha = 0.1 240 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std2, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 241 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std2, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 242 | 243 | # Three Standard Deviations 244 | linewidth = 0.01; alpha = 0.01 245 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_plus_std3, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 246 | tri2_ax1.plot_trisurf(plot_X_zoom,plot_Y_zoom, predMean_minus_std3, cmap=cmap, linewidth=linewidth,antialiased=True,alpha=alpha) 247 | 248 | # Scatter plot of training observations 249 | alpha = 0.4 250 | tri2_ax1.scatter(obsX_x_zoom, obsX_y_zoom, obsY_zoom, c='k', marker='o', s=15.0, alpha=alpha) 251 | 252 | # Add title to plot 253 | plt.suptitle("CppGPs Predictive Uncertainty", fontsize=24) 254 | 255 | # Remove axes from plot 256 | remove_axes(tri2_ax1) 257 | 258 | # Display plots 259 | plt.show() 260 | 261 | 262 | 263 | # Run main() function when called directly 264 | if __name__ == '__main__': 265 | main() 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Modified_L_BFGS/README.md: -------------------------------------------------------------------------------- 1 | # CppGPs - C++ Gaussian Process Library 2 | Implementation of Numerical Gaussian Processes in C++ 3 | 4 | ## Dependencies 5 | * [Eigen](https://eigen.tuxfamily.org/dox/GettingStarted.html) - High-level C++ library for linear algebra, matrix operations, and solvers (version 3.3.7) 6 | * [GCC](https://gcc.gnu.org/) - GNU compiler collection; more specifically the GCC C++ compiler is recommended 7 | * [CppOptimizationLibrary](https://github.com/PatWie/CppNumericalSolvers) - A header-only optimization library with an L-BFGS implementation in C++ 8 | 9 | ### Optional Dependencies for Plotting / SciKit Learn Comparison 10 | * [NumPy](http://www.numpy.org/) - Scientific computing package for Python 11 | * [csv](https://docs.python.org/3/library/csv.html) - Python module for working with comma separated value (CSV) files 12 | * [MatPlotLib](https://matplotlib.org/) - 
Python plotting library 13 | * [SciKit Learn](https://scikit-learn.org/stable/) - Data analysis library for Python 14 | 15 | 16 | 17 | ## Gaussian Process Regression 18 | 19 | ### Compiling and Running the Code 20 | The `main.cpp` file provides an example use of the CppGP code for Gaussian process regression. Before compilation, the following steps must be carried out: 21 | 22 | * Specify the path to the Eigen header files by editing the `EIGENPATH` variable in `makefile` 23 | * Download the `CppOptimizationLibrary` code as instructed in the `include/README.md` file 24 | 25 | Once these steps are completed, the example code can be compiled and run as follows: 26 | ```console 27 | user@host $ make all 28 | g++ -c -Wall -std=c++17 -I/usr/include/eigen3 -g -march=native -fopenmp -O3 main.cpp -o main.o 29 | g++ -c -Wall -std=c++17 -I/usr/include/eigen3 -g -march=native -fopenmp -O3 GPs.cpp -o GPs.o 30 | g++ -std=c++17 -I/usr/include/eigen3 -g -march=native -fopenmp -O3 -o Run main.cpp GPs.cpp 31 | 32 | user@host $ ./Run 33 | 34 | Computation Time: 0.089068 s 35 | 36 | Optimized Hyperparameters: 37 | 0.066468 (Noise = 0.969875) 38 | 39 | NLML: 469.561 40 | 41 | ``` 42 | 43 | __Note:__ A slight reduction in the run time may be achieved by installing [gperftools](https://github.com/gperftools/gperftools) and prefacing the run statement with the `LD_PRELOAD` environment variable set to the path of the [TCMalloc](http://goog-perftools.sourceforge.net/doc/tcmalloc.html) shared object: 44 | ```console 45 | user@host $ LD_PRELOAD=/usr/lib/libtcmalloc.so.4 ./Run 46 | ``` 47 | 48 | ### Defining the Target Function and Training Data 49 | The `targetFunc` function defined at the beginning of the `main.cpp` file is used to generate artificial training data for the regression task: 50 | ```cpp 51 | // Specify the target function for Gaussian process regression 52 | double targetFunc(Eigen::MatrixXd X) 53 | { 54 | double oscillation = 30.0; 55 | double xshifted = 0.5*(X(0) + 1.0); 56 | return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0; 57 | } 58 | ``` 59 | The training data consists of a collection of input points `X` along with an associated collection of target values `y`. This data should be formatted so that `y(i) = targetFunc(X.row(i))` (with an optional additive noise term). A simple one-dimensional problem setup can be defined as follows: 60 | ```cpp 61 | int obsCount = 250; 62 | Matrix X = sampleUnif(-1.0, 1.0, obsCount); 63 | Matrix y; y.resize(obsCount, 1); 64 | ``` 65 | Noise can be added to the training target data `y` to better assess the fit of the model's predictive variance. 
The level of noise in the training data can be adjusted via the `noiseLevel` parameter, which is then used to define the target data: 66 | ```cpp 67 | auto noiseLevel = 1.0; 68 | auto noise = sampleNormal(obsCount) * noiseLevel; 69 | for ( auto i : boost::irange(0,obsCount) ) 70 | y(i) = targetFunc(X.row(i)) + noise(i); 71 | ``` 72 | 73 | ### Specifying and Fitting the Gaussian Process Model 74 | 75 | ```cpp 76 | // Initialize Gaussian process regression model 77 | GaussianProcess model; 78 | 79 | // Specify training observations for GP model 80 | model.setObs(X,y); 81 | 82 | // Initialize RBF kernel and assign it to the model 83 | RBF kernel; 84 | model.setKernel(kernel); 85 | 86 | // Specify hyperparameter bounds 87 | Vector lbs(1); lbs << 0.01; 88 | Vector ubs(1); ubs << 100.0; 89 | model.setBounds(lbs, ubs); 90 | 91 | // Fit model to the training data 92 | model.fitModel(); 93 | ``` 94 | 95 | ### Posterior Predictions and Sample Paths 96 | ```cpp 97 | // Define test mesh for GP model predictions 98 | int predCount = 100; 99 | auto testMesh = linspace(-1.0, 1.0, predCount); 100 | model.setPred(testMesh); 101 | 102 | // Compute predicted means and variances for the test points 103 | model.predict(); 104 | Matrix pmean = model.getPredMean(); 105 | Matrix pvar = model.getPredVar(); 106 | Matrix pstd = pvar.array().sqrt().matrix(); 107 | 108 | // Get sample paths from the posterior distribution of the model 109 | int sampleCount = 25; 110 | Matrix samples = model.getSamples(sampleCount); 111 | ``` 112 | 113 | ### Plotting Results of the Trained Gaussian Process Model 114 | The artificial observation data and corresponding predictions/samples are saved in the `observations.csv` and `predictions.csv`/`samples.csv` files, respectively. The trained model results can be plotted using the provided Python script `Plot.py`. 115 | 116 | 117 |
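For example, after `./Run` has written the CSV files, the plots can be displayed with (assuming a Python 3 environment with the optional dependencies listed above installed):

```console
user@host $ python Plot.py
```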

118 | [Figure: Example regression plot in one dimension] 119 |

120 | 121 | 122 | For the two-dimensional case (i.e. when `inputDim=2` in the `main.cpp` file), an additional figure illustrating the predictive uncertainty of the model is provided; this plot shows a slice of the predictive mean, along with transparent standard deviation bounds and the observation data points used for training: 123 | 124 |

125 | [Figure: Example plot of the predictive uncertainty] 126 |

127 | 128 | 129 | 130 | 131 | ### Comparison with SciKit Learn Implementation 132 | 133 | The results of the CppGP code and SciKit Learn `GaussianProcessRegressor` class can be compared using the `SciKit_Learn_Comparison.py` Python script. This code provides the estimated kernel/noise parameters and negative log marginal likelihood (NLML) calculations in addition to plots of the CppGP and SciKit Learn results. 134 | 135 | 136 |
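For example, the comparison might be run as follows (a usage sketch, assuming the CppGPs executable has already been run so that the CSV result files are present):

```console
user@host $ python SciKit_Learn_Comparison.py
```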

137 | [Figure: Example regression plot in two dimensions] 138 |

139 | 140 | 141 | ## Profiling the CppGPs Implementation 142 | 143 | ### Requirements 144 | * [`valgrind`](http://valgrind.org/docs/manual/quick-start.html) - debugging/profiling tool suite 145 | * [`perf`](https://en.wikipedia.org/wiki/Perf_(Linux)) - performance analyzing tool for Linux 146 | * [`graphviz`](https://www.graphviz.org/) - open source graph visualization software 147 | * [`gprof2dot`](https://github.com/jrfonseca/gprof2dot) - python script for converting profiler output to dot graphs 148 | 149 | Profiling data is produced via the `callgrind` tool in the `valgrind` suite: 150 | ``` 151 | valgrind --tool=callgrind --trace-children=yes ./Run 152 | ``` 153 | __Note:__ This will take _much_ more time to run than the standard execution time (e.g. 100x). 154 | 155 | 156 | A graph visualization of the node-wise execution times in the program can then be created via: 157 | ``` 158 | perf record -g -- ./Run 159 | perf script | c++filt | gprof2dot -s -n 5.25 -f perf | dot -Tpng -o utils/profilier_output.png 160 | ``` 161 | [//]: # (COMMENT: perf script | c++filt | python /usr/lib/python3.7/site-packages/gprof2dot.py -f perf | dot -Tpng -o output.png) 162 | 163 | 164 | __Note:__ The `-s` flag can also be removed from the `gprof2dot` call to show parameter types. The `-n` flag is used to specify the percentage threshold for omitting nodes. 165 | 166 | 167 | ### Example Profiler Graph 168 | An example graph generated using the procedures outlined above is provided below: 169 | 170 |

171 | [Figure: Example profiler graph (misc/profilier_output.png)] 172 |

173 | 174 | As can be seen from the figure, the majority of the computational demand of the CppGPs implementation results from the `Eigen::LLT::solveInPlace` method. This corresponds to the term `cholesky.solve(Matrix::Identity(n,n))` in the `evalNLML()` function definition, which is used to compute the gradients of the NLML with respect to the hyperparameters. A preliminary parallelized implementation has been included in the comments; however, this naive attempt at concurrency currently results in even slower evaluation times (but achieves close to 100% CPU usage at least...). 175 | 176 | 177 | ## References 178 | __Gaussian Processes for Machine Learning__ is an extremely useful reference written by Carl Rasmussen and Christopher Williams; the book has also been made freely available on the [book webpage](http://www.gaussianprocess.org/gpml/). 179 | 180 | __Eigen__ provides a header-only library for linear algebra, matrix operations, and solving linear systems. The [Getting Started](https://eigen.tuxfamily.org/dox/GettingStarted.html) page of the Eigen documentation provides a basic introduction to the library. 181 | 182 | __CppOptimizationLibrary__ is an extensive header-only library written by Patrick Wieschollek with C++ implementations for a diverse collection of optimization algorithms; the library is available in the GitHub repository [PatWie/CppNumericalSolvers](https://github.com/PatWie/CppNumericalSolvers). 183 | 184 | __SciKit Learn__ provides a [Gaussian Process](https://scikit-learn.org/stable/modules/gaussian_process.html) Python module with an extensive collection of covariance kernels and options. 185 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Modified_L_BFGS/main.cpp: -------------------------------------------------------------------------------- 1 | // main.cpp -- example use of CppGPs for Gaussian process regression 2 | 3 | // 4 | // Eigen macros for using external routines (may need to be moved to header file) 5 | // ( see https://eigen.tuxfamily.org/dox/TopicUsingBlasLapack.html ) 6 | //#define EIGEN_USE_BLAS // Must also link to BLAS library 7 | //#define EIGEN_USE_LAPACKE_STRICT // Must also link to LAPACKE library 8 | //#define EIGEN_USE_MKL_ALL // Must also link to Intel MKL library 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "GPs.h" 20 | 21 | #include 22 | 23 | // Specify the target function for Gaussian process regression 24 | double targetFunc(Eigen::MatrixXd X) 25 | { 26 | 27 | if ( X.size() == 1 ) 28 | { 29 | // Define the target function to be an oscillatory, non-periodic function 30 | double oscillation = 30.0; 31 | double xshifted = 0.5*(X(0) + 1.0); 32 | return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0; 33 | } 34 | else 35 | { 36 | // Define the target function to be the Marr wavelet (a.k.a "Mexican Hat" wavelet) 37 | // ( see https://en.wikipedia.org/wiki/Mexican_hat_wavelet ) 38 | double sigma = 0.25; 39 | double pi = std::atan(1)*4; 40 | double radialTerm = std::pow(X.squaredNorm()/sigma,2); 41 | return 2.0 / (std::sqrt(pi*sigma) * std::pow(pi,1.0/4.0)) * (1.0 - radialTerm) * std::exp(-radialTerm); 42 | } 43 | } 44 | 45 | 46 | // Example use of CppGPs code for Gaussian process regression 47 | int main(int argc, char const *argv[]) 48 | { 49 | 50 | // Retrieve aliases from GP namespace 51 | using Matrix = Eigen::MatrixXd; 52 | using Vector = Eigen::VectorXd; 53 | 54 | // 
Convenience using-declarations 55 | using std::cout; 56 | using std::endl; 57 | using GP::GaussianProcess; 58 | using GP::linspace; 59 | using GP::sampleNormal; 60 | using GP::sampleUnif; 61 | using GP::RBF; 62 | 63 | // Used for timing code with chrono 64 | using GP::high_resolution_clock; 65 | using GP::time; 66 | using GP::getTime; 67 | 68 | // Set random seed based on system clock 69 | std::srand(static_cast(high_resolution_clock::now().time_since_epoch().count())); 70 | 71 | // Fix random seed for debugging and testing 72 | //std::srand(static_cast(0)); 73 | 74 | 75 | // 76 | // [ Define Training Data ] 77 | // 78 | 79 | // Specify the input dimensions 80 | //int inputDim = 1; 81 | int inputDim = 2; 82 | 83 | // Specify observation data count 84 | int obsCount; 85 | if ( inputDim == 1 ) 86 | obsCount = 250; 87 | else 88 | obsCount = 1000; 89 | 90 | // Specify observation noise level 91 | auto noiseLevel = 1.0; 92 | 93 | // Define random noise to add to target observations 94 | auto noise = sampleNormal(obsCount) * noiseLevel; 95 | 96 | // Define observations by sampling random uniform distribution 97 | Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim); 98 | Matrix y; y.resize(obsCount, 1); 99 | 100 | // Define target observations 'y' by applying 'targetFunc' 101 | // to the input observations 'X' and adding a noise vector 102 | for ( auto i : boost::irange(0,obsCount) ) 103 | y(i) = targetFunc(X.row(i)) + noise(i); 104 | 105 | 106 | 107 | // 108 | // [ Construct Gaussian Process Model ] 109 | // 110 | 111 | // Initialize Gaussian process model 112 | GaussianProcess model; 113 | 114 | // Specify training observations for GP model 115 | model.setObs(X,y); 116 | 117 | // Fix noise level [ noise level is fit to the training data by default ] 118 | //model.setNoise(0.33); 119 | 120 | // Initialize a RBF covariance kernel and assign it to the model 121 | RBF kernel; 122 | model.setKernel(kernel); 123 | 124 | // Specify hyperparameter bounds [ including noise bounds (index 0) ] 125 | // Vector lbs(2); lbs << 0.0001 , 0.01; 126 | // Vector ubs(2); ubs << 5.0 , 100.0; 127 | // model.setBounds(lbs, ubs); 128 | 129 | // Specify hyperparameter bounds [ excluding noise bounds ] 130 | Vector lbs(1); lbs << 0.01; 131 | Vector ubs(1); ubs << 100.0; 132 | model.setBounds(lbs, ubs); 133 | 134 | 135 | // Fit covariance kernel hyperparameters to the training data 136 | time start = high_resolution_clock::now(); 137 | model.fitModel(); 138 | time end = high_resolution_clock::now(); 139 | auto computationTime = getTime(start, end); 140 | 141 | // Display computation time required for fitting the GP model 142 | cout << "\nComputation Time: "; 143 | cout << computationTime << " s" << endl; 144 | 145 | // Retrieve the tuned/optimized kernel hyperparameters 146 | auto optParams = model.getParams(); 147 | cout << "\nOptimized Hyperparameters:" << endl << optParams.transpose() << " "; 148 | auto noiseL = model.getNoise(); 149 | cout << "(Noise = " << noiseL << ")\n" << endl; 150 | 151 | // Display the negative log marginal likelihood (NLML) of the optimized model 152 | cout << "NLML: " << std::fixed << std::setprecision(4) << model.computeNLML() << endl << endl; 153 | 154 | 155 | //double eps = std::numeric_limits::min(); 156 | //cout << "Epsilon: " << eps << endl; 157 | 158 | // 159 | // [ Posterior Predictions and Samples ] 160 | // 161 | 162 | // Define test mesh for GP model predictions 163 | int predCount; 164 | if ( inputDim == 1 ) 165 | predCount = 100; 166 | else 167 | predCount = 1000; 168 | //int 
predRes = static_cast(std::sqrt(predCount)); 169 | int predRes = static_cast(std::pow(predCount,1.0/inputDim)); 170 | //auto testMesh = linspace(0.0, 1.0, predCount); 171 | auto testMesh = linspace(-1.0, 1.0, predRes, inputDim); 172 | model.setPred(testMesh); 173 | 174 | // Reset predCount to account for rounding 175 | predCount = std::pow(predRes,inputDim); 176 | 177 | // Compute predicted means and variances for the test points 178 | model.predict(); 179 | Matrix pmean = model.getPredMean(); 180 | Matrix pvar = model.getPredVar(); 181 | Matrix pstd = pvar.array().sqrt().matrix(); 182 | 183 | // Get sample paths from the posterior distribution of the model 184 | int sampleCount; 185 | Matrix samples; 186 | if ( inputDim == 1 ) 187 | { 188 | sampleCount = 25; 189 | samples = model.getSamples(sampleCount); 190 | } 191 | 192 | 193 | // 194 | // [ Save Results for Plotting ] 195 | // 196 | 197 | // Save true and predicted means/variances to file 198 | std::string outputFile = "predictions.csv"; 199 | //Matrix trueSoln = testMesh.unaryExpr(std::ptr_fun(targetFunc)); 200 | 201 | Matrix trueSoln(predCount,1); 202 | for ( auto i : boost::irange(0,predCount) ) 203 | trueSoln(i,0) = targetFunc(testMesh.row(i)); 204 | 205 | std::ofstream fout; 206 | fout.open(outputFile); 207 | for ( auto i : boost::irange(0,predCount) ) 208 | { 209 | for ( auto j : boost::irange(0,inputDim) ) 210 | fout << testMesh(i,j) << ","; 211 | 212 | fout << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n"; 213 | } 214 | fout.close(); 215 | 216 | // Save observations to file 217 | std::string outputObsFile = "observations.csv"; 218 | fout.open(outputObsFile); 219 | for ( auto i : boost::irange(0,obsCount) ) 220 | { 221 | for ( auto j : boost::irange(0,inputDim) ) 222 | fout << X(i,j) << ","; 223 | 224 | fout << y(i) << "\n"; 225 | } 226 | fout.close(); 227 | 228 | 229 | if ( inputDim == 1 ) 230 | { 231 | // Save samples to file 232 | std::string outputSampleFile = "samples.csv"; 233 | fout.open(outputSampleFile); 234 | for ( auto j : boost::irange(0,sampleCount) ) 235 | { 236 | for ( auto i : boost::irange(0,predCount) ) 237 | fout << samples(i,j) << ((i 7 | 8 | namespace cppoptlib { 9 | 10 | template 11 | class MoreThuente { 12 | 13 | public: 14 | using Scalar = typename ProblemType::Scalar; 15 | using TVector = typename ProblemType::TVector; 16 | 17 | /** 18 | * @brief use MoreThuente Rule for (strong) Wolfe conditions 19 | * @details [long description] 20 | * 21 | * @param searchDir search direction for next update step 22 | * @param objFunc handle to problem 23 | * 24 | * @return step-width 25 | */ 26 | 27 | static Scalar linesearch(const TVector &x, const TVector &searchDir, ProblemType &objFunc, const Scalar alpha_init = 1.0) { 28 | // assume step width 29 | Scalar ak = alpha_init; 30 | 31 | std::cout << "\n[*] MORETHUENTE.h linesearch() \t " << (x.array().exp()).matrix().transpose() << "\n\n"; 32 | Scalar fval = objFunc.value(x); 33 | TVector g = x.eval(); 34 | objFunc.gradient(x, g); 35 | 36 | TVector s = searchDir.eval(); 37 | TVector xx = x.eval(); 38 | 39 | cvsrch(objFunc, xx, fval, g, ak, s); 40 | 41 | return ak; 42 | } 43 | 44 | static int cvsrch(ProblemType &objFunc, TVector &x, Scalar f, TVector &g, Scalar &stp, TVector &s) { 45 | // we rewrite this from MIN-LAPACK and some MATLAB code 46 | int info = 0; 47 | int infoc = 1; 48 | const Scalar xtol = 1e-15; 49 | const Scalar ftol = 1e-4; 50 | const Scalar gtol = 1e-2; 51 | const Scalar stpmin = 1e-15; 52 | const Scalar stpmax = 1e15; 53 | const Scalar
xtrapf = 4; 54 | const int maxfev = 20; 55 | int nfev = 0; 56 | 57 | Scalar dginit = g.dot(s); 58 | if (dginit >= 0.0) { 59 | // no descent direction 60 | // TODO: handle this case 61 | return -1; 62 | } 63 | 64 | bool brackt = false; 65 | bool stage1 = true; 66 | 67 | Scalar finit = f; 68 | Scalar dgtest = ftol * dginit; 69 | Scalar width = stpmax - stpmin; 70 | Scalar width1 = 2 * width; 71 | TVector wa = x.eval(); 72 | 73 | Scalar stx = 0.0; 74 | Scalar fx = finit; 75 | Scalar dgx = dginit; 76 | Scalar sty = 0.0; 77 | Scalar fy = finit; 78 | Scalar dgy = dginit; 79 | 80 | Scalar stmin; 81 | Scalar stmax; 82 | 83 | while (true) { 84 | 85 | // make sure we stay in the interval when setting min/max-step-width 86 | if (brackt) { 87 | stmin = std::min(stx, sty); 88 | stmax = std::max(stx, sty); 89 | } else { 90 | stmin = stx; 91 | stmax = stp + xtrapf * (stp - stx); 92 | } 93 | 94 | // Force the step to be within the bounds stpmax and stpmin. 95 | stp = std::max(stp, stpmin); 96 | stp = std::min(stp, stpmax); 97 | 98 | // Oops, let us return the last reliable values 99 | if ( 100 | (brackt && ((stp <= stmin) || (stp >= stmax))) 101 | || (nfev >= maxfev - 1 ) || (infoc == 0) 102 | || (brackt && ((stmax - stmin) <= (xtol * stmax)))) { 103 | stp = stx; 104 | } 105 | 106 | // test new point 107 | x = wa + stp * s; 108 | std::cout << "\n[*] MORETHUENTE.h cvsrch() \t" << (x.array().exp()).matrix().transpose() << "\n\n"; 109 | f = objFunc.value(x); 110 | objFunc.gradient(x, g); 111 | nfev++; 112 | Scalar dg = g.dot(s); 113 | Scalar ftest1 = finit + stp * dgtest; 114 | 115 | // all possible convergence tests 116 | if ((brackt & ((stp <= stmin) | (stp >= stmax))) | (infoc == 0)) 117 | info = 6; 118 | 119 | if ((stp == stpmax) & (f <= ftest1) & (dg <= dgtest)) 120 | info = 5; 121 | 122 | if ((stp == stpmin) & ((f > ftest1) | (dg >= dgtest))) 123 | info = 4; 124 | 125 | if (nfev >= maxfev) 126 | info = 3; 127 | 128 | if (brackt & (stmax - stmin <= xtol * stmax)) 129 | info = 2; 130 | 131 | if ((f <= ftest1) & (fabs(dg) <= gtol * (-dginit))) 132 | info = 1; 133 | 134 | // terminate when convergence reached 135 | if (info != 0) 136 | return -1; 137 | 138 | if (stage1 & (f <= ftest1) & (dg >= std::min(ftol, gtol)*dginit)) 139 | stage1 = false; 140 | 141 | if (stage1 & (f <= fx) & (f > ftest1)) { 142 | Scalar fm = f - stp * dgtest; 143 | Scalar fxm = fx - stx * dgtest; 144 | Scalar fym = fy - sty * dgtest; 145 | Scalar dgm = dg - dgtest; 146 | Scalar dgxm = dgx - dgtest; 147 | Scalar dgym = dgy - dgtest; 148 | 149 | cstep( stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, brackt, stmin, stmax, infoc); 150 | 151 | fx = fxm + stx * dgtest; 152 | fy = fym + sty * dgtest; 153 | dgx = dgxm + dgtest; 154 | dgy = dgym + dgtest; 155 | } else { 156 | // this is ugly and some variables should be moved to the class scope 157 | cstep( stx, fx, dgx, sty, fy, dgy, stp, f, dg, brackt, stmin, stmax, infoc); 158 | } 159 | 160 | if (brackt) { 161 | if (fabs(sty - stx) >= 0.66 * width1) 162 | stp = stx + 0.5 * (sty - stx); 163 | width1 = width; 164 | width = fabs(sty - stx); 165 | } 166 | } 167 | 168 | return 0; 169 | } 170 | 171 | static int cstep(Scalar& stx, Scalar& fx, Scalar& dx, Scalar& sty, Scalar& fy, Scalar& dy, Scalar& stp, 172 | Scalar& fp, Scalar& dp, bool& brackt, Scalar& stpmin, Scalar& stpmax, int& info) { 173 | info = 0; 174 | bool bound = false; 175 | 176 | // Check the input parameters for errors. 
177 | if ((brackt & ((stp <= std::min(stx, sty) ) | (stp >= std::max(stx, sty)))) | (dx * (stp - stx) >= 0.0) 178 | | (stpmax < stpmin)) { 179 | return -1; 180 | } 181 | 182 | Scalar sgnd = dp * (dx / fabs(dx)); 183 | 184 | Scalar stpf = 0; 185 | Scalar stpc = 0; 186 | Scalar stpq = 0; 187 | 188 | if (fp > fx) { 189 | info = 1; 190 | bound = true; 191 | Scalar theta = 3. * (fx - fp) / (stp - stx) + dx + dp; 192 | Scalar s = std::max(theta, std::max(dx, dp)); 193 | Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s)); 194 | if (stp < stx) 195 | gamma = -gamma; 196 | Scalar p = (gamma - dx) + theta; 197 | Scalar q = ((gamma - dx) + gamma) + dp; 198 | Scalar r = p / q; 199 | stpc = stx + r * (stp - stx); 200 | stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / 2.) * (stp - stx); 201 | if (fabs(stpc - stx) < fabs(stpq - stx)) 202 | stpf = stpc; 203 | else 204 | stpf = stpc + (stpq - stpc) / 2; 205 | brackt = true; 206 | } else if (sgnd < 0.0) { 207 | info = 2; 208 | bound = false; 209 | Scalar theta = 3 * (fx - fp) / (stp - stx) + dx + dp; 210 | Scalar s = std::max(theta, std::max(dx, dp)); 211 | Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s)); 212 | if (stp > stx) 213 | gamma = -gamma; 214 | 215 | Scalar p = (gamma - dp) + theta; 216 | Scalar q = ((gamma - dp) + gamma) + dx; 217 | Scalar r = p / q; 218 | stpc = stp + r * (stx - stp); 219 | stpq = stp + (dp / (dp - dx)) * (stx - stp); 220 | if (fabs(stpc - stp) > fabs(stpq - stp)) 221 | stpf = stpc; 222 | else 223 | stpf = stpq; 224 | brackt = true; 225 | } else if (fabs(dp) < fabs(dx)) { 226 | info = 3; 227 | bound = 1; 228 | Scalar theta = 3 * (fx - fp) / (stp - stx) + dx + dp; 229 | Scalar s = std::max(theta, std::max( dx, dp)); 230 | Scalar gamma = s * sqrt(std::max(static_cast(0.), (theta / s) * (theta / s) - (dx / s) * (dp / s))); 231 | if (stp > stx) 232 | gamma = -gamma; 233 | Scalar p = (gamma - dp) + theta; 234 | Scalar q = (gamma + (dx - dp)) + gamma; 235 | Scalar r = p / q; 236 | if ((r < 0.0) & (gamma != 0.0)) { 237 | stpc = stp + r * (stx - stp); 238 | } else if (stp > stx) { 239 | stpc = stpmax; 240 | } else { 241 | stpc = stpmin; 242 | } 243 | stpq = stp + (dp / (dp - dx)) * (stx - stp); 244 | if (brackt) { 245 | if (fabs(stp - stpc) < fabs(stp - stpq)) { 246 | stpf = stpc; 247 | } else { 248 | stpf = stpq; 249 | } 250 | } else { 251 | if (fabs(stp - stpc) > fabs(stp - stpq)) { 252 | stpf = stpc; 253 | } else { 254 | stpf = stpq; 255 | } 256 | 257 | } 258 | } else { 259 | info = 4; 260 | bound = false; 261 | if (brackt) { 262 | Scalar theta = 3 * (fp - fy) / (sty - stp) + dy + dp; 263 | Scalar s = std::max(theta, std::max(dy, dp)); 264 | Scalar gamma = s * sqrt((theta / s) * (theta / s) - (dy / s) * (dp / s)); 265 | if (stp > sty) 266 | gamma = -gamma; 267 | 268 | Scalar p = (gamma - dp) + theta; 269 | Scalar q = ((gamma - dp) + gamma) + dy; 270 | Scalar r = p / q; 271 | stpc = stp + r * (sty - stp); 272 | stpf = stpc; 273 | } else if (stp > stx) 274 | stpf = stpmax; 275 | else { 276 | stpf = stpmin; 277 | } 278 | } 279 | 280 | if (fp > fx) { 281 | sty = stp; 282 | fy = fp; 283 | dy = dp; 284 | } else { 285 | if (sgnd < 0.0) { 286 | sty = stx; 287 | fy = fx; 288 | dy = dx; 289 | } 290 | 291 | stx = stp; 292 | fx = fp; 293 | dx = dp; 294 | } 295 | 296 | stpf = std::min(stpmax, stpf); 297 | stpf = std::max(stpmin, stpf); 298 | stp = stpf; 299 | 300 | if (brackt & bound) { 301 | if (sty > stx) { 302 | stp = std::min(stx + static_cast(0.66) * (sty - stx), stp); 303 | } else { 304 | 
stp = std::max(stx + static_cast(0.66) * (sty - stx), stp); 305 | } 306 | } 307 | 308 | return 0; 309 | 310 | } 311 | 312 | }; 313 | 314 | } 315 | 316 | #endif /* MORETHUENTE_H_ */ 317 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/1D_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/TEST_CODE/Two_Dimensional/1D_example.png -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/2D_Comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/TEST_CODE/Two_Dimensional/2D_Comparison.png -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/Deterimine_Dimension.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import csv 3 | import time 4 | 5 | # Determine the input dimension of the data in the CppGPs prediction file 6 | def main(): 7 | 8 | # Get prediction data 9 | filename = "predictions.csv" 10 | with open(filename, "r") as csvfile: 11 | csvreader = csv.reader(csvfile, delimiter=',', quotechar='|') 12 | row = next(csvreader) 13 | print(len(row))  # the column count equals inputDim + 3 (input coordinates followed by the true/mean/std values) 14 | 15 | 16 | # Run main() function when called directly 17 | if __name__ == '__main__': 18 | main() 19 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/GPs.h: -------------------------------------------------------------------------------- 1 | #ifndef _GPS_H 2 | #define _GPS_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | //#include "./utils/minimize.h" 10 | 11 | // Include CppOptLib files 12 | #include "./include/cppoptlib/meta.h" 13 | #include "./include/cppoptlib/boundedproblem.h" 14 | #include "./include/cppoptlib/solver/lbfgsbsolver.h" 15 | 16 | 17 | // Declare namespace for Gaussian process definitions 18 | namespace GP { 19 | 20 | // Define PI using arctan function 21 | static const double PI = std::atan(1)*4; 22 | 23 | // Define aliases with using declarations 24 | using Matrix = Eigen::MatrixXd; 25 | using Vector = Eigen::VectorXd; 26 | 27 | // Define function for retrieving time from chrono 28 | float getTime(std::chrono::high_resolution_clock::time_point start, std::chrono::high_resolution_clock::time_point end); 29 | using time = std::chrono::high_resolution_clock::time_point; 30 | using std::chrono::high_resolution_clock; 31 | 32 | // Define function for sampling uniform distribution on interval 33 | Matrix sampleUnif(double a=0.0, double b=1.0, int N=1, int dim=1); 34 | //Matrix sampleUnif(double a=0.0, double b=1.0, int N=1); 35 | //Matrix sampleUnifSquare(double a=0.0, double b=1.0, int dim=1, int N=1); 36 | Vector sampleUnifVector(Vector lbs, Vector ubs); 37 | Matrix sampleNormal(int N=1); 38 | 39 | // Define utility functions for computing distance matrices 40 | void pdist(Matrix & Dv, Matrix & X1, Matrix & X2); 41 | void squareForm(Matrix & D, Matrix & Dv, int n, double diagVal=0.0); 42 | 43 | 44 | // Define abstract base class for covariance kernels 45 | class Kernel 46 | { 47 | public: 48 | 49 | // Constructor and destructor 50 | Kernel(Vector p, int c) : kernelParams(p) , paramCount(c) { }; 51 | virtual ~Kernel() = default; 52
| 53 | // Compute the covariance matrix provided a "distance matrix" consisting of pairwise squared norms between points 54 | virtual std::vector computeCov(Matrix & K, Matrix & D, Vector & params, double jitter=0.0, bool evalGrad=false) = 0; 55 | 56 | // Compute the (cross-)covariance matrix for specified input vectors X1 and X2 57 | virtual void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params) = 0; 58 | 59 | // Set the noise level which is to be added to the diagonal of the covariance matrix 60 | void setNoise(double noise) { noiseLevel = noise; } 61 | 62 | // Set method for specifying the kernel parameters 63 | void setParams(Vector params) { kernelParams = params; }; 64 | 65 | // Get methods for retrieving the kernel parameters 66 | int getParamCount() { return paramCount; } ; 67 | Vector getParams() { return kernelParams; }; 68 | 69 | protected: 70 | Vector kernelParams; 71 | int paramCount; 72 | double noiseLevel; 73 | virtual double evalKernel(Matrix&, Matrix&, Vector&, int) = 0; 74 | virtual double evalDistKernel(double, Vector&, int) = 0; 75 | }; 76 | 77 | 78 | // Define class for radial basis function (RBF) covariance kernel 79 | class RBF : public Kernel 80 | { 81 | public: 82 | 83 | // Constructor 84 | RBF() : Kernel(Vector(1), 1) { kernelParams(0)=1.0; }; 85 | 86 | // Compute the covariance matrix provided a "distance matrix" consisting of pairwise squared norms between points 87 | std::vector computeCov(Matrix & K, Matrix & D, Vector & params, double jitter=0.0, bool evalGrad=false); 88 | 89 | // Compute the (cross-)covariance matrix for specified input vectors X1 and X2 90 | void computeCrossCov(Matrix & K, Matrix & X1, Matrix & X2, Vector & params); 91 | 92 | private: 93 | 94 | // Functions for evaluating the kernel on a pair of points / a specified squared distance 95 | double evalKernel(Matrix&, Matrix&, Vector&, int); 96 | double evalDistKernel(double, Vector&, int); 97 | 98 | Matrix Kv; 99 | Matrix dK_i; 100 | Matrix dK_iv; 101 | }; 102 | 103 | 104 | 105 | 106 | // Define class for Gaussian processes 107 | class GaussianProcess : public cppoptlib::BoundedProblem 108 | { 109 | public: 110 | 111 | // Define alias for base class CppOptLib "BoundedProblem" for use in initialization list 112 | using Superclass = cppoptlib::BoundedProblem; 113 | 114 | // Constructors [ Initialize lowerBounds / upperBounds of CppOptLib superclass to zero ] 115 | GaussianProcess() : Superclass(Vector(0), Vector(0)) { } 116 | 117 | // Define CppOptLib methods 118 | // [ 'value' returns NLML and 'gradient' assigns the associated gradient vector to 'g' ] 119 | double value(const Vector &p) { return evalNLML(p, cppOptLibgrad, true); } 120 | void gradient(const Vector &p, Vector &g) { g = cppOptLibgrad; } 121 | Vector cppOptLibgrad; 122 | 123 | // Set methods 124 | void setObs(Matrix & x, Matrix & y) { obsX = x; obsY = y; N = static_cast(x.rows()); } 125 | void setKernel(Kernel & k) { kernel = &k; } 126 | void setPred(Matrix & px) { predX = px; } 127 | void setNoise(double noise) { fixedNoise = true; noiseLevel = noise; } 128 | void setBounds(Vector & lbs, Vector & ubs) { lowerBounds = lbs; upperBounds = ubs; fixedBounds=true; } 129 | 130 | // Compute methods 131 | void fitModel(); 132 | void predict(); 133 | double computeNLML(const Vector & p, double noise); 134 | double computeNLML(const Vector & p); 135 | double computeNLML(); 136 | 137 | // Get methods 138 | Matrix getPredMean() { return predMean; } 139 | Matrix getPredVar() { return predCov.diagonal() +
noiseLevel*Eigen::VectorXd::Ones(predMean.size()); } 140 | Matrix getSamples(int count=10); 141 | Vector getParams() { return (*kernel).getParams(); } 142 | double getNoise() { return noiseLevel; } 143 | 144 | 145 | // Define method for superclass "GradientObj" used by 'cg_minimize' [only needed when using "./utils/minimize.h"] 146 | //void computeValueAndGradient(Vector X, double & val, Vector & D) { val = evalNLML(X,D,true); }; 147 | 148 | private: 149 | 150 | // Private member functions 151 | double evalNLML(const Vector & p); 152 | double evalNLML(const Vector & p, Vector & g, bool evalGrad=false); 153 | void computeDistMat(); 154 | 155 | // Status variables ( still needed ? ) 156 | int N = 0; 157 | 158 | // Kernel and covariance matrix 159 | Kernel * kernel; 160 | double noiseLevel = 0.0; 161 | bool fixedNoise = false; 162 | double jitter = 1e-7; 163 | Matrix K; 164 | Eigen::LLT cholesky; 165 | 166 | Vector lowerBounds; 167 | Vector upperBounds; 168 | bool fixedBounds = false; 169 | void parseBounds(Vector & lbs, Vector & ubs, int augParamCount); 170 | 171 | // Store squared distance matrix and alpha for NLML/DNLML calculations 172 | Matrix distMatrix; 173 | Matrix _alpha; 174 | 175 | // Observation data 176 | Matrix obsX; 177 | Matrix obsY; 178 | 179 | // Prediction data 180 | Matrix predX; 181 | Matrix predMean; 182 | Matrix predCov; 183 | double NLML = 0.0; 184 | 185 | int paramCount; 186 | int augParamCount; 187 | Matrix term; 188 | //std::vector gradList; // Need to find a way to avoid creating new gradList each time... 189 | 190 | // DEFINE TIMER VARIABLES 191 | /* 192 | double time_computecov = 0.0; 193 | double time_cholesky_llt = 0.0; 194 | double time_NLML = 0.0; 195 | double time_term = 0.0; 196 | double time_grad = 0.0; 197 | double time_paramsearch = 0.0; 198 | double time_minimize = 0.0; 199 | double time_final_chol = 0.0; 200 | */ 201 | }; 202 | 203 | // Define linspace function for generating 204 | // equally spaced points on an interval 205 | //template 206 | //Eigen::Matrix linspace(T a, T b, int N) 207 | //{ return Eigen::Array::LinSpaced(N, a, b); } 208 | 209 | // Define linspace function for generating equally spaced points 210 | Matrix linspace(double a, double b, int N, int dim=1); 211 | 212 | }; 213 | 214 | 215 | #endif 216 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/POSSIBLE_main.cpp: -------------------------------------------------------------------------------- 1 | // main.cpp -- example use of CppGPs for Gaussian process regression 2 | 3 | // 4 | // Eigen macros for using external routines (may need to be moved to header file) 5 | // ( see https://eigen.tuxfamily.org/dox/TopicUsingBlasLapack.html ) 6 | //#define EIGEN_USE_BLAS // Must also link to BLAS library 7 | //#define EIGEN_USE_LAPACKE_STRICT // Must also link to LAPACKE library 8 | //#define EIGEN_USE_MKL_ALL // Must also link to Intel MKL library 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "GPs.h" 19 | 20 | 21 | // Specify the target function for Gaussian process regression 22 | double targetFunc(double x) 23 | { 24 | double oscillation = 30.0; 25 | return std::sin(oscillation*(x-0.1))*(0.5-(x-0.1))*15.0; 26 | } 27 | 28 | 29 | // Example use of CppGPs code for Gaussian process regression 30 | int main(int argc, char const *argv[]) 31 | { 32 | 33 | // Retrieve aliases from GP namespace 34 | using Matrix = Eigen::MatrixXd; 35 | using
Vector = Eigen::VectorXd; 36 | 37 | // Convenience using-declarations 38 | using std::cout; 39 | using std::endl; 40 | using GP::GaussianProcess; 41 | using GP::linspace; 42 | using GP::sampleUnif; 43 | using GP::RBF; 44 | 45 | // Used for timing code with chrono 46 | using GP::high_resolution_clock; 47 | using GP::time; 48 | using GP::getTime; 49 | 50 | // Set random seed based on system clock 51 | std::srand(static_cast(high_resolution_clock::now().time_since_epoch().count())); 52 | 53 | // Fix random seed for debugging and testing 54 | //std::srand(static_cast(0)); 55 | 56 | 57 | // 58 | // [ Define Training Data ] 59 | // 60 | 61 | // Specify observation data count 62 | int obsCount = 1000; 63 | 64 | // Specify observation noise level 65 | auto noiseLevel = 1.0; 66 | 67 | // Define random uniform noise to add to target observations 68 | auto noise = Eigen::VectorXd::Random(obsCount) * noiseLevel; 69 | 70 | // Define observations by sampling random uniform distribution 71 | Matrix X = sampleUnif(0.0, 1.0, obsCount); 72 | Matrix y; y.resize(obsCount, 1); 73 | 74 | // Define target observations 'y' by applying 'targetFunc' 75 | // to the input observations 'X' and adding a noise vector 76 | y = X.unaryExpr(std::ptr_fun(targetFunc)) + noise; 77 | 78 | 79 | 80 | // 81 | // [ Construct Gaussian Process Model ] 82 | // 83 | 84 | // Initialize Gaussian process model 85 | GaussianProcess model; 86 | 87 | // Specify training observations for GP model 88 | model.setObs(X,y); 89 | 90 | // Fix noise level [ noise level is fit to the training data by default ] 91 | //model.setNoise(0.33); 92 | 93 | // Initialize a RBF covariance kernel and assign it to the model 94 | RBF kernel; 95 | model.setKernel(kernel); 96 | 97 | // Specify hyperparameter bounds [ including noise bounds (index 0) ] 98 | // Vector lbs(2); lbs << 0.0001 , 0.01; 99 | // Vector ubs(2); ubs << 5.0 , 100.0; 100 | // model.setBounds(lbs, ubs); 101 | 102 | // Specify hyperparameter bounds [ excluding noise bounds ] 103 | Vector lbs(1); lbs << 0.01; 104 | Vector ubs(1); ubs << 100.0; 105 | model.setBounds(lbs, ubs); 106 | 107 | 108 | // Fit covariance kernel hyperparameters to the training data 109 | time start = high_resolution_clock::now(); 110 | model.fitModel(); 111 | time end = high_resolution_clock::now(); 112 | auto computationTime = getTime(start, end); 113 | 114 | // Display computation time required for fitting the GP model 115 | cout << "\nComputation Time: "; 116 | cout << computationTime << " s" << endl; 117 | 118 | // Retrieve the tuned/optimized kernel hyperparameters 119 | auto optParams = model.getParams(); 120 | cout << "\nOptimized Hyperparameters:" << endl << optParams.transpose() << " "; 121 | auto noiseL = model.getNoise(); 122 | cout << "(Noise = " << noiseL << ")\n" << endl; 123 | 124 | // Display the negative log marginal likelihood (NLML) of the optimized model 125 | cout << "NLML: " << model.computeNLML() << endl << endl; 126 | 127 | 128 | // 129 | // [ Posterior Predictions and Samples ] 130 | // 131 | 132 | // Define test mesh for GP model predictions 133 | int predCount = 100; 134 | auto testMesh = linspace(0.0, 1.0, predCount); 135 | model.setPred(testMesh); 136 | 137 | // Compute predicted means and variances for the test points 138 | model.predict(); 139 | Matrix pmean = model.getPredMean(); 140 | Matrix pvar = model.getPredVar(); 141 | Matrix pstd = (pvar.array().sqrt()).matrix(); 142 | 143 | // Get sample paths from the posterior distribution of the model 144 | int sampleCount = 100; 145 | Matrix 
samples = model.getSamples(sampleCount); 146 | 147 | 148 | 149 | // 150 | // [ Save Results for Plotting ] 151 | // 152 | 153 | // Save true and predicted means/variances to file 154 | std::string outputFile = "predictions.csv"; 155 | Matrix trueSoln = testMesh.unaryExpr(std::ptr_fun(targetFunc)); 156 | std::ofstream fout; 157 | fout.open(outputFile); 158 | for ( auto i : boost::irange(0,predCount) ) 159 | fout << testMesh(i) << "," << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n"; 160 | fout.close(); 161 | 162 | // Save observations to file 163 | std::string outputObsFile = "observations.csv"; 164 | fout.open(outputObsFile); 165 | for ( auto i : boost::irange(0,obsCount) ) 166 | fout << X(i) << "," << y(i) << "\n"; 167 | fout.close(); 168 | 169 | // Save samples to file 170 | std::string outputSampleFile = "samples.csv"; 171 | fout.open(outputSampleFile); 172 | for ( auto j : boost::irange(0,sampleCount) ) 173 | { 174 | for ( auto i : boost::irange(0,predCount) ) 175 | fout << samples(i,j) << ((i= 0, facecolor="C0", alpha=alpha, interpolate=True, label=None) 55 | 56 | # Plot true solution values 57 | plt.plot(inVals, trueVals, 'C1', linewidth=2.0, linestyle="dashed") 58 | 59 | # Plot observation points 60 | plt.scatter(obsX, obsY) 61 | 62 | # Plot posterior sample paths 63 | for i in range(0,samples.shape[0]): 64 | plt.plot(inVals, samples[i,:], 'C0', alpha=0.2, linewidth=1.0, linestyle="dashed") 65 | 66 | # Add title to plot 67 | plt.suptitle("CppGPs Posterior Prediction") 68 | plt.show() 69 | 70 | 71 | # Run main() function when called directly 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/error_bounds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/TEST_CODE/Two_Dimensional/error_bounds.png -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/include/README.md: -------------------------------------------------------------------------------- 1 | # CppOptimizationLibrary 2 | 3 | The CppOptimizationLibrary can be downloaded from the GitHub repository [PatWie/CppNumericalSolvers](https://github.com/PatWie/CppNumericalSolvers). Alternatively, the repository can be cloned via: 4 | 5 | ```console 6 | user@host $ git clone git@github.com:PatWie/CppNumericalSolvers.git 7 | ``` 8 | 9 | The `cppoptlib` directory should then be placed here (i.e. `include/cppoptlib`) before compiling the CppGPs code. 
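For example, one way to put the headers in place after cloning (a sketch only; the exact location of the `cppoptlib` directory within the cloned CppNumericalSolvers repository may vary between versions):

```console
user@host $ cp -r CppNumericalSolvers/include/cppoptlib ./include/cppoptlib
```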
10 | -------------------------------------------------------------------------------- /misc/TEST_CODE/Two_Dimensional/main.cpp: -------------------------------------------------------------------------------- 1 | // main.cpp -- example use of CppGPs for Gaussian process regression 2 | 3 | // 4 | // Eigen macros for using external routines (may need to be moved to header file) 5 | // ( see https://eigen.tuxfamily.org/dox/TopicUsingBlasLapack.html ) 6 | //#define EIGEN_USE_BLAS // Must also link to BLAS library 7 | //#define EIGEN_USE_LAPACKE_STRICT // Must also link to LAPACKE library 8 | //#define EIGEN_USE_MKL_ALL // Must also link to Intel MKL library 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "GPs.h" 19 | 20 | 21 | // Specify the target function for Gaussian process regression 22 | double targetFunc(Eigen::MatrixXd X) 23 | { 24 | 25 | if ( X.size() == 1 ) 26 | { 27 | // Define the target function to be an oscillatory, non-periodic function 28 | double oscillation = 30.0; 29 | double xshifted = 0.5*(X(0) + 1.0); 30 | return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0; 31 | } 32 | else 33 | { 34 | // Define the target function to be the Marr wavelet (a.k.a "Mexican Hat" wavelet) 35 | // ( see https://en.wikipedia.org/wiki/Mexican_hat_wavelet ) 36 | double sigma = 0.25; 37 | double pi = std::atan(1)*4; 38 | double radialTerm = std::pow(X.squaredNorm()/sigma,2); 39 | return 2.0 / (std::sqrt(pi*sigma) * std::pow(pi,1.0/4.0)) * (1.0 - radialTerm) * std::exp(-radialTerm); 40 | } 41 | } 42 | 43 | 44 | // Example use of CppGPs code for Gaussian process regression 45 | int main(int argc, char const *argv[]) 46 | { 47 | 48 | // Retrieve aliases from GP namespace 49 | using Matrix = Eigen::MatrixXd; 50 | using Vector = Eigen::VectorXd; 51 | 52 | // Convenience using-declarations 53 | using std::cout; 54 | using std::endl; 55 | using GP::GaussianProcess; 56 | using GP::linspace; 57 | using GP::sampleNormal; 58 | using GP::sampleUnif; 59 | using GP::RBF; 60 | 61 | // Used for timing code with chrono 62 | using GP::high_resolution_clock; 63 | using GP::time; 64 | using GP::getTime; 65 | 66 | // Set random seed based on system clock 67 | std::srand(static_cast(high_resolution_clock::now().time_since_epoch().count())); 68 | 69 | // Fix random seed for debugging and testing 70 | //std::srand(static_cast(0)); 71 | 72 | 73 | // 74 | // [ Define Training Data ] 75 | // 76 | 77 | 78 | // Specify the input dimensions 79 | //int inputDim = 1; 80 | int inputDim = 2; 81 | 82 | 83 | // Specify observation data count 84 | int obsCount; 85 | if ( inputDim == 1 ) 86 | obsCount = 250; 87 | else 88 | obsCount = 1000; 89 | 90 | // Specify observation noise level 91 | auto noiseLevel = 1.0; 92 | 93 | // Define random noise to add to target observations 94 | // [ NOTE: .noalias() is 100% necessary with "std::default_random_engine" ] 95 | Matrix noise; 96 | noise.noalias() = sampleNormal(obsCount) * noiseLevel; 97 | 98 | // Define observations by sampling random uniform distribution 99 | Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim); 100 | Matrix y; y.resize(obsCount, 1); 101 | 102 | // Define target observations 'y' by applying 'targetFunc' 103 | // to the input observations 'X' and adding a noise vector 104 | for ( auto i : boost::irange(0,obsCount) ) 105 | y(i,0) = targetFunc(X.row(i)) + noise(i); 106 | 107 | 108 | // 109 | // [ Construct Gaussian Process Model ] 110 | // 111 | 112 | // Initialize
Gaussian process model 113 | GaussianProcess model; 114 | 115 | // Specify training observations for GP model 116 | model.setObs(X,y); 117 | 118 | // Fix noise level [ noise level is fit to the training data by default ] 119 | //model.setNoise(0.33); 120 | 121 | // Initialize a RBF covariance kernel and assign it to the model 122 | RBF kernel; 123 | model.setKernel(kernel); 124 | 125 | // Specify hyperparameter bounds [ including noise bounds (index 0) ] 126 | // Vector lbs(2); lbs << 0.0001 , 0.01; 127 | // Vector ubs(2); ubs << 5.0 , 100.0; 128 | // model.setBounds(lbs, ubs); 129 | 130 | // Specify hyperparameter bounds [ excluding noise bounds ] 131 | Vector lbs(1); lbs << 0.01; 132 | Vector ubs(1); ubs << 100.0; 133 | model.setBounds(lbs, ubs); 134 | 135 | 136 | // Fit covariance kernel hyperparameters to the training data 137 | time start = high_resolution_clock::now(); 138 | model.fitModel(); 139 | time end = high_resolution_clock::now(); 140 | auto computationTime = getTime(start, end); 141 | 142 | // Display computation time required for fitting the GP model 143 | cout << "\nComputation Time: "; 144 | cout << computationTime << " s" << endl; 145 | 146 | // Retrieve the tuned/optimized kernel hyperparameters 147 | auto optParams = model.getParams(); 148 | cout << "\nOptimized Hyperparameters:" << endl << optParams.transpose() << " "; 149 | auto noiseL = model.getNoise(); 150 | cout << "(Noise = " << noiseL << ")\n" << endl; 151 | 152 | // Display the negative log marginal likelihood (NLML) of the optimized model 153 | cout << "NLML: " << model.computeNLML() << endl << endl; 154 | 155 | 156 | // 157 | // [ Posterior Predictions and Samples ] 158 | // 159 | 160 | // Define test mesh for GP model predictions 161 | int predCount; 162 | if ( inputDim == 1 ) 163 | predCount = 100; 164 | else 165 | predCount = 1000; 166 | //int predRes = static_cast(std::sqrt(predCount)); 167 | int predRes = static_cast(std::pow(predCount,1.0/inputDim)); 168 | //auto testMesh = linspace(0.0, 1.0, predCount); 169 | auto testMesh = linspace(-1.0, 1.0, predRes, inputDim); 170 | model.setPred(testMesh); 171 | 172 | // Reset predCount to account for rounding 173 | predCount = std::pow(predRes,inputDim); 174 | 175 | // Compute predicted means and variances for the test points 176 | model.predict(); 177 | Matrix pmean = model.getPredMean(); 178 | Matrix pvar = model.getPredVar(); 179 | Matrix pstd = (pvar.array().sqrt()).matrix(); 180 | 181 | // Get sample paths from the posterior distribution of the model 182 | int sampleCount; 183 | Matrix samples; 184 | if ( inputDim == 1 ) 185 | { 186 | sampleCount = 25; 187 | samples = model.getSamples(sampleCount); 188 | } 189 | 190 | // 191 | // [ Save Results for Plotting ] 192 | // 193 | 194 | // Save true and predicted means/variances to file 195 | std::string outputFile = "predictions.csv"; 196 | //Matrix trueSoln = testMesh.unaryExpr(std::ptr_fun(targetFunc)); 197 | 198 | Matrix trueSoln(predCount,1); 199 | for ( auto i : boost::irange(0,predCount) ) 200 | trueSoln(i,0) = targetFunc(testMesh.row(i)); 201 | 202 | std::ofstream fout; 203 | fout.open(outputFile); 204 | for ( auto i : boost::irange(0,predCount) ) 205 | { 206 | for ( auto j : boost::irange(0,inputDim) ) 207 | fout << testMesh(i,j) << ","; 208 | 209 | fout << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n"; 210 | } 211 | fout.close(); 212 | 213 | // Save observations to file 214 | std::string outputObsFile = "observations.csv"; 215 | fout.open(outputObsFile); 216 | for ( auto i : 
boost::irange(0,obsCount) ) 217 | { 218 | for ( auto j : boost::irange(0,inputDim) ) 219 | fout << X(i,j) << ","; 220 | 221 | fout << y(i) << "\n"; 222 | } 223 | fout.close(); 224 | 225 | 226 | if ( inputDim == 1 ) 227 | { 228 | // Save samples to file 229 | std::string outputSampleFile = "samples.csv"; 230 | fout.open(outputSampleFile); 231 | for ( auto j : boost::irange(0,sampleCount) ) 232 | { 233 | for ( auto i : boost::irange(0,predCount) ) 234 | fout << samples(i,j) << ((i 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Declare namespace for utils 10 | namespace minimize { 11 | 12 | // Define aliases with using declarations 13 | using Matrix = Eigen::MatrixXd; 14 | using Vector = Eigen::VectorXd; 15 | using Eigen::VectorXcd; 16 | 17 | class GradientObj 18 | { 19 | public: 20 | GradientObj() { }; 21 | virtual void computeValueAndGradient(Vector,double&,Vector&) { }; 22 | }; 23 | 24 | 25 | // Define interpolation procedure for minimization algorithm 26 | double interpolate(double x2, double f2, double d2, double x3, double f3, double d3, double f0, double INT, double RHO); 27 | 28 | // Define cubic extrapolation routine for minimization algorithm 29 | double cubic_extrap(double x1, double x2, double f1, double f2, double d1, double d2, double EXT, double INT); 30 | 31 | // Conjugate gradient minimization algorithm 32 | void cg_minimize(Vector & X, GradientObj * target, Vector & D, int length, double SIG=0.1, double EXT=3.0, double INT=0.01, int MAX=20); 33 | 34 | 35 | }; 36 | #endif 37 | -------------------------------------------------------------------------------- /misc/predictive_uncertainty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/predictive_uncertainty.png -------------------------------------------------------------------------------- /misc/profilier_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nw2190/CppGPs/eb707e54dff274596238310a654a715930d62214/misc/profilier_output.png -------------------------------------------------------------------------------- /misc/utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "utils.h" 8 | 9 | // Define aliases with using declarations 10 | using Matrix = utils::Matrix; 11 | using Vector = utils::Vector; 12 | using VectorXcd = utils::VectorXcd; 13 | 14 | // Construct Toeplitz matrix from column (symmetric) 15 | Matrix utils::buildToep(const Vector & col, const Vector & row) 16 | { 17 | auto n = static_cast(col.rows()); 18 | Matrix toep(n,n); 19 | for (auto i : boost::irange(0,n)) 20 | { 21 | toep.diagonal(i) = Eigen::VectorXd::Ones(n-i)*row[i]; 22 | toep.diagonal(-i) = Eigen::VectorXd::Ones(n-i)*col[i]; 23 | } 24 | return toep; 25 | }; 26 | 27 | // Construct Toeplitz matrix from column (symmetric) 28 | Matrix utils::buildToep(const Vector & col) { return buildToep(col, col); }; 29 | 30 | // Embed Toeplitz matrix into column of circulant matrix (general) 31 | void utils::embedToep(Vector & toepCol, Vector & toepRow, VectorXcd & eigVals) 32 | { 33 | // Specify first column of circulant matrix embedding 34 | auto n = static_cast(toepCol.rows()); 35 | Vector embedCol(2*n); 36 | embedCol.head(n) = toepCol; 37 | embedCol.tail(n-1) = toepRow.tail(n-1).reverse(); 38 | 39 | // Compute 
eigenvalues of circulant matrix 40 | Eigen::FFT fft; 41 | fft.fwd(eigVals, embedCol); 42 | }; 43 | 44 | 45 | // Embed Toeplitz matrix into column of circulant matrix (symmetric) 46 | void utils::embedToep(Vector & toepCol, VectorXcd & eigVals) { embedToep(toepCol, toepCol, eigVals); }; 47 | 48 | 49 | // Define matvec product for circulant matrix using FFT (non-symmetric / complex eigVals) 50 | void utils::circMatVec(const VectorXcd & eigVals, Vector & x, Vector & result) 51 | { 52 | VectorXcd freqvec; 53 | Eigen::FFT fft; 54 | fft.fwd(freqvec,x); 55 | freqvec = eigVals.cwiseProduct(freqvec); 56 | fft.inv(result,freqvec); 57 | }; 58 | 59 | 60 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 61 | Vector utils::toepMatVec(Vector & col, Vector & row, Vector & x, bool sym) 62 | { 63 | auto n = static_cast(col.rows()); 64 | Vector xembed = Eigen::VectorXd::Zero(2*n); 65 | xembed.head(n) = x; 66 | VectorXcd eigVals(2*n); 67 | Vector result(2*n); 68 | embedToep(col, row, eigVals); 69 | 70 | // Note: Should be possible to optimize this better using 71 | // the fact that eigVals is real in the symmetric case 72 | if (sym) 73 | circMatVec(eigVals.real(), xembed, result); 74 | else 75 | circMatVec(eigVals, xembed, result); 76 | 77 | return result.head(n); 78 | 79 | }; 80 | 81 | // Define matvec product for Toeplitz matrix using FFT (symmetric) 82 | Vector utils::toepMatVec(Vector & col, Vector & x) { return toepMatVec(col, col, x, true); }; 83 | 84 | 85 | 86 | // NOTE: the "fast" version avoids extra function calls and 87 | // is marginally faster than the more pedagogical "toepMatVec" 88 | 89 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 90 | Vector utils::fastToepMatVec(Vector & toepCol, Vector & toepRow, Vector & x) 91 | { 92 | auto n = static_cast(toepCol.rows()); 93 | Vector result(2*n); 94 | 95 | // Embed RHS into 2n-vector with trailing zeros 96 | Vector xembed = Eigen::VectorXd::Zero(2*n); 97 | xembed.head(n) = x; 98 | 99 | // Specify first column of circulant matrix embedding 100 | Vector embedCol(2*n); 101 | embedCol.head(n) = toepCol; 102 | embedCol.tail(n-1) = toepRow.tail(n-1).reverse(); 103 | 104 | // Compute eigenvalues of circulant matrix 105 | Eigen::FFT fft; 106 | VectorXcd eigVals(2*n), freqvec(2*n); 107 | fft.fwd(eigVals, embedCol); 108 | 109 | // Apply component-wise multiplication in frequency space 110 | fft.fwd(freqvec, xembed); 111 | freqvec = eigVals.cwiseProduct(freqvec); 112 | fft.inv(result,freqvec); 113 | 114 | // Return first n values corresponding to the original system 115 | return result.head(n); 116 | 117 | }; 118 | 119 | // Define matvec product for Toeplitz matrix using FFT (symmetric) 120 | Vector utils::fastToepMatVec(Vector & col, Vector & x) { return fastToepMatVec(col, col, x); }; 121 | 122 | 123 | 124 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 125 | Matrix utils::fastToepMatMat(Vector & toepCol, Vector & toepRow, Matrix & X) 126 | { 127 | auto n = static_cast(toepCol.rows()); 128 | auto m = static_cast(X.cols()); 129 | Matrix result(n,m); 130 | for (auto i : boost::irange(0,m)) 131 | { 132 | Vector X_i = X.col(i); 133 | result.col(i) = fastToepMatVec(toepCol, toepRow, X_i); 134 | } 135 | return result; 136 | } 137 | 138 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 139 | Matrix utils::fastToepMatMat(Vector & toepCol, Matrix & X) { return fastToepMatMat(toepCol, toepCol, X); }; 140 | 141 | 142 | 143 | // Compute Lanczos vectors for matrix-vector pair (A,v)
144 | void utils::Lanczos(Matrix & A, Vector & v, Matrix & Q, Matrix & T, int N) 145 | { 146 | // Use full decomposition if step count N is not specified 147 | auto n = static_cast(A.rows()); 148 | if (N == 0) 149 | N = n; 150 | 151 | // Initialize the algorithm's vectors and parameters 152 | Vector q = v/v.norm(); 153 | auto alpha = static_cast( (A*q).dot(q) ); 154 | Vector r = A*q - alpha*q; 155 | Q.col(0) = q; 156 | T(0,0) = alpha; 157 | Vector u; 158 | 159 | // Define orthogonality and vector norm tolerances 160 | //double epsilon = 1.0e-15; 161 | //double eta = std::pow(epsilon, 0.75); 162 | double tolerance = 0.00001; 163 | 164 | for (auto j : boost::irange(1,N)) 165 | { 166 | auto beta = static_cast(r.norm()); 167 | if (beta < tolerance) 168 | { 169 | std::cout << "\nStopped Early\n"; 170 | Q.resize(n,j); 171 | break; 172 | } 173 | 174 | // Basic implementation 175 | //v_k = vt_k/beta; 176 | //alpha = v_k.transpose().dot(A*v_k); 177 | //vt_k = A*v_k - alpha*v_k - beta*Q.col(k-1); 178 | 179 | // Improved stability implementation 180 | q = r/beta; 181 | u = A*q - beta*Q.col(j-1); 182 | alpha = q.transpose().dot(u); 183 | r = u - alpha*q; 184 | 185 | // Complete re-orthogonalization [ roughly ~ O(n^3) for combined iterations ] 186 | r = r - Q*(Q.transpose()*r); 187 | 188 | /* 189 | // Check if additional re-orthogonalization is necessary 190 | for (auto k : boost::irange(0,j)) 191 | { 192 | if ( std::abs((r/r.norm()).dot(Q.col(k))) > eta ) 193 | { 194 | //std::cout << "Reorthogonalizing..." << std::endl; 195 | r = r/r.norm(); 196 | r = r - Q*(Q.transpose()*r); 197 | break; 198 | } 199 | } 200 | */ 201 | 202 | // Assign Lanczos vector and tridiagonal entries 203 | Q.col(j) = q; 204 | T(j,j) = alpha; 205 | T(j,j-1) = T(j-1,j) = beta; 206 | 207 | } 208 | 209 | }; 210 | -------------------------------------------------------------------------------- /misc/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _UTILS_H 2 | #define _UTILS_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Declare namespace for utils 10 | namespace utils { 11 | 12 | // Define aliases with using declarations 13 | using Matrix = Eigen::MatrixXd; 14 | using Vector = Eigen::VectorXd; 15 | using Eigen::VectorXcd; 16 | 17 | // Generate equally spaced points on an interval (Eigen::Vector) 18 | template 19 | Eigen::Matrix linspace(T a, T b, int N) { return Eigen::Array::LinSpaced(N, a, b); }; 20 | 21 | 22 | // Construct Toeplitz matrix from column (symmetric) 23 | Matrix buildToep(const Vector & col, const Vector & row); 24 | 25 | // Construct Toeplitz matrix from column (symmetric) 26 | Matrix buildToep(const Vector & col); 27 | 28 | // Embed Toeplitz matrix into column of circulant matrix (general) 29 | void embedToep(Vector & toepCol, Vector & toepRow, VectorXcd & eigVals); 30 | 31 | // Embed Toeplitz matrix into column of circulant matrix (symmetric) 32 | void embedToep(Vector & toepCol, VectorXcd & eigVals); 33 | 34 | 35 | // Define matvec product for circular matrix using FFT (non-symmetric / complex eigVals) 36 | void circMatVec(const VectorXcd & eigVals, Vector & x, Vector & result); 37 | 38 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 39 | Vector toepMatVec(Vector & col, Vector & row, Vector & x, bool sym=false); 40 | 41 | // Define matvec product for Toeplitz matrix using FFT (symmetric) 42 | Vector toepMatVec(Vector & col, Vector & x); 43 | 44 | // NOTE: the "fast" version avoids extra function calls and 
45 | // is marginally faster than the more pedagogical "toepMatVec" 46 | 47 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 48 | Vector fastToepMatVec(Vector & toepCol, Vector & toepRow, Vector & x); 49 | 50 | // Define matvec product for Toeplitz matrix using FFT (symmetric) 51 | Vector fastToepMatVec(Vector & col, Vector & x); 52 | 53 | 54 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 55 | Matrix fastToepMatMat(Vector & toepCol, Vector & toepRow, Matrix & X); 56 | 57 | // Define matvec product for Toeplitz matrix using FFT (non-symmetric) 58 | Matrix fastToepMatMat(Vector & toepCol, Matrix & X); 59 | 60 | // Compute Lanczos vectors for matrix-vector pair (A,v) 61 | void Lanczos(Matrix & A, Vector & v, Matrix & Q, Matrix & T, int N=0); 62 | 63 | 64 | }; 65 | #endif 66 | -------------------------------------------------------------------------------- /tests/1D_example.cpp: -------------------------------------------------------------------------------- 1 | // main.cpp -- example use of CppGPs for Gaussian process regression 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "../GPs.h" 13 | 14 | #include 15 | 16 | // Specify the target function for Gaussian process regression 17 | double targetFunc(Eigen::MatrixXd X) 18 | { 19 | 20 | if ( X.size() == 1 ) 21 | { 22 | // Define the target function to be an oscillatory, non-periodic function 23 | double oscillation = 30.0; 24 | double xshifted = 0.5*(X(0) + 1.0); 25 | return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0; 26 | } 27 | else 28 | { 29 | // Define the target function to be the Marr wavelet (a.k.a "Mexican Hat" wavelet) 30 | // ( see https://en.wikipedia.org/wiki/Mexican_hat_wavelet ) 31 | double sigma = 0.25; 32 | double pi = std::atan(1)*4; 33 | double radialTerm = std::pow(X.squaredNorm()/sigma,2); 34 | return 2.0 / (std::sqrt(pi*sigma) * std::pow(pi,1.0/4.0)) * (1.0 - radialTerm) * std::exp(-radialTerm); 35 | } 36 | } 37 | 38 | // Specify multi-modal target function for testing 39 | double targetFuncMultiModal(Eigen::MatrixXd X) 40 | { 41 | Eigen::MatrixXd p1(1,2); p1 << 0.25, 0.5; 42 | Eigen::MatrixXd p2(1,2); p2 << -0.25, -0.5; 43 | double scale1 = 10.0; double scale2 = 15.0; 44 | double sigma1 = 0.4; double sigma2 = 0.3; 45 | double radialTerm1 = ((X-p1)*(1/sigma1)).squaredNorm(); 46 | double radialTerm2 = ((X-p2)*(1/sigma2)).squaredNorm(); 47 | return scale1*std::exp(-radialTerm1) + scale2*std::exp(-radialTerm2); 48 | } 49 | 50 | 51 | // Example use of CppGPs code for Gaussian process regression 52 | int main(int argc, char const *argv[]) 53 | { 54 | 55 | // Inform Eigen of possible multi-threading 56 | Eigen::initParallel(); 57 | 58 | // Retrieve aliases from GP namespace 59 | using Matrix = Eigen::MatrixXd; 60 | //using Vector = Eigen::VectorXd; 61 | 62 | // Convenience using-declarations 63 | using std::cout; 64 | using std::endl; 65 | using GP::GaussianProcess; 66 | using GP::linspace; 67 | using GP::sampleNormal; 68 | using GP::sampleUnif; 69 | using GP::RBF; 70 | 71 | // Used for timing code with chrono 72 | using GP::high_resolution_clock; 73 | using GP::time; 74 | using GP::getTime; 75 | 76 | // Set random seed based on system clock 77 | std::srand(static_cast(high_resolution_clock::now().time_since_epoch().count())); 78 | 79 | // Fix random seed for debugging and testing 80 | //std::srand(static_cast(0)); 81 | 82 | 83 | // 84 | // [ Define Training Data ]
85 |   //
86 | 
87 |   // Specify the input dimensions
88 |   int inputDim = 1;
89 |   //int inputDim = 2;
90 | 
91 |   // Specify observation data count
92 |   int obsCount;
93 |   if ( inputDim == 1 )
94 |     obsCount = 250;
95 |   else
96 |     obsCount = 2000;
97 | 
98 |   // Specify observation noise level
99 |   auto noiseLevel = 1.0;
100 | 
101 |   // Define random noise to add to target observations
102 |   //auto noise = sampleNormal(obsCount) * noiseLevel;
103 |   Matrix noise;
104 |   noise.noalias() = sampleNormal(obsCount) * noiseLevel;
105 | 
106 |   // Define observations by sampling random uniform distribution
107 |   Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim);
108 |   Matrix y;  y.resize(obsCount, 1);
109 | 
110 |   // Define target observations 'y' by applying 'targetFunc'
111 |   // to the input observations 'X' and adding a noise vector
112 |   for ( auto i : boost::irange(0,obsCount) )
113 |     y(i) = targetFunc(X.row(i)) + noise(i);
114 |     //y(i) = targetFuncMultiModal(X.row(i)) + noise(i);
115 | 
116 | 
117 | 
118 |   //
119 |   // [ Construct Gaussian Process Model ]
120 |   //
121 | 
122 |   // Initialize Gaussian process model
123 |   GaussianProcess model;
124 | 
125 |   // Specify training observations for GP model
126 |   model.setObs(X,y);
127 | 
128 |   // Fix noise level [ noise level is fit to the training data by default ]
129 |   //model.setNoise(0.33);
130 | 
131 |   // Initialize an RBF covariance kernel and assign it to the model
132 |   RBF kernel;
133 |   model.setKernel(kernel);
134 | 
135 |   // Specify solver precision
136 |   //model.setSolverPrecision(1e10);
137 | 
138 |   // Specify number of restarts for solver
139 |   model.setSolverRestarts(2);
140 | 
141 |   // Fit covariance kernel hyperparameters to the training data
142 |   time start = high_resolution_clock::now();
143 |   model.fitModel();
144 |   time end = high_resolution_clock::now();
145 |   auto computationTime = getTime(start, end);
146 | 
147 |   // Display computation time required for fitting the GP model
148 |   cout << "\nComputation Time: ";
149 |   cout << computationTime << " s" << endl;
150 | 
151 |   // Retrieve the tuned/optimized kernel hyperparameters
152 |   auto optParams = model.getParams();
153 |   auto noiseL = model.getNoise();
154 |   auto scalingL = model.getScaling();
155 |   //cout << "\nOptimized Hyperparameters:" << endl << scalingL << " * " << optParams.transpose() << " ";
156 |   cout << "\nOptimized Hyperparameters:" << endl << std::sqrt(scalingL) << "**2 * " << optParams.transpose() << " ";
157 |   cout << "(Noise = " << noiseL << ")\n" << endl;
158 | 
159 |   // Display the negative log marginal likelihood (NLML) of the optimized model
160 |   cout << "NLML: " << std::fixed << std::setprecision(4) << model.computeNLML() << endl << endl;
161 | 
162 | 
163 |   //
164 |   // [ Posterior Predictions and Samples ]
165 |   //
166 | 
167 |   // Define test mesh for GP model predictions
168 |   int predCount;
169 |   if ( inputDim == 1 )
170 |     predCount = 100;
171 |   else
172 |     predCount = 1000;
173 |   int predRes = static_cast<int>(std::pow(predCount, 1.0/inputDim));
174 |   auto testMesh = linspace(-1.0, 1.0, predRes, inputDim);
175 |   model.setPred(testMesh);
176 | 
177 |   // Reset predCount to account for rounding
178 |   predCount = static_cast<int>(std::pow(predRes, inputDim));
179 | 
180 |   // Compute predicted means and variances for the test points
181 |   model.predict();
182 |   Matrix pmean = model.getPredMean();
183 |   Matrix pvar = model.getPredVar();
184 |   Matrix pstd = pvar.array().sqrt().matrix();
185 | 
186 |   // Get sample paths from the posterior distribution of the model
187 |   int sampleCount;
188 |   Matrix samples;
189 |   if ( inputDim == 1 )
190 |   {
191 |     sampleCount = 25;
192 |     samples = model.getSamples(sampleCount);
193 |   }
194 | 
195 | 
196 |   //
197 |   // [ Save Results for Plotting ]
198 |   //
199 | 
200 |   // Save true and predicted means/variances to file
201 |   std::string outputFile = "predictions.csv";
202 |   Matrix trueSoln(predCount,1);
203 |   for ( auto i : boost::irange(0,predCount) )
204 |     trueSoln(i,0) = targetFunc(testMesh.row(i));
205 |     //trueSoln(i,0) = targetFuncMultiModal(testMesh.row(i));
206 | 
207 |   std::ofstream fout;
208 |   fout.open(outputFile);
209 |   for ( auto i : boost::irange(0,predCount) )
210 |   {
211 |     for ( auto j : boost::irange(0,inputDim) )
212 |       fout << testMesh(i,j) << ",";
213 | 
214 |     fout << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n";
215 |   }
216 |   fout.close();
217 | 
218 |   // Save observations to file
219 |   std::string outputObsFile = "observations.csv";
220 |   fout.open(outputObsFile);
221 |   for ( auto i : boost::irange(0,obsCount) )
222 |   {
223 |     for ( auto j : boost::irange(0,inputDim) )
224 |       fout << X(i,j) << ",";
225 | 
226 |     fout << y(i) << "\n";
227 |   }
228 |   fout.close();
229 | 
230 | 
231 |   if ( inputDim == 1 )
232 |   {
233 |     // Save samples to file
234 |     std::string outputSampleFile = "samples.csv";
235 |     fout.open(outputSampleFile);
236 |     for ( auto j : boost::irange(0,sampleCount) )
237 |     {
238 |       for ( auto i : boost::irange(0,predCount) )
239 |         fout << samples(i,j) << ((i < predCount-1) ? "," : "\n");
240 |     }
241 |     fout.close();
242 |   }
243 | 
244 |   return 0;
245 | }
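Note: the NLML value printed by each of these tests is the standard Gaussian process marginal-likelihood objective, NLML = 0.5*y^T K^{-1} y + 0.5*log det(K) + 0.5*n*log(2*pi). The sketch below shows how such a quantity can be computed from a Cholesky factorization with Eigen; nlmlSketch is a hypothetical helper for illustration (K is assumed to already include the noise variance on its diagonal), not a transcription of the computeNLML() implementation in GPs.cpp.

#include <cmath>
#include <Eigen/Dense>

// Illustrative NLML computation via Cholesky factorization:
//   NLML = 0.5*y^T K^{-1} y + 0.5*log det(K) + 0.5*n*log(2*pi)
double nlmlSketch(const Eigen::MatrixXd &K, const Eigen::VectorXd &y)
{
  const double pi = std::atan(1.0)*4.0;
  Eigen::LLT<Eigen::MatrixXd> llt(K);            // factor K = L * L^T
  Eigen::VectorXd alpha = llt.solve(y);          // alpha = K^{-1} y
  Eigen::MatrixXd L = llt.matrixL();             // lower-triangular factor
  double logDetK = 2.0 * L.diagonal().array().log().sum();
  double n = static_cast<double>(y.size());
  return 0.5 * y.dot(alpha) + 0.5 * logDetK + 0.5 * n * std::log(2.0*pi);
}

Working with the Cholesky factor avoids forming K^{-1} explicitly and yields log det(K) essentially for free from the diagonal of L.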
--------------------------------------------------------------------------------
/tests/1D_low_noise.cpp: --------------------------------------------------------------------------------
1 | 
2 | #include <iostream>
3 | #include <iomanip>
4 | #include <fstream>
5 | #include <string>
6 | #include <cmath>
7 | #include <ctime>
8 | #include <chrono>
9 | #include <random>
10 | #include <vector>
11 | #include <Eigen/Dense>
12 | #include "../GPs.h"
13 | 
14 | #include <boost/range/irange.hpp>
15 | 
16 | // Specify the target function for Gaussian process regression
17 | double targetFunc(Eigen::MatrixXd X)
18 | {
19 | 
20 |   if ( X.size() == 1 )
21 |   {
22 |     // Define the target function to be an oscillatory, non-periodic function
23 |     double oscillation = 30.0;
24 |     double xshifted = 0.5*(X(0) + 1.0);
25 |     return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0;
26 |   }
27 |   else
28 |   {
29 |     // Define the target function to be the Marr wavelet (a.k.a. the "Mexican hat" wavelet)
30 |     // ( see https://en.wikipedia.org/wiki/Mexican_hat_wavelet )
31 |     double sigma = 0.25;
32 |     double pi = std::atan(1)*4;
33 |     double radialTerm = std::pow(X.squaredNorm()/sigma,2);
34 |     return 2.0 / (std::sqrt(pi*sigma) * std::pow(pi,1.0/4.0)) * (1.0 - radialTerm) * std::exp(-radialTerm);
35 |   }
36 | }
37 | 
38 | // Specify multi-modal target function for testing
39 | double targetFuncMultiModal(Eigen::MatrixXd X)
40 | {
41 |   Eigen::MatrixXd p1(1,2);  p1 << 0.25, 0.5;
42 |   Eigen::MatrixXd p2(1,2);  p2 << -0.25, -0.5;
43 |   double scale1 = 10.0;  double scale2 = 15.0;
44 |   double sigma1 = 0.4;   double sigma2 = 0.3;
45 |   double radialTerm1 = ((X-p1)*(1/sigma1)).squaredNorm();
46 |   double radialTerm2 = ((X-p2)*(1/sigma2)).squaredNorm();
47 |   return scale1*std::exp(-radialTerm1) + scale2*std::exp(-radialTerm2);
48 | }
49 | 
50 | 
51 | // Example use of CppGPs code for Gaussian process regression
52 | int main(int argc, char const *argv[])
53 | {
54 | 
55 |   // Inform Eigen of possible multi-threading
56 |   Eigen::initParallel();
57 | 
58 |   // Retrieve aliases from GP namespace
59 |   using Matrix = Eigen::MatrixXd;
60 |   //using Vector = Eigen::VectorXd;
61 | 
62 |   // Convenience using-declarations
63 |   using std::cout;
64 |   using std::endl;
65 |   using GP::GaussianProcess;
66 |   using GP::linspace;
67 |   using GP::sampleNormal;
68 |   using GP::sampleUnif;
69 |   using GP::RBF;
70 | 
71 |   // Used for timing code with chrono
72 |   using GP::high_resolution_clock;
73 |   using GP::time;
74 |   using GP::getTime;
75 | 
76 |   // Set random seed based on system clock
77 |   std::srand(static_cast<unsigned int>(high_resolution_clock::now().time_since_epoch().count()));
78 | 
79 |   // Fix random seed for debugging and testing
80 |   //std::srand(static_cast<unsigned int>(0));
81 | 
82 | 
83 |   //
84 |   // [ Define Training Data ]
85 |   //
86 | 
87 |   // Specify the input dimensions
88 |   int inputDim = 1;
89 |   //int inputDim = 2;
90 | 
91 |   // Specify observation data count
92 |   int obsCount;
93 |   if ( inputDim == 1 )
94 |     obsCount = 50;
95 |   else
96 |     obsCount = 2000;
97 | 
98 |   // Specify observation noise level
99 |   auto noiseLevel = 0.1;
100 | 
101 |   // Define random noise to add to target observations
102 |   //auto noise = sampleNormal(obsCount) * noiseLevel;
103 |   Matrix noise;
104 |   noise.noalias() = sampleNormal(obsCount) * noiseLevel;
105 | 
106 |   // Define observations by sampling random uniform distribution
107 |   Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim);
108 |   Matrix y;  y.resize(obsCount, 1);
109 | 
110 |   // Define target observations 'y' by applying 'targetFunc'
111 |   // to the input observations 'X' and adding a noise vector
112 |   for ( auto i : boost::irange(0,obsCount) )
113 |     y(i) = targetFunc(X.row(i)) + noise(i);
114 |     //y(i) = targetFuncMultiModal(X.row(i)) + noise(i);
115 | 
116 | 
117 | 
118 |   //
119 |   // [ Construct Gaussian Process Model ]
120 |   //
121 | 
122 |   // Initialize Gaussian process model
123 |   GaussianProcess model;
124 | 
125 |   // Specify training observations for GP model
126 |   model.setObs(X,y);
127 | 
128 |   // Fix noise level [ noise level is fit to the training data by default ]
129 |   //model.setNoise(0.33);
130 | 
131 |   // Initialize an RBF covariance kernel and assign it to the model
132 |   RBF kernel;
133 |   model.setKernel(kernel);
134 | 
135 |   // Specify solver precision
136 |   //model.setSolverPrecision(1e10);
137 | 
138 |   // Specify number of restarts for solver
139 |   model.setSolverRestarts(4);
140 | 
141 |   // Fit covariance kernel hyperparameters to the training data
142 |   time start = high_resolution_clock::now();
143 |   model.fitModel();
144 |   time end = high_resolution_clock::now();
145 |   auto computationTime = getTime(start, end);
146 | 
147 |   // Display computation time required for fitting the GP model
148 |   cout << "\nComputation Time: ";
149 |   cout << computationTime << " s" << endl;
150 | 
151 |   // Retrieve the tuned/optimized kernel hyperparameters
152 |   auto optParams = model.getParams();
153 |   auto noiseL = model.getNoise();
154 |   auto scalingL = model.getScaling();
155 |   //cout << "\nOptimized Hyperparameters:" << endl << scalingL << " * " << optParams.transpose() << " ";
156 |   cout << "\nOptimized Hyperparameters:" << endl << std::sqrt(scalingL) << "**2 * " << optParams.transpose() << " ";
157 |   cout << "(Noise = " << noiseL << ")\n" << endl;
158 | 
159 |   // Display the negative log marginal likelihood (NLML) of the optimized model
160 |   cout << "NLML: " << std::fixed << std::setprecision(4) << model.computeNLML() << endl << endl;
161 | 
162 | 
163 |   //
164 |   // [ Posterior Predictions and Samples ]
165 |   //
166 | 
167 |   // Define test mesh for GP model predictions
168 |   int predCount;
169 |   if ( inputDim == 1 )
170 |     predCount = 100;
171 |   else
172 |     predCount = 1000;
173 |   int predRes = static_cast<int>(std::pow(predCount, 1.0/inputDim));
174 |   auto testMesh = linspace(-1.0, 1.0, predRes, inputDim);
175 |   model.setPred(testMesh);
176 | 
177 |   // Reset predCount to account for rounding
178 |   predCount = static_cast<int>(std::pow(predRes, inputDim));
179 | 
180 |   // Compute predicted means and variances for the test points
181 |   model.predict();
182 |   Matrix pmean = model.getPredMean();
183 |   Matrix pvar = model.getPredVar();
184 |   Matrix pstd = pvar.array().sqrt().matrix();
185 | 
186 |   // Get sample paths from the posterior distribution of the model
187 |   int sampleCount;
188 |   Matrix samples;
189 |   if ( inputDim == 1 )
190 |   {
191 |     sampleCount = 25;
192 |     samples = model.getSamples(sampleCount);
193 |   }
194 | 
195 | 
196 |   //
197 |   // [ Save Results for Plotting ]
198 |   //
199 | 
200 |   // Save true and predicted means/variances to file
201 |   std::string outputFile = "predictions.csv";
202 |   Matrix trueSoln(predCount,1);
203 |   for ( auto i : boost::irange(0,predCount) )
204 |     trueSoln(i,0) = targetFunc(testMesh.row(i));
205 |     //trueSoln(i,0) = targetFuncMultiModal(testMesh.row(i));
206 | 
207 |   std::ofstream fout;
208 |   fout.open(outputFile);
209 |   for ( auto i : boost::irange(0,predCount) )
210 |   {
211 |     for ( auto j : boost::irange(0,inputDim) )
212 |       fout << testMesh(i,j) << ",";
213 | 
214 |     fout << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n";
215 |   }
216 |   fout.close();
217 | 
218 |   // Save observations to file
219 |   std::string outputObsFile = "observations.csv";
220 |   fout.open(outputObsFile);
221 |   for ( auto i : boost::irange(0,obsCount) )
222 |   {
223 |     for ( auto j : boost::irange(0,inputDim) )
224 |       fout << X(i,j) << ",";
225 | 
226 |     fout << y(i) << "\n";
227 |   }
228 |   fout.close();
229 | 
230 | 
231 |   if ( inputDim == 1 )
232 |   {
233 |     // Save samples to file
234 |     std::string outputSampleFile = "samples.csv";
235 |     fout.open(outputSampleFile);
236 |     for ( auto j : boost::irange(0,sampleCount) )
237 |     {
238 |       for ( auto i : boost::irange(0,predCount) )
239 |         fout << samples(i,j) << ((i < predCount-1) ? "," : "\n");
240 |     }
241 |     fout.close();
242 |   }
243 | 
244 |   return 0;
245 | }
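The getPredMean()/getPredVar() calls used throughout these tests expose the textbook GP posterior equations, mean = K_*^T (K + s_n^2 I)^{-1} y and var = k_** - diag(K_*^T (K + s_n^2 I)^{-1} K_*). The following is a minimal sketch of those equations under stated assumptions; predictSketch and its argument layout are illustrative, not the actual GPs.cpp implementation.

#include <Eigen/Dense>

// Illustrative GP posterior predictions via a single Cholesky factorization.
//   K   : n x n training covariance (noise variance assumed on the diagonal)
//   Ks  : n x m cross-covariance between training and test points
//   Kss : m-vector of prior variances at the test points
//   y   : n-vector of training targets
void predictSketch(const Eigen::MatrixXd &K, const Eigen::MatrixXd &Ks,
                   const Eigen::VectorXd &Kss, const Eigen::VectorXd &y,
                   Eigen::VectorXd &mean, Eigen::VectorXd &var)
{
  Eigen::LLT<Eigen::MatrixXd> llt(K);                  // K = L * L^T
  mean = Ks.transpose() * llt.solve(y);                // K_*^T K^{-1} y
  Eigen::MatrixXd V = llt.matrixL().solve(Ks);         // V = L^{-1} K_*
  var = Kss - V.colwise().squaredNorm().transpose();   // k_** - K_*^T K^{-1} K_*
}

Solving with the triangular factor L rather than inverting K keeps the variance computation numerically stable, since each predictive variance is just the prior variance minus a squared column norm of V.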
--------------------------------------------------------------------------------
/tests/2D_example.cpp: --------------------------------------------------------------------------------
1 | 
2 | #include <iostream>
3 | #include <iomanip>
4 | #include <fstream>
5 | #include <string>
6 | #include <cmath>
7 | #include <ctime>
8 | #include <chrono>
9 | #include <random>
10 | #include <vector>
11 | #include <Eigen/Dense>
12 | #include "../GPs.h"
13 | 
14 | #include <boost/range/irange.hpp>
15 | 
16 | // Specify the target function for Gaussian process regression
17 | double targetFunc(Eigen::MatrixXd X)
18 | {
19 | 
20 |   if ( X.size() == 1 )
21 |   {
22 |     // Define the target function to be an oscillatory, non-periodic function
23 |     double oscillation = 30.0;
24 |     double xshifted = 0.5*(X(0) + 1.0);
25 |     return std::sin(oscillation*(xshifted-0.1))*(0.5-(xshifted-0.1))*15.0;
26 |   }
27 |   else
28 |   {
29 |     // Define the target function to be the Marr wavelet (a.k.a. the "Mexican hat" wavelet)
30 |     // ( see https://en.wikipedia.org/wiki/Mexican_hat_wavelet )
31 |     double sigma = 0.25;
32 |     double pi = std::atan(1)*4;
33 |     double radialTerm = std::pow(X.squaredNorm()/sigma,2);
34 |     return 2.0 / (std::sqrt(pi*sigma) * std::pow(pi,1.0/4.0)) * (1.0 - radialTerm) * std::exp(-radialTerm);
35 |   }
36 | }
37 | 
38 | // Specify multi-modal target function for testing
39 | double targetFuncMultiModal(Eigen::MatrixXd X)
40 | {
41 |   Eigen::MatrixXd p1(1,2);  p1 << 0.25, 0.5;
42 |   Eigen::MatrixXd p2(1,2);  p2 << -0.25, -0.5;
43 |   double scale1 = 10.0;  double scale2 = 15.0;
44 |   double sigma1 = 0.4;   double sigma2 = 0.3;
45 |   double radialTerm1 = ((X-p1)*(1/sigma1)).squaredNorm();
46 |   double radialTerm2 = ((X-p2)*(1/sigma2)).squaredNorm();
47 |   return scale1*std::exp(-radialTerm1) + scale2*std::exp(-radialTerm2);
48 | }
49 | 
50 | 
51 | // Example use of CppGPs code for Gaussian process regression
52 | int main(int argc, char const *argv[])
53 | {
54 | 
55 |   // Inform Eigen of possible multi-threading
56 |   Eigen::initParallel();
57 | 
58 |   // Retrieve aliases from GP namespace
59 |   using Matrix = Eigen::MatrixXd;
60 |   //using Vector = Eigen::VectorXd;
61 | 
62 |   // Convenience using-declarations
63 |   using std::cout;
64 |   using std::endl;
65 |   using GP::GaussianProcess;
66 |   using GP::linspace;
67 |   using GP::sampleNormal;
68 |   using GP::sampleUnif;
69 |   using GP::RBF;
70 | 
71 |   // Used for timing code with chrono
72 |   using GP::high_resolution_clock;
73 |   using GP::time;
74 |   using GP::getTime;
75 | 
76 |   // Set random seed based on system clock
77 |   std::srand(static_cast<unsigned int>(high_resolution_clock::now().time_since_epoch().count()));
78 | 
79 |   // Fix random seed for debugging and testing
80 |   //std::srand(static_cast<unsigned int>(0));
81 | 
82 | 
83 |   //
84 |   // [ Define Training Data ]
85 |   //
86 | 
87 |   // Specify the input dimensions
88 |   //int inputDim = 1;
89 |   int inputDim = 2;
90 | 
91 |   // Specify observation data count
92 |   int obsCount;
93 |   if ( inputDim == 1 )
94 |     obsCount = 250;
95 |   else
96 |     obsCount = 2000;
97 | 
98 |   // Specify observation noise level
99 |   auto noiseLevel = 1.0;
100 | 
101 |   // Define random noise to add to target observations
102 |   //auto noise = sampleNormal(obsCount) * noiseLevel;
103 |   Matrix noise;
104 |   noise.noalias() = sampleNormal(obsCount) * noiseLevel;
105 | 
106 |   // Define observations by sampling random uniform distribution
107 |   Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim);
108 |   Matrix y;  y.resize(obsCount, 1);
109 | 
110 |   // Define target observations 'y' by applying 'targetFunc'
111 |   // to the input observations 'X' and adding a noise vector
112 |   for ( auto i : boost::irange(0,obsCount) )
113 |     y(i) = targetFunc(X.row(i)) + noise(i);
114 |     //y(i) = targetFuncMultiModal(X.row(i)) + noise(i);
115 | 
116 | 
117 | 
118 |   //
119 |   // [ Construct Gaussian Process Model ]
120 |   //
121 | 
122 |   // Initialize Gaussian process model
123 |   GaussianProcess model;
124 | 
125 |   // Specify training observations for GP model
126 |   model.setObs(X,y);
127 | 
128 |   // Fix noise level [ noise level is fit to the training data by default ]
129 |   //model.setNoise(0.33);
130 | 
131 |   // Initialize an RBF covariance kernel and assign it to the model
132 |   RBF kernel;
133 |   model.setKernel(kernel);
134 | 
135 |   // Specify solver precision
136 |   //model.setSolverPrecision(1e8);
137 | 
138 |   // Specify number of restarts for solver
139 |   //model.setSolverRestarts(2);
140 | 
141 |   // Fit covariance kernel hyperparameters to the training data
142 |   time start = high_resolution_clock::now();
143 |   model.fitModel();
144 |   time end = high_resolution_clock::now();
145 |   auto computationTime = getTime(start, end);
146 | 
147 |   // Display computation time required for fitting the GP model
148 |   cout << "\nComputation Time: ";
149 |   cout << computationTime << " s" << endl;
150 | 
151 |   // Retrieve the tuned/optimized kernel hyperparameters
152 |   auto optParams = model.getParams();
153 |   auto noiseL = model.getNoise();
154 |   auto scalingL = model.getScaling();
155 |   //cout << "\nOptimized Hyperparameters:" << endl << scalingL << " * " << optParams.transpose() << " ";
156 |   cout << "\nOptimized Hyperparameters:" << endl << std::sqrt(scalingL) << "**2 * " << optParams.transpose() << " ";
157 |   cout << "(Noise = " << noiseL << ")\n" << endl;
158 | 
159 |   // Display the negative log marginal likelihood (NLML) of the optimized model
160 |   cout << "NLML: " << std::fixed << std::setprecision(4) << model.computeNLML() << endl << endl;
161 | 
162 | 
163 |   //
164 |   // [ Posterior Predictions and Samples ]
165 |   //
166 | 
167 |   // Define test mesh for GP model predictions
168 |   int predCount;
169 |   if ( inputDim == 1 )
170 |     predCount = 100;
171 |   else
172 |     predCount = 1000;
173 |   int predRes = static_cast<int>(std::pow(predCount, 1.0/inputDim));
174 |   auto testMesh = linspace(-1.0, 1.0, predRes, inputDim);
175 |   model.setPred(testMesh);
176 | 
177 |   // Reset predCount to account for rounding
178 |   predCount = static_cast<int>(std::pow(predRes, inputDim));
179 | 
180 |   // Compute predicted means and variances for the test points
181 |   model.predict();
182 |   Matrix pmean = model.getPredMean();
183 |   Matrix pvar = model.getPredVar();
184 |   Matrix pstd = pvar.array().sqrt().matrix();
185 | 
186 |   // Get sample paths from the posterior distribution of the model
187 |   int sampleCount;
188 |   Matrix samples;
189 |   if ( inputDim == 1 )
190 |   {
191 |     sampleCount = 25;
192 |     samples = model.getSamples(sampleCount);
193 |   }
194 | 
195 | 
196 |   //
197 |   // [ Save Results for Plotting ]
198 |   //
199 | 
200 |   // Save true and predicted means/variances to file
201 |   std::string outputFile = "predictions.csv";
202 |   Matrix trueSoln(predCount,1);
203 |   for ( auto i : boost::irange(0,predCount) )
204 |     trueSoln(i,0) = targetFunc(testMesh.row(i));
205 |     //trueSoln(i,0) = targetFuncMultiModal(testMesh.row(i));
206 | 
207 |   std::ofstream fout;
208 |   fout.open(outputFile);
209 |   for ( auto i : boost::irange(0,predCount) )
210 |   {
211 |     for ( auto j : boost::irange(0,inputDim) )
212 |       fout << testMesh(i,j) << ",";
213 | 
214 |     fout << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n";
215 |   }
216 |   fout.close();
217 | 
218 |   // Save observations to file
219 |   std::string outputObsFile = "observations.csv";
220 |   fout.open(outputObsFile);
221 |   for ( auto i : boost::irange(0,obsCount) )
222 |   {
223 |     for ( auto j : boost::irange(0,inputDim) )
224 |       fout << X(i,j) << ",";
225 | 
226 |     fout << y(i) << "\n";
227 |   }
228 |   fout.close();
229 | 
230 | 
231 |   if ( inputDim == 1 )
232 |   {
233 |     // Save samples to file
234 |     std::string outputSampleFile = "samples.csv";
235 |     fout.open(outputSampleFile);
236 |     for ( auto j : boost::irange(0,sampleCount) )
237 |     {
238 |       for ( auto i : boost::irange(0,predCount) )
239 |         fout << samples(i,j) << ((i < predCount-1) ? "," : "\n");
240 |     }
241 |     fout.close();
242 |   }
243 | 
244 |   return 0;
245 | }
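Both two-dimensional tests (2D_example above and 2D_multimodal below) rely on the RBF kernel class declared in GPs.h. The exact parameterization lives in that header, so treat the standard squared-exponential form below as an assumption; rbfSketch is an illustrative stand-alone function, not the CppGPs RBF implementation.

#include <cmath>
#include <Eigen/Dense>

// Illustrative squared-exponential (RBF) covariance:
//   k(x, x') = scaling * exp( -||x - x'||^2 / (2 * lengthScale^2) )
double rbfSketch(const Eigen::VectorXd &x1, const Eigen::VectorXd &x2,
                 double lengthScale, double scaling)
{
  double r2 = (x1 - x2).squaredNorm();                       // squared distance
  return scaling * std::exp(-r2 / (2.0*lengthScale*lengthScale));
}

Under this parameterization, the "sqrt(scaling)**2 * lengthScale" hyperparameter printout in these tests mirrors scikit-learn's ConstantKernel * RBF display convention, so the optimized values can be compared side by side with the Comparison_with_SciKit_Learn.py output.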
--------------------------------------------------------------------------------
/tests/2D_multimodal.cpp: --------------------------------------------------------------------------------
1 | 
2 | #include <iostream>
3 | #include <iomanip>
4 | #include <fstream>
5 | #include <string>
6 | #include <cmath>
7 | #include <ctime>
8 | #include <chrono>
9 | #include <random>
10 | #include <vector>
11 | #include <Eigen/Dense>
12 | #include "../GPs.h"
13 | 
14 | #include <boost/range/irange.hpp>
15 | 
16 | // Specify multi-modal target function for testing
17 | double targetFuncMultiModal(Eigen::MatrixXd X)
18 | {
19 |   Eigen::MatrixXd p1(1,2);  p1 << 0.25, 0.5;
20 |   Eigen::MatrixXd p2(1,2);  p2 << -0.25, -0.5;
21 |   double scale1 = 10.0;  double scale2 = 15.0;
22 |   double sigma1 = 0.4;   double sigma2 = 0.3;
23 |   double radialTerm1 = ((X-p1)*(1/sigma1)).squaredNorm();
24 |   double radialTerm2 = ((X-p2)*(1/sigma2)).squaredNorm();
25 |   return scale1*std::exp(-radialTerm1) + scale2*std::exp(-radialTerm2);
26 | }
27 | 
28 | 
29 | // Example use of CppGPs code for Gaussian process regression
30 | int main(int argc, char const *argv[])
31 | {
32 | 
33 |   // Inform Eigen of possible multi-threading
34 |   Eigen::initParallel();
35 | 
36 |   // Retrieve aliases from GP namespace
37 |   using Matrix = Eigen::MatrixXd;
38 |   //using Vector = Eigen::VectorXd;
39 | 
40 |   // Convenience using-declarations
41 |   using std::cout;
42 |   using std::endl;
43 |   using GP::GaussianProcess;
44 |   using GP::linspace;
45 |   using GP::sampleNormal;
46 |   using GP::sampleUnif;
47 |   using GP::RBF;
48 | 
49 |   // Used for timing code with chrono
50 |   using GP::high_resolution_clock;
51 |   using GP::time;
52 |   using GP::getTime;
53 | 
54 |   // Set random seed based on system clock
55 |   std::srand(static_cast<unsigned int>(high_resolution_clock::now().time_since_epoch().count()));
56 | 
57 |   // Fix random seed for debugging and testing
58 |   //std::srand(static_cast<unsigned int>(0));
59 | 
60 | 
61 |   //
62 |   // [ Define Training Data ]
63 |   //
64 | 
65 |   // Specify the input dimensions
66 |   int inputDim = 2;
67 | 
68 |   // Specify observation data count
69 |   int obsCount;
70 |   if ( inputDim == 1 )
71 |     obsCount = 250;
72 |   else
73 |     obsCount = 2000;
74 | 
75 |   // Specify observation noise level
76 |   auto noiseLevel = 1.0;
77 | 
78 |   // Define random noise to add to target observations
79 |   //auto noise = sampleNormal(obsCount) * noiseLevel;
80 |   Matrix noise;
81 |   noise.noalias() = sampleNormal(obsCount) * noiseLevel;
82 | 
83 |   // Define observations by sampling random uniform distribution
84 |   Matrix X = sampleUnif(-1.0, 1.0, obsCount, inputDim);
85 |   Matrix y;  y.resize(obsCount, 1);
86 | 
87 |   // Define target observations 'y' by applying 'targetFuncMultiModal'
88 |   // to the input observations 'X' and adding a noise vector
89 |   for ( auto i : boost::irange(0,obsCount) )
90 |     y(i) = targetFuncMultiModal(X.row(i)) + noise(i);
91 | 
92 | 
93 | 
94 |   //
95 |   // [ Construct Gaussian Process Model ]
96 |   //
97 | 
98 |   // Initialize Gaussian process model
99 |   GaussianProcess model;
100 | 
101 |   // Specify training observations for GP model
102 |   model.setObs(X,y);
103 | 
104 |   // Fix noise level [ noise level is fit to the training data by default ]
105 |   //model.setNoise(0.33);
106 | 
107 |   // Initialize an RBF covariance kernel and assign it to the model
108 |   RBF kernel;
109 |   model.setKernel(kernel);
110 | 
111 |   // Specify solver precision
112 |   //model.setSolverPrecision(1e8);
113 | 
114 |   // Specify number of restarts for solver
115 |   //model.setSolverRestarts(2);
116 | 
117 |   // Fit covariance kernel hyperparameters to the training data
118 |   time start = high_resolution_clock::now();
119 |   model.fitModel();
120 |   time end = high_resolution_clock::now();
121 |   auto computationTime = getTime(start, end);
122 | 
123 |   // Display computation time required for fitting the GP model
124 |   cout << "\nComputation Time: ";
125 |   cout << computationTime << " s" << endl;
126 | 
127 |   // Retrieve the tuned/optimized kernel hyperparameters
128 |   auto optParams = model.getParams();
129 |   auto noiseL = model.getNoise();
130 |   auto scalingL = model.getScaling();
131 |   //cout << "\nOptimized Hyperparameters:" << endl << scalingL << " * " << optParams.transpose() << " ";
132 |   cout << "\nOptimized Hyperparameters:" << endl << std::sqrt(scalingL) << "**2 * " << optParams.transpose() << " ";
133 |   cout << "(Noise = " << noiseL << ")\n" << endl;
134 | 
135 |   // Display the negative log marginal likelihood (NLML) of the optimized model
136 |   cout << "NLML: " << std::fixed << std::setprecision(4) << model.computeNLML() << endl << endl;
137 | 
138 | 
139 |   //
140 |   // [ Posterior Predictions and Samples ]
141 |   //
142 | 
143 |   // Define test mesh for GP model predictions
144 |   int predCount;
145 |   if ( inputDim == 1 )
146 |     predCount = 100;
147 |   else
148 |     predCount = 1000;
149 |   int predRes = static_cast<int>(std::pow(predCount, 1.0/inputDim));
150 |   auto testMesh = linspace(-1.0, 1.0, predRes, inputDim);
151 |   model.setPred(testMesh);
152 | 
153 |   // Reset predCount to account for rounding
154 |   predCount = static_cast<int>(std::pow(predRes, inputDim));
155 | 
156 |   // Compute predicted means and variances for the test points
157 |   model.predict();
158 |   Matrix pmean = model.getPredMean();
159 |   Matrix pvar = model.getPredVar();
160 |   Matrix pstd = pvar.array().sqrt().matrix();
161 | 
162 |   // Get sample paths from the posterior distribution of the model
163 |   int sampleCount;
164 |   Matrix samples;
165 |   if ( inputDim == 1 )
166 |   {
167 |     sampleCount = 25;
168 |     samples = model.getSamples(sampleCount);
169 |   }
170 | 
171 | 
172 |   //
173 |   // [ Save Results for Plotting ]
174 |   //
175 | 
176 |   // Save true and predicted means/variances to file
177 |   std::string outputFile = "predictions.csv";
178 |   Matrix trueSoln(predCount,1);
179 |   for ( auto i : boost::irange(0,predCount) )
180 |     trueSoln(i,0) = targetFuncMultiModal(testMesh.row(i));
181 | 
182 |   std::ofstream fout;
183 |   fout.open(outputFile);
184 |   for ( auto i : boost::irange(0,predCount) )
185 |   {
186 |     for ( auto j : boost::irange(0,inputDim) )
187 |       fout << testMesh(i,j) << ",";
188 | 
189 |     fout << trueSoln(i) << "," << pmean(i) << "," << pstd(i) << "\n";
190 |   }
191 |   fout.close();
192 | 
193 |   // Save observations to file
194 |   std::string outputObsFile = "observations.csv";
195 |   fout.open(outputObsFile);
196 |   for ( auto i : boost::irange(0,obsCount) )
197 |   {
198 |     for ( auto j : boost::irange(0,inputDim) )
199 |       fout << X(i,j) << ",";
200 | 
201 |     fout << y(i) << "\n";
202 |   }
203 |   fout.close();
204 | 
205 | 
206 |   if ( inputDim == 1 )
207 |   {
208 |     // Save samples to file
209 |     std::string outputSampleFile = "samples.csv";
210 |     fout.open(outputSampleFile);
211 |     for ( auto j : boost::irange(0,sampleCount) )
212 |     {
213 |       for ( auto i : boost::irange(0,predCount) )
214 |         fout << samples(i,j) << ((i < predCount-1) ? "," : "\n");
215 |     }
216 |     fout.close();
217 |   }
218 | 
219 |   return 0;
220 | }
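The getSamples() calls in the one-dimensional tests return posterior sample paths over the test mesh. A common way to generate such draws is to factor the posterior covariance and push standard normal draws through it, f = mean + L*z with z ~ N(0, I) and Sigma + jitter = L*L^T; the sketch below shows that approach as an assumption, not a transcription of GPs.cpp, and sampleSketch is a hypothetical helper.

#include <random>
#include <Eigen/Dense>

// Illustrative posterior sampling: returns one sample path per column.
Eigen::MatrixXd sampleSketch(const Eigen::VectorXd &mean,
                             const Eigen::MatrixXd &cov, int count)
{
  const int m = static_cast<int>(mean.size());
  // Add a small diagonal "jitter" so the Cholesky factorization succeeds
  // even when the posterior covariance is numerically rank-deficient
  Eigen::MatrixXd jittered = cov + 1e-8 * Eigen::MatrixXd::Identity(m, m);
  Eigen::MatrixXd L = Eigen::LLT<Eigen::MatrixXd>(jittered).matrixL();
  std::mt19937 gen(std::random_device{}());
  std::normal_distribution<double> normal(0.0, 1.0);
  Eigen::MatrixXd Z(m, count);                 // standard normal draws
  for ( int j = 0; j < count; j++ )
    for ( int i = 0; i < m; i++ )
      Z(i, j) = normal(gen);
  return mean.replicate(1, count) + L * Z;     // shift and correlate the draws
}

Each column of the returned matrix corresponds to one of the sample paths written to samples.csv by the tests above, which Plot.py then overlays on the predictive mean and uncertainty bands.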