├── .gitignore ├── Example_1a.ipynb ├── Example_1b.ipynb ├── Example_2.ipynb ├── Example_3.ipynb ├── GPConstr ├── __init__.py ├── kern.py ├── model.py ├── r_functions │ ├── __init__.py │ ├── python_wrappers.py │ └── r_gpsampler.R └── util │ ├── __init__.py │ ├── div.py │ ├── linalg.py │ └── stats.py ├── LICENSE ├── README.md ├── runJupyter.bat └── test_py.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | Example_1 - Copy.ipynb 106 | Example_1 - Copy (2).ipynb 107 | Example_1 - Copy (2) - Copy.ipynb 108 | test_bootstrap.ipynb 109 | -------------------------------------------------------------------------------- /GPConstr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cagrell/gp_constr/d3e890ed5a01d4f40e53e1c1ecb9ca3365650618/GPConstr/__init__.py -------------------------------------------------------------------------------- /GPConstr/kern.py: -------------------------------------------------------------------------------- 1 | #### 2 | # Kernels for use in the GPconstr model 3 | # RBF and Matern5/2 4 | #### 5 | 6 | import numpy as np 7 | 8 | # Can replace this with custom code that does the same.. 
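# A minimal NumPy-only drop-in is sketched below (a hypothetical helper, assuming
# X1 and X2 are 2d float arrays as used by _euclidian_dist_squared); it uses the
# expansion ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2 and clips negative round-off:
#
#   def _numpy_euclidean_distances(X1, X2, squared=False):
#       D = (X1**2).sum(axis=1)[:, None] - 2.0*(X1 @ X2.T) + (X2**2).sum(axis=1)[None, :]
#       np.maximum(D, 0.0, out=D)  # guard against small negative values from round-off
#       return D if squared else np.sqrt(D)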
9 | from sklearn.metrics.pairwise import euclidean_distances as sklear_euclidean_distances 10 | 11 | class kernel_Stationary(): 12 | """ 13 | Superclass for stationary kernels 14 | """ 15 | 16 | kernel_name = '' 17 | 18 | def __init__(self, variance, lengthscale): 19 | 20 | self.lengthscale = self._convert_to_array(lengthscale) 21 | self.variance = variance 22 | self.dim = len(self.lengthscale) 23 | 24 | assert np.isscalar(variance), 'variance must be scalar' 25 | 26 | def __str__(self): 27 | """ What to show when the object is printed """ 28 | return ' type = {} \n input dim = {} \n lenghtscale = {} \n variance = {}'.format( 29 | self.kernel_name, self.dim, self.lengthscale, self.variance) 30 | 31 | def _convert_to_array(self, v): 32 | if np.isscalar(v): 33 | return np.array([v]) 34 | else: 35 | return np.array(v) 36 | 37 | def _euclidian_dist_squared(self, X1, X2): 38 | """ Return gram matrix with ||x - y||^2 for each pair of points """ 39 | 40 | # Use function from sklearn - can replace this later to avoid dependence on sklearn 41 | return sklear_euclidean_distances(X1, X2, squared = True) 42 | 43 | def set_params(self, theta): 44 | """ 45 | Set all kernel parameters from a single array theta 46 | .. Used in optimization 47 | """ 48 | assert self.dim == (len(theta) - 1), 'Parameter array does not match kernel dimension' 49 | self.variance = theta[0] 50 | self.lengthscale = theta[1:] 51 | 52 | def get_params(self): 53 | """ 54 | Get all kernel parameters in a single array 55 | .. Used in optimization 56 | """ 57 | return np.array([self.variance] + list(self.lengthscale)) 58 | 59 | def K_gradients(self, X1, X2): 60 | """ 61 | Return kernel gradients w.r.t hyperparameters 62 | 63 | Returns: 64 | List of Gram matrices of derivatives of K w.r.t. the hyperparameters in the ordering 65 | given by get_params and set_params 66 | """ 67 | 68 | r = self.r(X1, X2) 69 | K_r = self.K_r(r) 70 | 71 | # W.r.t variance 72 | dK_dv = K_r/self.variance 73 | 74 | # W.r.t lengthscales 75 | dK_dr = self.dK_dr(r) 76 | dr_dl = self.dr_dl(X1, X2) 77 | 78 | dK_dl = [dK_dr*dr_dl_i for dr_dl_i in dr_dl] 79 | 80 | return [dK_dv] + dK_dl 81 | 82 | def R(self, X1, X2): 83 | """ 84 | Return scaled distances squared 85 | """ 86 | return self._euclidian_dist_squared(X1 / self.lengthscale, X2 / self.lengthscale) 87 | 88 | def r(self, X1, X2): 89 | """ 90 | Return scaled distances 91 | """ 92 | return np.sqrt(self.R(X1, X2)) 93 | 94 | def dr_dl(self, X1, X2): 95 | """ 96 | Derivative of r w.r.t. 
length scales 97 | """ 98 | 99 | # r 100 | r = self.r(X1, X2) 101 | 102 | # dr_dR 103 | dr_dR = self.dr_dR(r) 104 | 105 | # dr_dl 106 | dr_dl = [] 107 | for i in range(len(self.lengthscale)): 108 | t1, t2 = np.meshgrid(X1[:,i], X2[:,i]) 109 | dR_dli = ((-2/self.lengthscale[i])*((t1 - t2)/self.lengthscale[i])**2).T 110 | dr_dl.append(dr_dR*dR_dli) 111 | 112 | return dr_dl 113 | 114 | def dr_dR(self, r): 115 | """dr / dR""" 116 | f_div_zero = np.vectorize(lambda x: 0.0 if x == 0.0 else 1/(2*x)) 117 | return f_div_zero(r) 118 | 119 | def Ri(self, X1, X2, i): 120 | """ 121 | Returns dR/dX1_i 122 | Note: dR/dX2_j(X1, X2) = Ri(X2, X1, j).T 123 | """ 124 | return (2/self.lengthscale[i]**2)*(X1[:,i].reshape(-1, 1) - X2[:,i].reshape(-1, 1).T) 125 | 126 | def K(self, X1, X2): 127 | """ Returns Gram matrix of k(x1, x2) """ 128 | return self.K_r(self.r(X1, X2)) 129 | 130 | def K_diag(self, X): 131 | """ Returns diagonal of Gram matrix of k(x, x) """ 132 | return np.ones(len(X))*self.variance 133 | 134 | def Ki0(self, X1, X2, i): 135 | """ For K = K(X1, X2), X1 = [X1_1, X1_2, ..], X2 = [X2_1, X2_2, ..] etc., return dK/dX1_i """ 136 | 137 | r = self.r(X1, X2) 138 | 139 | dK_dr = self.dK_dr(r) 140 | dr_dR = self.dr_dR(r) 141 | dR_dxi = self.Ri(X1, X2, i) 142 | 143 | return dK_dr*dr_dR*dR_dxi 144 | 145 | def Kij(self, X1, X2, i, j): 146 | """ For K = K(X1, X2), X1 = [X1_1, X1_2, ..], X2 = [X2_1, X2_2, ..] etc., return d^2K/dX1_i*dX2_j """ 147 | 148 | r = self.r(X1, X2) + 1E-50 # make nonzero to avoid handling singularity 149 | dK_dr = self.dK_dr(r) 150 | d2K_drdr = self.d2K_drdr(r) 151 | 152 | dr_dR = self.dr_dR(r) 153 | dR_dxi = self.Ri(X1, X2, i) 154 | dR_dxj = self.Ri(X1 = X2, X2 = X1, i = j).T 155 | dr_dxi = dr_dR*dR_dxi 156 | dr_dxj = dr_dR*dR_dxj 157 | 158 | d2R_dxidxj = -2/self.lengthscale[i]**2 if i == j else 0 159 | d2r_dxidxj = d2R_dxidxj*dr_dR + dR_dxi*dR_dxj*(-2*dr_dR**3) 160 | 161 | return d2r_dxidxj*dK_dr + dr_dxi*dr_dxj*d2K_drdr 162 | 163 | def K_r(self, r): 164 | """ Kernel as a function of scaled distances """ 165 | raise NotImplementedError('Need to implement K_r, dK_dr and d2K_drdr for stationary kernels') 166 | 167 | def dK_dr(self, r): 168 | """ 169 | Derivative w.r.t scaled distances 170 | """ 171 | raise NotImplementedError('Need to implement K_r, dK_dr and d2K_drdr for stationary kernels') 172 | 173 | def d2K_drdr(self, r): 174 | """ 175 | Double derivative w.r.t scaled distances 176 | """ 177 | raise NotImplementedError('Need to implement K_r, dK_dr and d2K_drdr for stationary kernels') 178 | 179 | def Kii_diag(self, X, i): 180 | """ Returns diagonal of Gram matrix of d^2K/dX1_i*dX2_i """ 181 | raise NotImplementedError('Need to implement Kii_diag') 182 | 183 | class kernel_RBF(kernel_Stationary): 184 | """ 185 | RBF kernel 186 | """ 187 | 188 | kernel_name = 'RBF' 189 | 190 | def K_r(self, r): 191 | """ Kernel as a function of scaled distances """ 192 | return self.variance*np.exp(-0.5*r**2) 193 | 194 | def dK_dr(self, r): 195 | """ 196 | Derivative w.r.t scaled distances 197 | """ 198 | return -r*self.K_r(r) 199 | 200 | def d2K_drdr(self, r): 201 | """ 202 | Double derivative w.r.t scaled distances 203 | """ 204 | return (r**2 - 1)*self.K_r(r) 205 | 206 | 207 | def Ki0(self, X1, X2, i): 208 | """ Overload generic with faster alternative """ 209 | 210 | # Include K and r as input to use this 211 | # Make use of K or r if they exist 212 | ##if K is None: 213 | ## if r is None: 214 | ## K = self.K(X1, X2) 215 | ## else: 216 | ## K = self.K_r(r) 217 | 218 | return -0.5*self.K(X1, 
X2)*self.Ri(X1, X2, i) 219 | 220 | 221 | def Kij(self, X1, X2, i, j): 222 | """ Overload generic with faster alternative """ 223 | 224 | # Include K and r as input to use this 225 | # Make use of K or r if they exist 226 | ##if K is None: 227 | ## if r is None: 228 | ## K = self.K(X1, X2) 229 | ## else: 230 | ## K = self.K_r(r) 231 | 232 | F = 1/self.lengthscale[i]**2 if i == j else 0 233 | K = self.K(X1, X2) 234 | return K*((1/4)*self.Ri(X1, X2, i)*self.Ri(X1 = X2, X2 = X1, i = j).T + F) 235 | 236 | def Kii_diag(self, X, i): 237 | """ Returns diagonal of Gram matrix of d^2K/dX1_i*dX2_i """ 238 | const = self.variance/(self.lengthscale[i]**2) 239 | return np.ones(len(X))*const 240 | 241 | 242 | class kernel_Matern52(kernel_Stationary): 243 | """ 244 | Matern 5/2 kernel 245 | """ 246 | 247 | kernel_name = 'Matern52' 248 | 249 | def K_r(self, r): 250 | """ Kernel as a function of scaled distances """ 251 | return self.variance*(1 + np.sqrt(5)*r + 5/3*r**2)*np.exp(-np.sqrt(5)*r) 252 | 253 | def dK_dr(self, r): 254 | """ 255 | Derivative w.r.t scaled distances 256 | """ 257 | return -5/3*self.variance*(r + np.sqrt(5)*r**2)*np.exp(-np.sqrt(5)*r) 258 | 259 | def d2K_drdr(self, r): 260 | """ 261 | Double derivative w.r.t scaled distances 262 | """ 263 | return -5/3*self.variance*(1 + np.sqrt(5)*r - 5*r**2)*np.exp(-np.sqrt(5)*r) 264 | 265 | def Kii_diag(self, X, i): 266 | """ Returns diagonal of Gram matrix of d^2K/dX1_i*dX2_i """ 267 | const = self.variance*(5/3)*(1/self.lengthscale[i]**2) 268 | return np.ones(len(X))*const 269 | 270 | 271 | class kernel_RBF_generic(kernel_Stationary): 272 | """ 273 | RBF kernel, use generic set-up for testing 274 | """ 275 | 276 | kernel_name = 'RBF_generic' 277 | 278 | def K_r(self, r): 279 | """ Kernel as a function of scaled distances """ 280 | return self.variance*np.exp(-0.5*r**2) 281 | 282 | def dK_dr(self, r): 283 | """ 284 | Derivative w.r.t scaled distances 285 | """ 286 | return -r*self.K_r(r) 287 | 288 | def d2K_drdr(self, r): 289 | """ 290 | Double derivative w.r.t scaled distances 291 | """ 292 | return (r**2 - 1)*self.K_r(r) 293 | 294 | # Old kernel class 295 | # class kernel_RBF(): 296 | # """ 297 | # RBF kernel 298 | # """ 299 | 300 | # def __init__(self, variance, lengthscale): 301 | 302 | # self.lengthscale = self._convert_to_array(lengthscale) 303 | # self.variance = variance 304 | # self.dim = len(self.lengthscale) 305 | 306 | # assert np.isscalar(variance), 'variance must be scalar' 307 | 308 | # def __str__(self): 309 | # """ What to show when the object is printed """ 310 | # return ' type = {} \n input dim = {} \n lenghtscale = {} \n variance = {}'.format( 311 | # 'RBF', self.dim, self.lengthscale, self.variance) 312 | 313 | # def _convert_to_array(self, v): 314 | # if np.isscalar(v): 315 | # return np.array([v]) 316 | # else: 317 | # return np.array(v) 318 | 319 | # def _euclidian_dist_squared(self, X1, X2): 320 | # """ Return gram matrix with ||x - y||^2 for each pair of points """ 321 | 322 | # # Use function from sklearn - can replace this later to avoid dependence on sklearn 323 | # return sklear_euclidean_distances(X1, X2, squared = True) 324 | 325 | # def set_params(self, theta): 326 | # """ 327 | # Set all kernel parameters from a single array theta 328 | # .. 
Used in optimization 329 | # """ 330 | # assert self.dim == (len(theta) - 1), 'Parameter array does not match kernel dimension' 331 | # self.variance = theta[0] 332 | # self.lengthscale = theta[1:] 333 | 334 | # def get_params(self): 335 | # """ 336 | # Get all kernel parameters in a single array 337 | # .. Used in optimization 338 | # """ 339 | # return np.array([self.variance] + list(self.lengthscale)) 340 | 341 | # def K_gradients(self, X1, X2): 342 | # """ 343 | # Return kernel gradients w.r.t hyperparameters 344 | 345 | # Returns: 346 | # List of Gram matrices of derivatives of K w.r.t. the hyperparameters in the ordering 347 | # given by get_params and set_params 348 | # """ 349 | 350 | # R = self.R(X1, X2) 351 | # K_R = self.K_R(R) 352 | 353 | # # W.r.t variance 354 | # dK_dv = K_R/self.variance 355 | 356 | # # W.r.t lengthscales 357 | # dK_dR = -0.5*K_R 358 | 359 | # dK_dl = [] 360 | # for i in range(len(self.lengthscale)): 361 | # t1, t2 = np.meshgrid(X1[:,i], X2[:,i]) 362 | # dR_dli = ((-2/self.lengthscale[i])*((t1 - t2)/self.lengthscale[i])**2).T 363 | 364 | # dK_dl.append(dK_dR*dR_dli) 365 | 366 | # return [dK_dv] + dK_dl 367 | 368 | # def R(self, X1, X2): 369 | # """ 370 | # Return scaled distances squared 371 | # For RBF kernel: K(X1, X2) = variance * exp(-0.5 * R) 372 | # """ 373 | # return self._euclidian_dist_squared(X1 / self.lengthscale, X2 / self.lengthscale) 374 | 375 | # def K_R(self, R): 376 | # """ Kernel as a function of squared distances """ 377 | # return self.variance*np.exp(-0.5*R) 378 | 379 | # def Ri(self, X1, X2, i): 380 | # """ 381 | # Returns dR/dX1_i 382 | # Note: dR/dX2_j(X1, X2) = Ri(X2, X1, j).T 383 | # """ 384 | # return (2/self.lengthscale[i]**2)*(X1[:,i].reshape(-1, 1) - X2[:,i].reshape(-1, 1).T) 385 | 386 | # def Ki0(self, X1, X2, i, R = None, K = None): 387 | # """ For K = K(X1, X2), X1 = [X1_1, X1_2, ..], X2 = [X2_1, X2_2, ..] etc., return dK/dX1_i """ 388 | 389 | # # Make use of K or R if they exist 390 | # if K is None: 391 | # if R is None: 392 | # K = self.K_R(self.R(X1, X2)) 393 | # else: 394 | # K = self.K_R(R) 395 | 396 | # return -0.5*K*self.Ri(X1, X2, i) 397 | 398 | # def Kij(self, X1, X2, i, j, R = None, K = None): 399 | # """ For K = K(X1, X2), X1 = [X1_1, X1_2, ..], X2 = [X2_1, X2_2, ..] 
etc., return d^2K/dX1_i*dX2_j """ 400 | 401 | # # Make use of K or R if they exist 402 | # if K is None: 403 | # if R is None: 404 | # K = self.K_R(self.R(X1, X2)) 405 | # else: 406 | # K = self.K_R(R) 407 | 408 | # F = 1/self.lengthscale[i]**2 if i == j else 0 409 | 410 | # return K*((1/4)*self.Ri(X1, X2, i)*self.Ri(X1 = X2, X2 = X1, i = j).T + F) 411 | 412 | 413 | # def K(self, X1, X2): 414 | # """ Returns Gram matrix of k(x1, x2) """ 415 | # return self.K_R(self.R(X1, X2)) 416 | 417 | # def K_diag(self, X): 418 | # """ Returns diagonal of Gram matrix of k(x, x) """ 419 | # return np.ones(len(X))*self.variance -------------------------------------------------------------------------------- /GPConstr/model.py: -------------------------------------------------------------------------------- 1 | ### Dependent packages ### 2 | import time 3 | import sys, os 4 | import numpy as np 5 | import scipy as sp 6 | import pandas as pd 7 | from scipy import optimize 8 | #import pyDOE # Used with model.initiate_XV_LHS 9 | 10 | ### Custom imports ### 11 | print('Loading constrained GP module from ' + os.path.dirname(os.path.realpath('__file__'))) 12 | from .util.div import formattime, len_none 13 | from .util.linalg import jitchol, try_jitchol, triang_solve, mulinv_solve, chol_inv, traceprod, nearestPD 14 | from .util.stats import norm_cdf_int, norm_cdf_int_approx, normal_cdf_approx, mode_from_samples, trunc_norm_moments_approx_corrfree 15 | 16 | ################################################################################## 17 | ### Loading R functions -- this is a hack to make R and scipy not crash....... ### 18 | ### (need to run scipy.optimize once before loading R) ### 19 | def _scipyopt_test(): 20 | 21 | def rosen(x): 22 | """The Rosenbrock function""" 23 | return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0) 24 | 25 | def rosen_der(x): 26 | xm = x[1:-1] 27 | xm_m1 = x[:-2] 28 | xm_p1 = x[2:] 29 | der = np.zeros_like(x) 30 | der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm) 31 | der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0]) 32 | der[-1] = 200*(x[-1]-x[-2]**2) 33 | return der 34 | 35 | x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2]) 36 | res = optimize.minimize(rosen, x0, method='BFGS', jac=rosen_der) 37 | 38 | 39 | print('Loading R wrapper...') 40 | _scipyopt_test() 41 | from .r_functions.python_wrappers import rtmvnorm, pmvnorm, mtmvnorm, moments_from_samples 42 | ################################################################################## 43 | 44 | class Constraint(): 45 | """ 46 | Stores virtual observations and bounds functions for some constraint 47 | 48 | LB(Xv) <= Lf(Xv) <= UB(Xv) for some linear operator L 49 | 50 | NB! Bound functions needs to be vectorized! 
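Example (a sketch with hypothetical bound functions): for the boundedness constraint 0 <= f(Xv) <= 1, pass LB = lambda X: np.zeros(X.shape[0]) and UB = lambda X: np.ones(X.shape[0]). Each must return one value per row of Xv, matching the shape assertions in _check below.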
51 | """ 52 | 53 | def __init__(self, LB, UB): 54 | 55 | self.LB = LB # Function from R^n -> R 56 | self.UB = UB # Function from R^n -> R 57 | self.Xv = None # Virtual observation locations 58 | 59 | def _check(self, dim, txt): 60 | """ Check that constraint is ok """ 61 | 62 | if self.Xv is not None: 63 | assert len(self.Xv.shape) == 2, 'Error in constraint {} : Data Xv must be 2d array'.format(txt) 64 | assert self.LBXV().shape == (self.Xv.shape[0],), 'Error in constraint {} : Error in LB function'.format(txt) 65 | assert self.UBXV().shape == (self.Xv.shape[0],), 'Error in constraint {} : Error in UB function'.format(txt) 66 | assert self.Xv.shape[1] == dim, 'Error in constraint {} : Xv dimension incorrect'.format(txt) 67 | 68 | def LBXV(self): 69 | """ Lower bound evaluated at Xv """ 70 | return self.LB(self.Xv) 71 | 72 | def UBXV(self): 73 | """ Upper bound evaluated at Xv """ 74 | return self.UB(self.Xv) 75 | 76 | def add_XV(self, X): 77 | """ Add a single point X to XV """ 78 | if self.Xv is None: 79 | self.Xv = np.atleast_2d(X) 80 | else: 81 | self.Xv = np.append(self.Xv, np.atleast_2d(X), axis = 0) 82 | 83 | 84 | class GPmodel(): 85 | """ GP model """ 86 | 87 | def __init__(self, kernel, likelihood = 1, mean = 0, constr_likelihood = 1E-6, verbatim = True): 88 | 89 | ### Prior model input ################################## 90 | 91 | # GP parameters 92 | self.kernel = kernel # Object containing kernel function and its derivatives 93 | self.mean = mean # Constant mean function 94 | self.likelihood = likelihood 95 | self.constr_likelihood = constr_likelihood # 0 = noise free observations of constraint. Can increase this for stability 96 | 97 | # Design data 98 | self.X_training = None 99 | self.Y_training = None 100 | 101 | # Constraints 102 | self.constr_bounded = None # Boundedness constraint 103 | self.constr_deriv = None # List with partial derivative constraints 104 | 105 | ### Cached data from intermediate calculations ########### 106 | 107 | # Depending only on X 108 | self.K_w = None # K_w = K_x_x + sigma^2*I 109 | self.K_w_chol = None # Cholesky factor L s.t. 
L*L.T = K_w 110 | 111 | # Depending only on Y 112 | self.Y_centered = None 113 | 114 | # Depending on (X, Y) 115 | self.LLY = None # Only used in the unconstrained calculations 116 | 117 | # Depending on (X, XS) 118 | self.v2 = None 119 | self.A2 = None 120 | self.B2 = None 121 | 122 | # Depending on (X, XV) 123 | self.v1 = None 124 | self.A1 = None 125 | self.B1 = None 126 | self.L_1 = None 127 | self._p2 = False 128 | 129 | # Depending on (X, Y, XV) 130 | self.C_sim = None 131 | 132 | ### Other ############################################## 133 | self.verbatim = verbatim # Print info during execution 134 | 135 | # Parameters that need calculation reset 136 | @property 137 | def X_training(self): return self.__X_training 138 | 139 | @property 140 | def Y_training(self): return self.__Y_training 141 | 142 | @X_training.setter 143 | def X_training(self, value): 144 | self.K_w = None 145 | self.K_w_chol = None 146 | self.LLY = None 147 | self.v2 = None 148 | self.A2 = None 149 | self.B2 = None 150 | 151 | self.reset_XV() 152 | 153 | self.__X_training = value 154 | 155 | @Y_training.setter 156 | def Y_training(self, value): 157 | self.Y_centered = None 158 | self.LLY = None 159 | self.C_sim = None 160 | self.__Y_training = value 161 | 162 | def __str__(self): 163 | """ What to show when the object is printed """ 164 | txt = '----- GP model ----- \n mean = {} \n likelihood = {} \n '.format(self.mean, self.likelihood) 165 | txt += 'kernel: \n {} \n'.format(self.kernel.__str__()) 166 | txt += ' constraint: \n' 167 | 168 | if self.constr_bounded is None and self.constr_deriv is None: 169 | txt += ' No constraints \n' 170 | else: 171 | ls = [] 172 | if self.constr_bounded is not None: 173 | ls.append('f [{}]'.format(len_none(self.constr_bounded.Xv))) 174 | if self.constr_deriv is not None: 175 | ls = ls + ['df/dx_' + str(i+1) + ' [{}]'.format(len_none(self.constr_deriv[i].Xv)) for i in range(len(self.constr_deriv))] 176 | 177 | txt += ' ' + ', '.join(ls) + ' \n' 178 | txt += ' constr_likelihood = {} \n'.format(self.constr_likelihood) 179 | 180 | txt += '---------------------' 181 | return txt 182 | 183 | def reset(self): 184 | """ Reset model. I.e. 
forget all older calculations """ 185 | self.K_w = None 186 | self.K_w_chol = None 187 | self.Y_centered = None 188 | self.LLY = None 189 | self.v2 = None 190 | self.A2 = None 191 | self.B2 = None 192 | 193 | self.reset_XV() 194 | 195 | def reset_XV(self): 196 | """ Reset everything that depends on the virtual observations XV """ 197 | self.v1 = None 198 | self.A1 = None 199 | self.B1 = None 200 | self.L_1 = None 201 | self._p2 = False 202 | self.C_sim = None 203 | 204 | def calc_posterior_unconstrained(self, XS, full_cov = True): 205 | """ 206 | Calculate predictive posterior distribution f* | Y 207 | 208 | Returns: mean, cov (full or only diagonal) 209 | """ 210 | 211 | # Check input 212 | self._check_XY_training() 213 | assert len(XS.shape) == 2, 'Test data XS must be 2d array' 214 | 215 | # Start timer 216 | t0 = time.time() 217 | 218 | # Run pre calcs 219 | self._prep_Y_centered() 220 | self._prep_K_w(verbatim = self.verbatim) 221 | self._prep_K_w_factor(verbatim = self.verbatim) 222 | self._prep_LLY() 223 | 224 | if self.verbatim: print("..Calculating f* | Y ...", end = '') 225 | 226 | # Kernel matrices needed 227 | K_x_xs = np.matrix(self.kernel.K(self.X_training, XS)) 228 | 229 | v2 = triang_solve(self.K_w_chol, K_x_xs) 230 | 231 | # Calculate mean 232 | mean = self.mean + K_x_xs.T*self.LLY 233 | 234 | # Calculate cov 235 | if full_cov: 236 | K_xs_xs = np.matrix(self.kernel.K(XS, XS)) 237 | cov = K_xs_xs - v2.T*v2 238 | else: 239 | K_xs_xs_diag = self.kernel.K_diag(XS) 240 | cov = np.matrix(K_xs_xs_diag - np.square(v2).sum(0)).T 241 | 242 | if self.verbatim: print(' DONE - Total time: {}'.format(formattime(time.time() - t0))) 243 | 244 | return mean, cov 245 | 246 | def calc_posterior_constrained(self, XS, compute_mode = False, num_samples = 1000, save_samples = 10, percentiles = [10, 50, 90], algorithm = 'minimax_tilting', resample = False): 247 | """ 248 | Calculate constrained predictive posterior distribution f* | Y, C 249 | 250 | Returns: mean, variance, percentiles, mode, samples 251 | 252 | algorithm = 'rejection', 'gibbs' or 'minimax_tilting' 253 | resample = False -> use old samples of constraint distribution if available 254 | """ 255 | 256 | # Check that there are any constraints 257 | assert self.__has_xv(), 'No constraints or no virtual points specified for any constraint' 258 | 259 | # Check input 260 | self._check_XY_training() 261 | self._check_constraints() 262 | assert len(XS.shape) == 2, 'Test data XS must be 2d array' 263 | assert save_samples < num_samples, 'save_samples must be smaller than num_samples' 264 | 265 | # Start timer 266 | t0 = time.time() 267 | 268 | # Calculations only depending on (X, Y) 269 | self._prep_Y_centered() 270 | self._prep_K_w(verbatim = self.verbatim) 271 | self._prep_K_w_factor(verbatim = self.verbatim) 272 | 273 | # Calculations only depending on (X, XS) - v2, A2 and B2 274 | self._prep_1(XS, verbatim = self.verbatim) 275 | 276 | # Calculations only depending on (X, XV) - v1, A1 and B1 277 | self._prep_2(verbatim = self.verbatim) 278 | 279 | # Calculate mean of constraint distribution (covariance is B1) 280 | Lmu, constr_mean = self._calc_constr_mean() 281 | 282 | # Get bound vectors for constraint distribution 283 | LB, UB = self._calc_constr_bounds() 284 | 285 | # Calculate mean and covariance of constrained GP - A, B and Sigma 286 | self._prep_3(XS, verbatim = self.verbatim) 287 | 288 | ### Sample from truncated constraint distribution ### 289 | t1 = time.time() 290 | self._sample_constr_XV(m = num_samples, mu =
constr_mean, sigma = self.B1, LB = LB, UB = UB, algorithm = algorithm, resample = resample, verbatim = self.verbatim) 291 | sampling_time = [time.time() - t1] 292 | 293 | ### Sample from constrained GP ### 294 | t1 = time.time() 295 | if self.verbatim: print("..sampling {} times from constrained GP f*|C, Y".format(num_samples), end = '') 296 | 297 | # Draw from standard normal 298 | dim = XS.shape[0] 299 | U_sim = np.matrix(sp.random.multivariate_normal(np.zeros(dim), np.eye(dim), size = num_samples)).T 300 | 301 | # Using SVD to find Q.T*Q = Sigma 302 | # SVD decomposition of covariance matrix 303 | #(u, s, vh) = sp.linalg.svd(self.Sigma) 304 | #U, V = np.matrix(u), np.matrix(vh).T 305 | #Q = V*np.multiply(np.sqrt(s)[:,None], V.T) 306 | 307 | # Find matrix s.t. Q.T*Q = Sigma 308 | sigma_PD, Q = try_jitchol(self.Sigma) 309 | 310 | if not sigma_PD: 311 | Sigma_n = nearestPD(self.Sigma) 312 | err_pd = abs(Sigma_n - self.Sigma).max() 313 | Q = jitchol(Sigma_n) 314 | 315 | # Compute samples of f* 316 | fs_sim = self.mean + self.B*self.Y_centered + self.A*(self.C_sim - Lmu) + Q*U_sim 317 | 318 | # This corresponds to degenerate Sigma 319 | #print('Using degenerate Sigma!!') 320 | #fs_sim = self.mean + self.B*self.Y_centered + self.A*(self.C_sim - Lmu) 321 | sampling_time.append(time.time() - t1) 322 | if self.verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t1))) 323 | 324 | t1 = time.time() 325 | if self.verbatim: print('..computing statistics from samples', end = '') 326 | 327 | # Compute mode from samples 328 | mode = None 329 | if compute_mode: mode = mode_from_samples(fs_sim) 330 | 331 | # Save some of the samples 332 | randints = np.random.choice(num_samples, save_samples) 333 | samples = fs_sim[:, randints] 334 | 335 | # Calculate mean and percentiles 336 | mean = np.matrix(fs_sim.mean(axis = 1).reshape(-1, 1)) 337 | var = np.matrix(fs_sim.var(axis = 1).reshape(-1, 1)) 338 | perc = np.percentile(fs_sim, np.array(percentiles), axis = 1) 339 | 340 | if self.verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t1))) 341 | if self.verbatim: print(' DONE - Total time: {}'.format(formattime(time.time() - t0))) 342 | 343 | if not sigma_PD: 344 | if self.verbatim: print('WARNING: covariance matrix not PD! 
-- used closest PD matrix, error = {}'.format(err_pd)) 345 | 346 | return mean, var, perc, mode, samples, sampling_time 347 | 348 | def calc_posterior_constrained_moments(self, XS, corr_free_approx = False): 349 | """ 350 | Calculate first two moments of constrained predictive posterior distribution f* | Y, C 351 | 352 | corr_free_approx = True -> Uses correlation free approximation 353 | 354 | Returns: mean = E[f* | Y, C], cov = cov[f* | Y, C] 355 | 356 | """ 357 | 358 | # Check that there are any constraints 359 | assert self.__has_xv(), 'No constraints or no virtual points specified for any constraint' 360 | 361 | # Check input 362 | self._check_XY_training() 363 | self._check_constraints() 364 | assert len(XS.shape) == 2, 'Test data XS must be 2d array' 365 | 366 | # Start timer 367 | t0 = time.time() 368 | 369 | # Calculations only depending on (X, Y) 370 | self._prep_Y_centered() 371 | self._prep_K_w(verbatim = self.verbatim) 372 | self._prep_K_w_factor(verbatim = self.verbatim) 373 | 374 | # Calculations only depending on (X, XS) - v2, A2 and B2 375 | self._prep_1(XS, verbatim = self.verbatim) 376 | 377 | # Calculations only depending on (X, XV) - v1, A1 and B1 378 | self._prep_2(verbatim = self.verbatim) 379 | 380 | # Calculate mean of constraint distribution (covariance is B1) 381 | Lmu, constr_mean = self._calc_constr_mean() 382 | 383 | # Get bound vectors for constraint distribution 384 | LB, UB = self._calc_constr_bounds() 385 | 386 | # Calculate mean and covariance of constrained GP - A, B and Sigma 387 | self._prep_3(XS, verbatim = self.verbatim) 388 | 389 | # Compute moments of truncated variables (the virtual observations subjected to the constraint) 390 | t1 = time.time() 391 | if self.verbatim: print("..computing moments of C~|C, Y (from truncated Gaussian)", end = '') 392 | 393 | if corr_free_approx: 394 | # Using correlation free approximation 395 | tmu, tvar = trunc_norm_moments_approx_corrfree(mu = np.array(constr_mean).flatten(), sigma = self.B1, LB = LB, UB = UB) 396 | trunc_mu, trunc_cov = np.matrix(tmu).T, np.matrix(np.diag(tvar)) 397 | else: 398 | # Using mtmvnorm algorithm 399 | trunc_moments = mtmvnorm(mu = constr_mean, sigma = self.B1, a = LB, b = UB) 400 | trunc_mu, trunc_cov = np.matrix(trunc_moments[0]).T, np.matrix(trunc_moments[1]) 401 | 402 | if self.verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t1))) 403 | 404 | # Compute moments of f* | Y, C 405 | t1 = time.time() 406 | if self.verbatim: print("..computing moments of f*|C, Y", end = '') 407 | 408 | mean = self.mean + self.B*self.Y_centered + self.A*(trunc_mu - Lmu) 409 | cov = self.Sigma + self.A*trunc_cov*self.A.T 410 | 411 | if self.verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t1))) 412 | if self.verbatim: print(' DONE - Total time: {}'.format(formattime(time.time() - t0))) 413 | 414 | return mean, cov 415 | 416 | def constrprob_Xv(self, nu = 0, posterior = True, algorithm = 'minimax_tilting', n = 10E4): 417 | """ 418 | Calculate the probability that the constraint holds at XV 419 | 420 | posterior = False : Return P(C) 421 | posterior = True : Return P(C | Y) 422 | 423 | algorithm = 'GenzBretz' or 'minimax_tilting' 424 | """ 425 | 426 | # Check input 427 | self._check_constraints() 428 | 429 | assert algorithm in ['GenzBretz', 'minimax_tilting'], 'unknown algorithm = ' + algorithm 430 | 431 | # Get bound vectors for constraint distribution 432 | LB, UB = self._calc_constr_bounds() 433 | 434 | # Widen intervals with nu 435 | LB = LB - nu 436 | UB = UB + nu
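# Note: widening the bounds by nu compensates for the noise term constr_likelihood
# added to the virtual observations; find_XV_subop uses the same idea with the
# default nu = max(constr_likelihood*norm.ppf(p_target), 0).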
437 | 438 | if posterior: 439 | 440 | # Check input 441 | self._check_XY_training() 442 | 443 | # Calculations only depending on (X, Y) 444 | self._prep_Y_centered() 445 | self._prep_K_w(verbatim = False) 446 | self._prep_K_w_factor(verbatim = False) 447 | 448 | # Calculations only depending on (X, XV) - v1, A1 and B1 449 | self._prep_2(verbatim = False) 450 | 451 | # Calculate mean of constraint distribution (covariance is B1) 452 | Lmu, constr_mean = self._calc_constr_mean() 453 | 454 | # Calculate probability that the constraint holds at XV 455 | return pmvnorm(constr_mean, self.B1, LB, UB, algorithm, n) 456 | 457 | else: 458 | 459 | # Mean 460 | Lmu = self._Lmu() 461 | 462 | # Covariance 463 | L1L2T_K_xv_xv = self._calc_L1L2() 464 | n = L1L2T_K_xv_xv.shape[0] 465 | cov = L1L2T_K_xv_xv + self.constr_likelihood*np.identity(n) 466 | 467 | # Calculate probability that the constraint holds at XV 468 | return pmvnorm(Lmu, cov, LB, UB, algorithm, n) 469 | 470 | 471 | def optimize(self, include_constraint = True, fix_likelihood = False, bound_min = 1e-6, conditional = False, pc_alg = 'minimax_tilting', n = 100): 472 | """ 473 | Optimize hyperparameters using MLE 474 | 475 | include_constraint = True -> optimize P(Y)*P(C|Y) 476 | fix_likelihood = False -> Don't optimize GP likelihood parameter self.likelihood 477 | bound_min = minimum value in parameter bounds = (bound_min, ...) 478 | conditional = False -> used only for constrained optimization 479 | 480 | pc_alg = 'GenzBretz' or 'minimax_tilting' -> algorithm used to compute P(C) 481 | n -> number of samples used in pc_alg 482 | """ 483 | 484 | has_constr = self.__has_xv() # If there are any virtual points specified 485 | 486 | if has_constr and include_constraint: 487 | # Optimize with constraint 488 | self._optimize_constrained(fix_likelihood = fix_likelihood, bound_min = bound_min, conditional = conditional, algorithm = pc_alg, n = n) 489 | 490 | else: 491 | # Optimize without constraint 492 | if include_constraint and not has_constr: 493 | if self.verbatim: print('No virtual points found for constraint') 494 | 495 | self._optimize_unconstrained(method = 'ML', fix_likelihood = fix_likelihood, bound_min = bound_min) 496 | 497 | # def initiate_XV_LHS(self, n_lhs, p_target, nu = 0, add_points_to_model = True, max_add_pts = 200, lhs_iterations = 1000, i_range = None): 498 | # """ 499 | # Initiate set of virtual observation locations by 500 | # 1. Generating a set of n_lhs samples 501 | # 2. Compute the constraint probability for each sample 502 | # 3. Include each point where the constraint probability is smaller than p_target 503 | 504 | # add_points_to_model = False, just return results without adding any points to the model 505 | 506 | # Return: 507 | # DataFrame with all samples and constraint probabilities for inspection 508 | # """ 509 | 510 | # # Set list of sub-operators if not specified 511 | # if i_range is None: 512 | # i_range = [] 513 | 514 | # if self.constr_bounded is not None: 515 | # i_range.append(0) 516 | 517 | # if self.constr_deriv is not None: 518 | # i_range = i_range + [i+1 for i in range(len(self.constr_deriv))] 519 | 520 | # t0 = time.time() 521 | 522 | # # 1. Generate LHS samples 523 | # if self.verbatim: print('Generating LHS samples ...', end = '') 524 | # x_lhs = pyDOE.lhs(self.kernel.dim, samples = n_lhs, criterion = 'maximin', iterations = lhs_iterations) 525 | # print(' DONE - time: {}'.format(formattime(time.time() - t0))) 526 | 527 | # # 2. 
Compute constraint probability 528 | # t1 = time.time() 529 | # if self.verbatim: print('Computing constraint probabilities ...', end = '') 530 | # probs = [self._constrprob_xs_1(x_lhs, i, nu) for i in i_range] 531 | # print(' DONE - time: {}'.format(formattime(time.time() - t1))) 532 | 533 | # # 3. Sort by minimum constraint probability 534 | # idx = np.array(probs).min(axis = 0).argsort() 535 | # probs = np.array(probs)[:, idx].T 536 | # x_lhs = x_lhs[idx, :] 537 | 538 | # # 3. Add points to model 539 | # if add_points_to_model: 540 | # t1 = time.time() 541 | # if self.verbatim: print('Adding virtual observation locations to model ...', end = '') 542 | 543 | # num_pts_added = 0 544 | # for j in range(len(i_range)): 545 | # i = i_range[j] # Constraint number 546 | # for k in range(n_lhs): 547 | # if probs[k][j] < p_target: 548 | # # Add the location x_lhs[k] to constraint i 549 | # if max_add_pts <= num_pts_added: break 550 | 551 | # num_pts_added+=1 552 | # if i == 0: 553 | # self.constr_bounded.add_XV(x_lhs[k]) 554 | # else: 555 | # self.constr_deriv[i-1].add_XV(x_lhs[k]) 556 | 557 | # print(' DONE - added {} points, time: {}'.format(num_pts_added, formattime(time.time() - t1))) 558 | 559 | # self.reset() 560 | 561 | # # Gather data in dataframe 562 | # df = pd.concat([pd.DataFrame(x_lhs), pd.DataFrame(probs)], axis = 1) 563 | # colnames = ['x_' + str(i+1) for i in range(self.kernel.dim)] + ['P_f' if i == 0 else 'P_df_dx_' + str(i) for i in i_range] 564 | # df.columns = colnames 565 | # df['include'] = df.iloc[:,-len(i_range):].min(axis = 1) < p_target 566 | 567 | # return df 568 | 569 | def find_XV_subop(self, p_target, Omega = None, batch_size = None, bounds = None, i_range = None, nu = None, max_iterations = 200, moment_approximation = False, num_samples = 1000, min_prob_unconstr_xv = None, sampling_alg = 'minimax_tilting', moment_alg = 'correlation-free', opt_method = 'differential_evolution', print_intermediate = True): 570 | """ 571 | Find the set of virtual observations needed for a set of sub-operators 572 | 573 | Input: 574 | 575 | Omega = Finite set of candidate points. If Omega = None then 576 | global optimization is performed in the region defined by 'bounds' 577 | 578 | bounds = bounds on input space 579 | p_target = target constraint probability 580 | i_range = list of indices of sub-operators, e.g. 
i_range = [0, 2] -> find XV for L = [f, df/dx_2] 581 | *** if i_range = None then all sub-operators are included *** 582 | 583 | max_iterations = maximum number of iterations 584 | 585 | print_intermediate = True -> Print intermediate steps 586 | 587 | min_prob_unconstr_xv = Minimum probability that the constraint holds at XV using the unconstrained distribution 588 | (using this as a stopping criterion when rejection sampling is used) 589 | 590 | Global optimizer: 591 | opt_method = 'differential_evolution', 'basinhopping' or 'shgo' 592 | 593 | --- The choice of algorithm used to compute the constraint probability --- 594 | 595 | moment_approximation = False -> Estimate constraint probability using samples of the constraint process 596 | num_samples = number of samples to use in estimation of constraint probability 597 | sampling_alg = algorithm used to sample from truncated Gaussian ('rejection', 'gibbs' or 'minimax_tilting') 598 | 599 | moment_approximation = True -> Use moment approximation (Assume Gaussian distribution using moments of the constraint process) 600 | moment_alg = 'correlation-free', 'mtmvnorm' 601 | 602 | """ 603 | 604 | assert not(Omega is None and bounds is None), 'Need to specity Omega (finite search) or bounds (global optimization)' 605 | 606 | # Set list of sub-operators if not specified 607 | if i_range is None: 608 | i_range = [] 609 | 610 | if self.constr_bounded is not None: 611 | i_range.append(0) 612 | 613 | if self.constr_deriv is not None: 614 | i_range = i_range + [i+1 for i in range(len(self.constr_deriv))] 615 | 616 | if min_prob_unconstr_xv is not None: 617 | if min_prob_unconstr_xv < 1E-6 and not moment_approximation and sampling_alg == 'rejection': 618 | if self.verbatim: print('WARNING: very low acceptance rate criterion for rejection sampling. min_prob_unconstr_xv = ' + str(min_prob_unconstr_xv)) 619 | 620 | # Start timer 621 | t0 = time.time() 622 | 623 | # Set nu parameter for wider bounds 624 | # Will use LB - nu and UB + nu in constraint probability calculation 625 | if nu is None: nu = max(self.constr_likelihood*sp.stats.norm.ppf(p_target), 0) 626 | 627 | # Print start message 628 | label = [] 629 | for i in i_range: 630 | F = 'f' if i == 0 else 'df/dx_{}'.format(i) 631 | label.append(F) 632 | 633 | label = '[' + ', '.join(label) + ']' 634 | 635 | if self.verbatim: print('Searching for points XV s.t. P(a - nu < Lf < b + nu) > p_target = {} for Lf = {} and nu = {} ...'.format(p_target, label, nu)) 636 | 637 | # For storing results 638 | row = [] 639 | 640 | # Just in case.. 641 | self.reset_XV() 642 | 643 | pc_min = None 644 | i_add_pts = 0 # Number of points added 645 | 646 | for j in range(max_iterations): 647 | 648 | tj = time.time() 649 | 650 | if min_prob_unconstr_xv is not None: 651 | # Check for criteria on minimum probability at XV using the unconstrained distribution 652 | # If this is too small the sampling becomes difficult 653 | if j == 0: 654 | pc_xv = self.constrprob_Xv(nu) if self.__has_xv() else 1 655 | 656 | if pc_xv < min_prob_unconstr_xv: 657 | if self.verbatim: print('ABORTED: Too low acceptance rate ({}) - Found {} points. Min. constraint prob = {}. 
Total time spent = {}'.format(pc_xv, j, pc_min, formattime(time.time() - t0))) 658 | break 659 | 660 | # Run global optimization for each sub-operator in the list 661 | pc_min_i = [] 662 | x_min_i = [] 663 | 664 | for i in i_range: 665 | 666 | if Omega is None: 667 | success, x_min, pc_min = self._argmin_pc_subop(i, nu, bounds, opt_method, moment_approximation, sampling_alg, moment_alg, False, num_samples) 668 | else: 669 | pc_min, x_min = self._argmin_pc_subop_finite(i, nu, Omega, batch_size, sampling_alg, num_samples, moment_approximation, moment_alg) 670 | success = True 671 | 672 | if success: 673 | pc_min_i.append(pc_min) 674 | x_min_i.append(x_min) 675 | 676 | else: 677 | print('ERROR: Optimizer failed after {} points found'.format(j)) 678 | 679 | if j == 0: return None 680 | 681 | df_out = pd.DataFrame(row) 682 | df_out.columns = ['num_Xv', 'update_constr'] + ['Xv[{}]'.format(i+1) for i in range(len(x_min))] + ['pc_{}'.format(i+1) for i in i_range] + ['acc_rate'] 683 | return df_out 684 | 685 | # Choose sub-operator with smallest probability 686 | pc_min = min(pc_min_i) 687 | 688 | i_min = pc_min_i.index(pc_min) 689 | x_min = x_min_i[i_min] 690 | 691 | if self.constr_bounded is None: i_min = i_min + 1 692 | 693 | # Store results 694 | if min_prob_unconstr_xv is not None: 695 | row.append([j, i_min] + list(x_min) + pc_min_i + [pc_xv] + [time.time() - tj]) 696 | else: 697 | row.append([j, i_min] + list(x_min) + pc_min_i + [time.time() - tj]) 698 | 699 | if pc_min >= p_target: 700 | if self.verbatim: print('DONE - Found {} points. Min. constraint prob = {}. Total time spent = {}'.format(j, pc_min, formattime(time.time() - t0))) 701 | break 702 | 703 | else: 704 | if min_prob_unconstr_xv is not None: 705 | if self.verbatim and print_intermediate: print('i = {}, XV[{}] = {}, prob = {}, acc. rate = {}, optimization time = {}'.format(i_min, j+1, x_min, pc_min, pc_xv, formattime(time.time() - tj))) 706 | else: 707 | if self.verbatim and print_intermediate: print('i = {}, XV[{}] = {}, prob = {}, optimization time = {}'.format(i_min, j+1, x_min, pc_min, formattime(time.time() - tj))) 708 | 709 | # Add point 710 | i_add_pts += 1 711 | if i_min == 0: 712 | self.constr_bounded.add_XV(x_min) 713 | else: 714 | self.constr_deriv[i_min-1].add_XV(x_min) 715 | 716 | # Reset computations depending on XV 717 | self.reset_XV() 718 | 719 | if min_prob_unconstr_xv is not None: 720 | # Compute new acceptance rate 721 | if j != 0: 722 | pc_xv = self.constrprob_Xv(nu) 723 | 724 | if j+1 == max_iterations: 725 | if self.verbatim: print('DONE - Found {} points. Min. constraint prob = {}. 
Total time spent = {}'.format(j+1, pc_min, formattime(time.time() - t0))) 726 | break 727 | 728 | # Put results in dataframe and return 729 | df_out = pd.DataFrame(row) 730 | 731 | if min_prob_unconstr_xv is not None: 732 | df_out.columns = ['num_Xv', 'update_constr'] + ['Xv[{}]'.format(i+1) for i in range(len(x_min))] + ['pc_{}'.format(i+1) for i in i_range] + ['acc_rate', 'time'] 733 | else: 734 | df_out.columns = ['num_Xv', 'update_constr'] + ['Xv[{}]'.format(i+1) for i in range(len(x_min))] + ['pc_{}'.format(i+1) for i in i_range] + ['time'] 735 | 736 | return df_out, i_add_pts, pc_min 737 | 738 | 739 | def _optimize_unconstrained(self, method = 'ML', fix_likelihood = False, bound_min = 1e-6): 740 | """ 741 | Optimize hyperparameters of unconstrained GP 742 | 743 | method = 'ML' -> maximum marginal likelihood 744 | method = 'CV' -> cross validation 745 | 746 | fix_likelihood = False -> Don't optimize GP likelihood parameter self.likelihood 747 | bound_min = minimum value in parameter bounds = (bound_min, ...) 748 | """ 749 | 750 | # Start timer 751 | t0 = time.time() 752 | if self.verbatim: print("..Running optimization for unconstrained GP ...", end = '') 753 | 754 | # Run optimization 755 | if method == 'ML': 756 | res = self._optimize_unconstrained_ML(fix_likelihood, bound_min) 757 | elif method == 'CV': 758 | print('TODO...') 759 | raise NotImplementedError 760 | else: 761 | raise NotImplementedError 762 | 763 | # Save results 764 | self.__setparams(res.x, not fix_likelihood) 765 | 766 | if self.verbatim: 767 | if res.success: 768 | print(' DONE - Total time: {}'.format(formattime(time.time() - t0))) 769 | else: 770 | print('WARNING -- NO CONVERGENCE IN OPTIMIZATION -- Total time: {}'.format(formattime(time.time() - t0))) 771 | 772 | 773 | def _optimize_constrained(self, fix_likelihood = False, opt_method = 'shgo', bound_min = 1e-6, conditional = False, algorithm = 'minimax_tilting', n = 10E3, opt_args = {}, bounds = None): 774 | """ 775 | Optimize hyperparameters of constrained GP 776 | 777 | fix_likelihood = False -> Don't optimize GP likelihood parameter self.likelihood 778 | bound_min = minimum value in parameter bounds = (bound_min, ...)
779 | conditional = False -> maximize P(Y, C), otherwise maximize P(Y|C) 780 | 781 | algorithm = 'GenzBretz' or 'minimax_tilting' -> algorithm used to compute P(C) 782 | n -> number of samples used in 'GenzBretz' or 'minimax_tilting' 783 | 784 | bounds = None -> bounds are computed automatically (theta = [(likelihood), kernel_var, kernel_len_1, ...]) 785 | 786 | opt_method = 'differential_evolution', 'basinhopping', 'shgo', 'L-BFGS-B' 787 | opt_args = dict with additional arguments to optimizer 788 | """ 789 | 790 | assert opt_method in ['differential_evolution', 'basinhopping', 'shgo', 'L-BFGS-B'], 'unknown opt_method = ' + opt_method 791 | 792 | # Start timer 793 | t0 = time.time() 794 | if self.verbatim: 795 | if not conditional: 796 | print("..Running optimization ({}) for constrained GP - max P(Y, C) ...".format(opt_method), end = '') 797 | else: 798 | print("..Running optimization ({}) for constrained GP - max P(Y | C) ...".format(opt_method), end = '') 799 | 800 | 801 | # Define wrapper function for optimization 802 | def optfun(theta, *args): 803 | self.reset() 804 | self.__setparams(theta, not args[0]) 805 | 806 | loglik_unconstr = self._loglik_unconstrained() # P(Y) 807 | loglik_constr = np.log(self.constrprob_Xv(posterior = True, algorithm = args[2], n = args[3])) # P(C|Y) 808 | 809 | if args[1] == False: 810 | return -(loglik_unconstr + loglik_constr) # P(Y, C) 811 | else: 812 | loglik_constr_cond = np.log(self.constrprob_Xv(posterior = False, algorithm = args[2], n = args[3])) # P(C) 813 | return -(loglik_constr + loglik_constr_cond - loglik_unconstr) # P(Y|C) 814 | 815 | # Initial guess (not used for some global optimizers) 816 | if fix_likelihood: 817 | theta = np.array(self.kernel.get_params()) 818 | else: 819 | theta = np.array([self.likelihood] + list(self.kernel.get_params())) 820 | 821 | # Define bounds 822 | # theta = [(likelihood), kernel_var, kernel_len_1, ...] 
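# For example (a sketch): with fix_likelihood = False and a 2-d kernel,
# theta = [likelihood, kernel_var, len_1, len_2], and the default bounds built
# below become [(bound_min, 10*likelihood), (bound_min, 10*kernel_var),
# (bound_min, 10*len_1), (bound_min, 10*len_2)], i.e. each parameter can grow
# by at most a factor of 10 during optimization.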
823 | if bounds is None: 824 | likelihood_scale = 10 825 | ker_var_scale = 10 826 | ker_len_scale = 10 827 | 828 | if fix_likelihood: 829 | bounds = [(bound_min, ker_var_scale*theta[0])] 830 | bounds = bounds + [(bound_min, ker_len_scale*theta[i+1]) for i in range(len(theta)-1)] 831 | 832 | else: 833 | bounds = [(bound_min, likelihood_scale*theta[0]), (bound_min, ker_var_scale*theta[1])] 834 | bounds = bounds + [(bound_min, ker_len_scale*theta[i+2]) for i in range(len(theta)-2)] 835 | 836 | # Run global optimization 837 | args = (fix_likelihood, conditional, algorithm, n) 838 | 839 | if opt_method == 'differential_evolution': 840 | res = optimize.differential_evolution(optfun, bounds = bounds, args = args, **opt_args) 841 | 842 | if opt_method == 'basinhopping': 843 | res = optimize.basinhopping(optfun, theta, minimizer_kwargs = {'args':args, 'bounds': bounds}, **opt_args) 844 | res = res.lowest_optimization_result 845 | 846 | if opt_method == 'shgo': 847 | res = optimize.shgo(optfun, bounds = bounds, args = args, **opt_args) 848 | 849 | if opt_method == 'L-BFGS-B': 850 | res = optimize.minimize(optfun, theta, args = args, bounds=bounds, method = 'L-BFGS-B', **opt_args) 851 | 852 | 853 | # Save results 854 | self.__setparams(res.x, not fix_likelihood) 855 | 856 | if self.verbatim: 857 | if res.success: 858 | print(' DONE - Total time: {}'.format(formattime(time.time() - t0))) 859 | else: 860 | print('WARNING -- NO CONVERGENCE IN OPTIMIZATION -- Total time: {}'.format(formattime(time.time() - t0))) 861 | 862 | 863 | def _argmin_pc_subop(self, i, nu, bounds, opt_method = 'differential_evolution', moment_approximation = False, sampling_alg = 'minimax_tilting', moment_alg = 'correlation-free', verbatim = False, num_samples = 1000, return_res = False): 864 | """ 865 | Finds smallest probability that the constraint is satisfied for 866 | the i-th sub-operator 867 | 868 | i = 0: boundedness 869 | i > 0: df/dx_i 870 | 871 | Global optimizer: 872 | opt_method = 'differential_evolution', 'basinhopping' or 'shgo' 873 | 874 | moment_approximation = False -> Use sampling based method 875 | sampling_alg = 'rejection', 'gibbs' or 'minimax_tilting' 876 | 877 | moment_approximation = True -> Use moment approximation 878 | moment_alg = 'correlation-free', 'mtmvnorm' 879 | 880 | return_res = True -> Return optimizer res object. 
881 | Otherwise return: 882 | 883 | success = True/False 884 | x = argmin 885 | y = f(x) 886 | """ 887 | 888 | min_prob_log = 1E-10 # Cap the constraint prob at this lower limit (for log transform) 889 | 890 | assert opt_method in ['differential_evolution', 'basinhopping', 'shgo'], 'unknown opt_method = ' + opt_method 891 | 892 | # Calculations only depending on (X, Y) 893 | self._prep_Y_centered() 894 | self._prep_K_w(verbatim = False) 895 | self._prep_K_w_factor(verbatim = False) 896 | 897 | label = 'a < f < b' if i == 0 else 'a < df/dx_{} < b'.format(i) 898 | if verbatim: print('Finding argmin(p_c) sub-operator ' + label) 899 | 900 | # Define function to optimize 901 | if self._no_const(): 902 | if verbatim: print('No previous constraints found -- optimizing using unconstrained GP') 903 | 904 | args = (i, nu) 905 | 906 | def optfun(x, *args): 907 | i = args[0] 908 | nu = args[1] 909 | 910 | p_c = self._constrprob_xs_1(np.array(x).reshape(1, -1), i, nu)[0] 911 | if p_c < min_prob_log: p_c = min_prob_log 912 | return np.log(p_c) 913 | 914 | else: 915 | if verbatim: print('Optimizing using estimated constraint probability with {} samples'.format(num_samples)) 916 | 917 | if moment_approximation: 918 | # Use moment approximation of constraint probability 919 | 920 | args = (i, nu, moment_alg) 921 | 922 | def optfun(x, *args): 923 | i = args[0] 924 | nu = args[1] 925 | alg = args[2] 926 | 927 | p_c = self._constrprob_xs_2_momentapprox(np.array(x).reshape(1, -1), i, nu, alg, verbatim = False)[0] 928 | if p_c < min_prob_log: p_c = min_prob_log 929 | return np.log(p_c) 930 | 931 | else: 932 | # Estimate constraint probability from samples of the constrained process 933 | 934 | args = (i, nu, num_samples, sampling_alg) 935 | 936 | def optfun(x, *args): 937 | i = args[0] 938 | nu = args[1] 939 | num_samples = args[2] 940 | alg = args[3] 941 | 942 | p_c = self._constrprob_xs_2(np.array(x).reshape(1, -1), i, nu, num_samples, alg, verbatim = False)[0] 943 | if p_c < min_prob_log: p_c = min_prob_log 944 | return np.log(p_c) 945 | 946 | # Run global optimization 947 | if opt_method == 'differential_evolution': 948 | res = optimize.differential_evolution(optfun, bounds = bounds, args = args) 949 | 950 | if opt_method == 'basinhopping': 951 | x0 = [0.5*(x[0] + x[1]) for x in bounds] 952 | res = optimize.basinhopping(optfun, x0, minimizer_kwargs = {'args':args, 'bounds': bounds}) 953 | res = res.lowest_optimization_result 954 | 955 | if opt_method == 'shgo': 956 | res = optimize.shgo(optfun, bounds = bounds, args = args) 957 | 958 | if verbatim: 959 | if res.success: 960 | print('Global optimization completed - found x = {}, p_c = {}'.format(res.x, np.exp(res.fun))) 961 | else: 962 | print('ERROR IN GLOBAL OPTIMIZATION - ' + opt_method) 963 | 964 | # Return 965 | if return_res: return res 966 | return res.success, res.x, np.exp(res.fun) 967 | 968 | def _argmin_pc_subop_finite(self, i, nu, Omega, batch_size = None, sampling_alg = 'minimax_tilting', num_samples = 1000, moment_approximation = False, moment_alg = 'minimax_tilting', verbatim = False): 969 | """ 970 | Same as _argmin_pc_subop but over a finite domain Omega 971 | """ 972 | 973 | # Calculations only depending on (X, Y) 974 | self._prep_Y_centered() 975 | self._prep_K_w(verbatim = False) 976 | self._prep_K_w_factor(verbatim = False) 977 | 978 | label = 'a < f < b' if i == 0 else 'a < df/dx_{} < b'.format(i) 979 | if verbatim: print('Finding argmin(p_c) sub-operator ' + label) 980 | 981 | if batch_size is None: batch_size = Omega.shape[0] 982 | 983
| # Split Omega in batches 984 | assert batch_size <= Omega.shape[0], 'batch_size must be less than number of elements in Omega' 985 | 986 | num_intervals, rem = np.divmod(Omega.shape[0], batch_size) 987 | 988 | # Compute constraint probability for each element in Omega 989 | if self._no_const(): 990 | if verbatim: print('No previous constraints found -- optimizing using unconstrained GP') 991 | #p_c = self._constrprob_xs_1(Omega, i, nu) 992 | #p_c = np.array([self._constrprob_xs_1(x.reshape(1, -1), i, nu)[0] for x in Omega]) 993 | 994 | p_c = [] 995 | for j in range(num_intervals): 996 | p_c += list(self._constrprob_xs_1(Omega[j*batch_size:(j+1)*batch_size], i, nu)) 997 | 998 | if rem != 0: 999 | p_c += list(self._constrprob_xs_1(Omega[-rem:], i, nu)) 1000 | else: 1001 | if verbatim: print('Optimizing using estimated constraint probability with {} samples'.format(num_samples)) 1002 | #p_c = self._constrprob_xs_2(Omega, i, nu, num_samples, sampling_alg, verbatim = False) 1003 | #p_c = np.array([self._constrprob_xs_2(x.reshape(1, -1), i, nu, num_samples, sampling_alg, verbatim = False)[0] for x in Omega]) 1004 | p_c = [] 1005 | for j in range(num_intervals): 1006 | if moment_approximation: 1007 | p_c += list(self._constrprob_xs_2_momentapprox(Omega[j*batch_size:(j+1)*batch_size], i, nu, moment_alg, verbatim = False)) 1008 | else: 1009 | p_c += list(self._constrprob_xs_2(Omega[j*batch_size:(j+1)*batch_size], i, nu, num_samples, sampling_alg, verbatim = False)) 1010 | 1011 | if rem != 0: 1012 | if moment_approximation: 1013 | p_c += list(self._constrprob_xs_2_momentapprox(Omega[-rem:], i, nu, moment_alg, verbatim = False)) 1014 | else: 1015 | p_c += list(self._constrprob_xs_2(Omega[-rem:], i, nu, num_samples, sampling_alg, verbatim = False)) 1016 | 1017 | # Find smallest element 1018 | p_c = np.array(p_c) 1019 | idx = p_c.argmin() 1020 | prob = p_c[idx] 1021 | argmin = Omega[idx] 1022 | 1023 | # Return 1024 | if verbatim: 1025 | print('Minimum probability = {}, at x = {}'.format(prob, argmin)) 1026 | 1027 | return prob, argmin 1028 | 1029 | def _constr_posterior_dist_1(self, XS, i): 1030 | """ 1031 | Return mean and covariance of the i-th constraint at XS 1032 | 1033 | C~(XS) | Y 1034 | """ 1035 | 1036 | # Calculations only depending on (X, Y) 1037 | self._prep_Y_centered() 1038 | self._prep_K_w(verbatim = False) 1039 | self._prep_K_w_factor(verbatim = False) 1040 | 1041 | # c_v2, c_A2 and c_B2 1042 | c_v2, c_A2, c_B2 = self._constr_prep_1(XS, i) 1043 | 1044 | # Prior mean 1045 | if i == 0: 1046 | # Boundedness 1047 | Lmu = np.matrix(self.mean*np.ones(len(XS))).T 1048 | 1049 | else: 1050 | # Derivative 1051 | Lmu = np.matrix(np.zeros(len(XS))).T 1052 | 1053 | # Posterior mean 1054 | mu = Lmu + c_A2*self.Y_centered 1055 | 1056 | # Return posterior mean and covariance 1057 | return mu, c_B2 1058 | 1059 | 1060 | def _constrprob_xs_1(self, XS, i, nu): 1061 | """ 1062 | Return the probability that the i-th constraint is satisfied at XS 1063 | 1064 | C~(XS) | Y 1065 | """ 1066 | 1067 | # Get mean and cov 1068 | mu, cov = self._constr_posterior_dist_1(XS, i) 1069 | std = np.sqrt(np.diagonal(cov)) 1070 | 1071 | # Get bound vectors for constraint distribution 1072 | LB, UB = self.calc_constr_bounds_subop(XS, i) 1073 | 1074 | # Widen intervals with nu 1075 | LB = LB - nu 1076 | UB = UB + nu 1077 | 1078 | # Calculate probability that the constraint holds at each XV 1079 | #return norm_cdf_int(np.array(mu)[:,0], std, LB, UB) # Exact 1080 | return norm_cdf_int_approx(np.array(mu)[:,0], std, LB, UB) # 
Aprroximation within E-7 error 1081 | 1082 | # def _constrprob_xs_2_OLD(self, XS, i, nu, num_samples, algorithm, verbatim = False): 1083 | # """ 1084 | # Return the probability that the i-th constraint is satisfied at XS 1085 | 1086 | # C~(XS) | Y, C 1087 | # """ 1088 | 1089 | # # Calculations only depending on (X, Y) 1090 | # self._prep_Y_centered() 1091 | # self._prep_K_w(verbatim = verbatim) 1092 | # self._prep_K_w_factor(verbatim = verbatim) 1093 | 1094 | # # Calculations only depending on (X, XV) - v1, A1 and B1 1095 | # self._prep_2(verbatim = verbatim) 1096 | 1097 | # # Calculate mean of constraint distribution at XV (covariance is B1) 1098 | # Lmu_XV, constr_mean = self._calc_constr_mean() 1099 | 1100 | # # Get bound vectors for constraint distribution 1101 | # LB, UB = self._calc_constr_bounds() 1102 | 1103 | # # Sample from truncated constraint distribution 1104 | # self._sample_constr_XV(m = num_samples, mu = constr_mean, sigma = self.B1, LB = LB, UB = UB, algorithm = algorithm, resample = False, verbatim = verbatim) 1105 | 1106 | # # c_v2, c_A2 and c_B2 1107 | # c_v2, c_A2, c_B2 = self._constr_prep_1(XS, i) 1108 | 1109 | # # c_A, c_B and c_Sigma 1110 | # c_A, c_B, c_Sigma = self._constr_prep_2(XS, i, c_v2, c_A2, c_B2) 1111 | 1112 | # # Get bound vectors for constraint distribution 1113 | # LB, UB = self.calc_constr_bounds_subop(XS, i) 1114 | 1115 | # # Widen intervals with nu 1116 | # LB = LB - nu 1117 | # UB = UB + nu 1118 | 1119 | # # Prior mean 1120 | # if i == 0: 1121 | # # Boundedness 1122 | # Lmu = np.matrix(np.zeros(len(XS))).T 1123 | 1124 | # else: 1125 | # # Derivative 1126 | # Lmu = np.matrix(self.mean*np.ones(len(XS))).T 1127 | 1128 | # t = time.time() 1129 | # # Posterior mean 1130 | # mu = Lmu + c_A*(self.C_sim - Lmu_XV) + c_B*self.Y_centered 1131 | 1132 | # # Posterior standard deviation 1133 | # std = np.sqrt(np.diagonal(c_Sigma)) 1134 | 1135 | # # Calculate probability that the constraint holds at each XS individually 1136 | # # for each sample C_j and take the average over C_j 1137 | # if XS.shape[0] == 1: 1138 | 1139 | # # Faster for single input 1140 | # probs = norm_cdf_int_approx(np.array(mu)[0], std, LB, UB) 1141 | # probs = np.array([probs.mean()]) 1142 | 1143 | # else: 1144 | # probs = np.apply_along_axis(norm_cdf_int_approx, axis = 0, arr = np.array(mu), std = std, LB = LB, UB = UB) 1145 | # probs = probs.mean(axis = 1) 1146 | 1147 | # # Return probability 1148 | # return probs 1149 | 1150 | def _constrprob_xs_2(self, XS, i, nu, num_samples, algorithm, verbatim = False): 1151 | """ 1152 | Return the probability that the i-th constraint is satisfied at XS 1153 | 1154 | C~(XS) | Y, C 1155 | """ 1156 | 1157 | # Calculations only depending on (X, Y) 1158 | self._prep_Y_centered() 1159 | self._prep_K_w(verbatim = verbatim) 1160 | self._prep_K_w_factor(verbatim = verbatim) 1161 | 1162 | # Calculations only depending on (X, XV) - v1, A1 and B1 1163 | self._prep_2(verbatim = verbatim) 1164 | 1165 | # Calculate mean of constraint distribution at XV (covariance is B1) 1166 | Lmu_XV, constr_mean = self._calc_constr_mean() 1167 | 1168 | # Get bound vectors for constraint distribution 1169 | LB, UB = self._calc_constr_bounds() 1170 | 1171 | # Sample from truncated constraint distribution 1172 | self._sample_constr_XV(m = num_samples, mu = constr_mean, sigma = self.B1, LB = LB, UB = UB, algorithm = algorithm, resample = False, verbatim = verbatim) 1173 | 1174 | # c_v2, c_A2 and c_B2 1175 | # Only compute diagonal elements of constraint covariance 1176 | #c_v2, c_A2, 
1177 |         if i == 0:
1178 |             # Boundedness
1179 |             L2T_K_X_XS = np.matrix(self.kernel.K(self.X_training, XS))
1180 |             L1L2T_K_XS_XS_diag = np.matrix(self.kernel.K_diag(XS))
1181 | 
1182 |         else:
1183 |             L2T_K_X_XS = np.matrix(self.kernel.Ki0(XS, self.X_training, i-1)).T
1184 |             L1L2T_K_XS_XS_diag = np.matrix(self.kernel.Kii_diag(XS, i-1))
1185 | 
1186 |         c_v2 = triang_solve(self.K_w_chol, L2T_K_X_XS)
1187 |         c_A2 = triang_solve(self.K_w_chol, c_v2, trans = True).T
1188 | 
1189 |         # c_A, c_B and c_Sigma
1190 |         #c_A, c_B, c_Sigma = self._constr_prep_2(XS, i, c_v2, c_A2, c_B2)
1191 |         L1L2T_XS_XV = self._calc_FiL2T(XS, i)
1192 | 
1193 |         c_B3 = L1L2T_XS_XV - c_v2.T*self.v1
1194 | 
1195 |         self._prep_L1() # Compute L_1
1196 |         c_v3 = triang_solve(self.L_1, c_B3.T)
1197 | 
1198 |         c_A = triang_solve(self.L_1, c_v3, trans = True).T
1199 |         c_B = c_A2 - c_A*self.A1
1200 | 
1201 |         #c_Sigma = c_B2 - c_v3.T*c_v3
1202 |         c_Sigma_diag = np.matrix(L1L2T_K_XS_XS_diag - np.square(c_v2).sum(0) - np.square(c_v3).sum(0)).T
1203 | 
1204 |         # Get bound vectors for constraint distribution
1205 |         LB, UB = self.calc_constr_bounds_subop(XS, i)
1206 | 
1207 |         # Widen intervals with nu
1208 |         LB = LB - nu
1209 |         UB = UB + nu
1210 | 
1211 |         # Prior mean
1212 |         if i == 0:
1213 |             # Boundedness
1214 |             Lmu = np.matrix(self.mean*np.ones(len(XS))).T
1215 | 
1216 |         else:
1217 |             # Derivative
1218 |             Lmu = np.matrix(np.zeros(len(XS))).T
1219 | 
1220 |         t = time.time()
1221 |         # Posterior mean
1222 |         mu = Lmu + c_A*(self.C_sim - Lmu_XV) + c_B*self.Y_centered
1223 | 
1224 |         # Posterior standard deviation
1225 |         #std = np.sqrt(np.diagonal(c_Sigma))
1226 |         std = np.array(np.sqrt(c_Sigma_diag)).flatten()
1227 | 
1228 |         # Calculate probability that the constraint holds at each XS individually
1229 |         # for each sample C_j and take the average over C_j
1230 |         if XS.shape[0] == 1:
1231 | 
1232 |             # Faster for single input
1233 |             probs = norm_cdf_int_approx(np.array(mu)[0], std, LB, UB)
1234 |             probs = np.array([probs.mean()])
1235 | 
1236 |         else:
1237 |             probs = np.apply_along_axis(norm_cdf_int_approx, axis = 0, arr = np.array(mu), std = std, LB = LB, UB = UB)
1238 |             probs = probs.mean(axis = 1)
1239 | 
1240 |         # Return probability
1241 |         return probs
1242 | 
1243 | 
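    # Editorial sketch of the Monte Carlo estimator above, in isolation. The names
    # mu_j and sigma_j are illustrative, not part of the model API: conditional on one
    # sample C_j of the truncated constraint distribution, Lf* | Y, C~ = C_j is
    # Gaussian, so the probability of staying inside [LB - nu, UB + nu] is a difference
    # of two normal CDFs, and the final estimate is the average over samples:
    #
    #     probs_j = norm_cdf_int_approx(mu_j, sigma_j, LB - nu, UB + nu)  # one sample
    #     probs = np.mean(np.column_stack(all_probs_j), axis = 1)         # MC average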
1244 |     def _constrprob_xs_2_momentapprox(self, XS, i, nu, algorithm = 'minimax_tilting', verbatim = False):
1245 |         """
1246 |         Return the probability that the i-th constraint is satisfied at XS using moment approximation
1247 | 
1248 |         algorithm = 'correlation-free' -> Using correlation free approximation
1249 |         algorithm = 'mtmvnorm' -> Using mtmvnorm from the R-package tmvtnorm
1250 |         algorithm = 'minimax_tilting' -> Using samples
1251 |         algorithm = 'Genz' -> NOT YET IMPLEMENTED! (Using Genz approximation)
1252 | 
1253 |         C~(XS) | Y, C
1254 |         """
1255 | 
1256 |         assert algorithm in ['correlation-free', 'mtmvnorm', 'minimax_tilting'], 'unknown algorithm = ' + algorithm
1257 | 
1258 |         # Calculations only depending on (X, Y)
1259 |         self._prep_Y_centered()
1260 |         self._prep_K_w(verbatim = verbatim)
1261 |         self._prep_K_w_factor(verbatim = verbatim)
1262 | 
1263 |         # Calculations only depending on (X, XV) - v1, A1 and B1
1264 |         self._prep_2(verbatim = verbatim)
1265 | 
1266 |         # Calculate mean of constraint distribution at XV (covariance is B1)
1267 |         Lmu_XV, constr_mean = self._calc_constr_mean()
1268 | 
1269 |         # Get bound vectors for constraint distribution
1270 |         LB, UB = self._calc_constr_bounds()
1271 | 
1272 |         # c_v2, c_A2 and c_B2
1273 |         c_v2, c_A2, c_B2 = self._constr_prep_1(XS, i)
1274 | 
1275 |         # c_A, c_B and c_Sigma
1276 |         c_A, c_B, c_Sigma = self._constr_prep_2(XS, i, c_v2, c_A2, c_B2)
1277 | 
1278 |         # Compute moments of truncated variables (the virtual observations subjected to the constraint)
1279 |         t1 = time.time()
1280 |         if verbatim: print("..computing moments of C~|C, Y (from truncated Gaussian)", end = '')
1281 | 
1282 |         if algorithm == 'correlation-free':
1283 |             # Using correlation free approximation
1284 |             tmu, tvar = trunc_norm_moments_approx_corrfree(mu = np.array(constr_mean).flatten(), sigma = self.B1, LB = LB, UB = UB)
1285 |             trunc_mu, trunc_cov = np.matrix(tmu).T, np.matrix(np.diag(tvar))
1286 | 
1287 |         if algorithm == 'mtmvnorm':
1288 |             # Using mtmvnorm algorithm
1289 |             trunc_moments = mtmvnorm(mu = constr_mean, sigma = self.B1, a = LB, b = UB)
1290 |             trunc_mu, trunc_cov = np.matrix(trunc_moments[0]).T, np.matrix(trunc_moments[1])
1291 | 
1292 |         if algorithm == 'minimax_tilting':
1293 |             # Using samples from minimax tilting
1294 |             trunc_mu, trunc_cov = moments_from_samples(1000, constr_mean, self.B1, LB, UB, algorithm = 'minimax_tilting')
1295 |             trunc_mu = np.matrix(trunc_mu).T
1296 |             trunc_cov = np.matrix(trunc_cov)
1297 | 
1298 |         if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t1)))
1299 | 
1300 |         # Compute moments of Lf* | Y, C
1301 |         t1 = time.time()
1302 |         if verbatim: print("..computing moments of Lf*|C, Y", end = '')
1303 | 
1304 |         # Prior mean
1305 |         if i == 0:
1306 |             # Boundedness
1307 |             Lmu = np.matrix(self.mean*np.ones(len(XS))).T
1308 | 
1309 |         else:
1310 |             # Derivative
1311 |             Lmu = np.matrix(np.zeros(len(XS))).T
1312 | 
1313 |         # Posterior mean
1314 |         mean = Lmu + c_B*self.Y_centered + c_A*(trunc_mu - Lmu_XV)
1315 | 
1316 |         # Posterior standard deviation
1317 |         std = np.sqrt(np.diagonal(c_Sigma + c_A*trunc_cov*c_A.T))
1318 | 
1319 |         if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t1)))
1320 | 
1321 |         # Get bound vectors for constraint distribution
1322 |         LB, UB = self.calc_constr_bounds_subop(XS, i)
1323 | 
1324 |         # Widen intervals with nu
1325 |         LB = LB - nu
1326 |         UB = UB + nu
1327 | 
1328 |         # Calculate probability that the constraint holds at each XS individually
1329 |         # for each sample C_j and take the average over C_j
1330 |         if XS.shape[0] == 1:
1331 |             # Faster for single input
1332 |             probs = norm_cdf_int_approx(np.array(mean)[0], std, LB, UB)
1333 | 
1334 |         else:
1335 |             probs = np.apply_along_axis(norm_cdf_int_approx, axis = 0, arr = np.array(mean), std = std, LB = LB, UB = UB)
1336 | 
1337 |         # Return probability
1338 |         #return probs, mean, std
1339 |         return probs
1340 | 
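    # Editorial note on the three moment approximations above: 'correlation-free'
    # approximates each truncated marginal independently (fast, but it discards the
    # off-diagonal terms of B1), 'mtmvnorm' computes the first two truncated moments
    # exactly via the R-package tmvtnorm, and 'minimax_tilting' estimates the moments
    # from samples drawn by minimax tilting (hard-coded to 1000 samples here).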
1341 |     def _sample_constr_XV(self, m, mu, sigma, LB, UB, algorithm, resample = False, verbatim = True):
1342 |         """
1343 |         Generate m samples from the constraint distribution
1344 | 
1345 |         Input:
1346 |         m -- number of samples
1347 |         mu, sigma, LB, UB -- distribution parameters of truncated Gaussian
1348 |         algorithm -- name of sampling algorithm ('rejection', 'gibbs' or 'minimax_tilting')
1349 |         resample -- resample = False -> Use old samples if they exist
1350 | 
1351 |         """
1352 | 
1353 |         # Check if we should just use the old samples
1354 |         if self.C_sim is None:
1355 |             generate_samples = True
1356 |         else:
1357 |             if m == self.C_sim.shape[1]:
1358 |                 generate_samples = resample
1359 |             else:
1360 |                 generate_samples = True
1361 | 
1362 | 
1363 |         if generate_samples:
1364 |             # Generate samples
1365 | 
1366 |             # Start timer
1367 |             t0 = time.time()
1368 | 
1369 |             if verbatim: print("..sampling {} times from truncated constraint distribution C~|C, Y".format(m), end = '')
1370 |             self.C_sim = rtmvnorm(n = m, mu = mu, sigma = sigma, a = LB, b = UB, algorithm = algorithm).T
1371 |             if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t0)))
1372 | 
1373 |         else:
1374 |             # Use old
1375 |             if verbatim: print('..using old samples from truncated constraint distribution C~|C, Y')
1376 | 
1377 | 
1378 |     def _check_XY_training(self):
1379 |         """
1380 |         Check that X_training and Y_training are OK
1381 |         """
1382 |         assert self.X_training is not None, 'Training data not found. Use model.X_training = ...'
1383 |         assert len(self.X_training.shape) == 2, 'Training data X_training must be 2d array'
1384 |         assert self.Y_training is not None, 'Training data not found. Use model.Y_training = ...'
1385 |         assert len(self.Y_training.shape) == 1, 'Training data Y_training must be 1d array'
1386 |         assert self.X_training.shape[0] == len(self.Y_training), 'Number of points in X_training and Y_training does not match'
1387 | 
1388 |     def _check_constraints(self):
1389 | 
1390 |         assert self.__has_xv(), 'No constraints found'
1391 | 
1392 |         if self.constr_bounded is not None: self.constr_bounded._check(self.kernel.dim, 'Bounded')
1393 | 
1394 |         if self.constr_deriv is not None:
1395 |             i = 1
1396 |             for c in self.constr_deriv:
1397 |                 c._check(self.kernel.dim, 'Derivative ' + str(i))
1398 |                 i+=1
1399 | 
1400 |     def _optimize_unconstrained_ML(self, fix_likelihood = False, bound_min = 1e-6):
1401 |         """
1402 |         Optimize hyperparameters of unconstrained GP using ML
1403 | 
1404 |         fix_likelihood = True -> Don't optimize GP likelihood parameter self.likelihood
1405 |         bound_min = minimum value in parameter bounds = (bound_min, ...)
1406 | """ 1407 | 1408 | # Define wrapper functions for optimization 1409 | def optfun(theta, fix_likelihood): 1410 | self.reset() 1411 | self.__setparams(theta, not fix_likelihood) 1412 | return -self._loglik_unconstrained() 1413 | 1414 | def optfun_grad(theta, fix_likelihood): 1415 | self.reset() 1416 | self.__setparams(theta, not fix_likelihood) 1417 | grad = -np.array(self._loglik_grad_unconstrained()) 1418 | if fix_likelihood: 1419 | return grad[1:] 1420 | else: 1421 | return grad 1422 | 1423 | # Define bounds 1424 | num_params = self.kernel.dim + 2 1425 | if fix_likelihood: num_params -= 1 1426 | bounds = [(bound_min, None)]*num_params 1427 | 1428 | # Initial guess 1429 | if fix_likelihood: 1430 | theta = np.array(self.kernel.get_params()) 1431 | else: 1432 | theta = np.array([self.likelihood] + list(self.kernel.get_params())) 1433 | 1434 | # Run optimizer 1435 | res = optimize.minimize(optfun, theta, args=fix_likelihood, jac = optfun_grad, bounds=bounds, method = 'L-BFGS-B') 1436 | 1437 | return res 1438 | 1439 | def _loglik_unconstrained(self): 1440 | """ 1441 | Calculates log marginal likelihood 1442 | 1443 | I.e. log(P(Y_training | X_training)) 1444 | """ 1445 | # Check input 1446 | self._check_XY_training() 1447 | 1448 | # Run pre calcs 1449 | self._prep_Y_centered() 1450 | self._prep_K_w(verbatim = False) 1451 | self._prep_K_w_factor(verbatim = False) 1452 | self._prep_LLY() 1453 | 1454 | ### Calculate log marginal likelihood ### 1455 | n = self.X_training.shape[0] 1456 | loglik = -0.5*self.Y_centered.T*self.LLY - np.log(np.diag(self.K_w_chol)).sum() - (n/2)*np.log(2*np.pi) 1457 | loglik = loglik[0,0] 1458 | 1459 | return loglik 1460 | 1461 | def _loglik_grad_unconstrained(self): 1462 | """ 1463 | Calculates gradient of log marginal likelihood w.r.t hyperparameters 1464 | """ 1465 | # Check input 1466 | self._check_XY_training() 1467 | 1468 | # Run pre calcs 1469 | self._prep_Y_centered() 1470 | self._prep_K_w(verbatim = False) 1471 | self._prep_K_w_factor(verbatim = False) 1472 | self._prep_LLY() 1473 | 1474 | # Invert K_w using the Cholesky factor 1475 | K_w_inv = chol_inv(self.K_w_chol) 1476 | 1477 | # Partial derivative of K_w w.r.t. likelihood 1478 | n = self.X_training.shape[0] 1479 | dKw_dlik = np.matrix(np.identity(n)) 1480 | 1481 | # Partial derivative of K_w w.r.t. kernel parameters 1482 | dK_dpar = self.kernel.K_gradients(self.X_training, self.X_training) 1483 | 1484 | # Calculate gradient 1485 | alpha = K_w_inv*self.Y_centered 1486 | tmp = alpha*alpha.T - K_w_inv 1487 | 1488 | Dloglik_lik = 0.5*traceprod(tmp, dKw_dlik) # W.r.t. GP likelihood parameter 1489 | Dloglik_ker = [0.5*traceprod(tmp, K) for K in dK_dpar] # W.r.t. 
kernel parameters
1490 | 
1491 |         Dloglik = [Dloglik_lik] + Dloglik_ker
1492 | 
1493 |         return Dloglik
1494 | 
1495 |     def _EM_g(self, use_mtmvnorm = False, n = 1000):
1496 |         """
1497 |         g function used in EM
1498 | 
1499 |         use_mtmvnorm = True -> mtmvnorm for truncated moments, otherwise estimate from samples using minimax-tilting
1500 |         n = samples used to estimate moments
1501 | 
1502 |         Computed using current hyperparameters
1503 |         """
1504 | 
1505 |         # Calculations only depending on (X, Y)
1506 |         self._prep_Y_centered()
1507 |         self._prep_K_w(verbatim = False)
1508 |         self._prep_K_w_factor(verbatim = False)
1509 | 
1510 |         # Calculations only depending on (X, XV) - v1, A1 and B1
1511 |         self._prep_2(verbatim = False)
1512 | 
1513 |         # Calculate mean of constraint distribution (covariance is B1)
1514 |         Lmu, constr_mean = self._calc_constr_mean()
1515 | 
1516 |         # Get bound vectors for constraint distribution
1517 |         LB, UB = self._calc_constr_bounds()
1518 | 
1519 |         # Compute moments of truncated variables (the virtual observations subjected to the constraint)
1520 |         if use_mtmvnorm:
1521 |             trunc_moments = mtmvnorm(mu = constr_mean, sigma = self.B1, a = LB, b = UB)
1522 |             trunc_mu, trunc_cov = np.matrix(trunc_moments[0]).T, np.matrix(trunc_moments[1])
1523 |         else:
1524 |             trunc_mu, trunc_cov = moments_from_samples(n, constr_mean, self.B1, LB, UB, algorithm = 'minimax_tilting')
1525 | 
1526 |         # Center truncated mean and convert to matrix
1527 |         trunc_mu = np.matrix(trunc_mu.reshape(-1, 1) - Lmu)
1528 |         trunc_cov = np.matrix(trunc_cov)
1529 |         y_c = np.matrix(self.Y_centered)
1530 | 
1531 |         # Gather blocks in matrix
1532 |         dg = y_c*trunc_mu.T
1533 |         g = np.block([[y_c*y_c.T, dg], [dg.T, trunc_cov + trunc_mu*trunc_mu.T]])
1534 |         return g
1535 | 
1536 |     def _EM_Q_check(self, n):
1537 |         """
1538 |         Compute Q the slow way for testing
1539 |         """
1540 |         # Calculations only depending on (X, Y)
1541 |         self._prep_Y_centered()
1542 |         self._prep_K_w(verbatim = False)
1543 |         self._prep_K_w_factor(verbatim = False)
1544 | 
1545 |         # Calculations only depending on (X, XV) - v1, A1 and B1
1546 |         self._prep_2(verbatim = False)
1547 | 
1548 |         # Calculate mean of constraint distribution (covariance is B1)
1549 |         Lmu, constr_mean = self._calc_constr_mean()
1550 | 
1551 |         # Get bound vectors for constraint distribution
1552 |         LB, UB = self._calc_constr_bounds()
1553 | 
1554 |         # Sample from the constraint distribution
1555 |         C_sim = rtmvnorm(n = n, mu = constr_mean, sigma = self.B1, a = LB, b = UB, algorithm = 'minimax_tilting')
1556 | 
1557 |         # Define covariance matrix of Y and C~
1558 |         L2T_K_x_xv = self._calc_L2T(self.X_training)
1559 |         L1L2T_K_xv_xv = self._calc_L1L2()
1560 |         Gamma = np.block([[self.K_w, L2T_K_x_xv], [L2T_K_x_xv.T, L1L2T_K_xv_xv + self.constr_likelihood*np.identity(n = L1L2T_K_xv_xv.shape[0])]])
1561 | 
1562 |         # Cholesky
1563 |         L = np.matrix(jitchol(Gamma))
1564 | 
1565 |         # Invert Gamma using the Cholesky factor
1566 |         Gamma_inv = chol_inv(L)
1567 | 
1568 |         # Invert B1
1569 |         B1_inv = np.linalg.inv(self.B1)
1570 | 
1571 |         # Estimate Q
1572 |         tmp = np.zeros(C_sim.shape[0])
1573 |         tmp_2 = np.zeros(C_sim.shape[0])
1574 |         for i in range(C_sim.shape[0]):
1575 |             z = np.matrix(list(self.Y_centered.flatten()) + list(C_sim[i])).T
1576 |             c = np.matrix(C_sim[i]).T
1577 |             tmp[i] = (z.T*Gamma_inv*z)[0,0]
1578 |             tmp_2[i] = (c.T*B1_inv*c)[0,0]
1579 | 
1580 |         cB1c = np.array(tmp_2).mean()
1581 |         zGz = np.array(tmp).mean()
1582 |         H = (self.B1.shape[0]/2)*np.log(2*np.pi) + 0.5*np.log(np.linalg.det(B1_inv)) + cB1c
1583 |         det_1 = np.log(np.diag(L)).sum()
1584 |         det_2 = np.log(np.linalg.det(Gamma))
1585 |         Q = -0.5*zGz - (Gamma.shape[0]/2)*np.log(2*np.pi) - np.log(np.diag(L)).sum()
1586 | 
1587 |         return Q, zGz, H, det_1, det_2
1588 | 
1589 |     def _EM_Q(self, g):
1590 |         """
1591 |         Q function used in EM
1592 | 
1593 |         Input:
1594 |         g = g(theta^(t)) matrix
1595 | 
1596 |         Out:
1597 |         Value of Q = Q(theta | theta^(t))
1598 |         """
1599 |         # Run pre calcs
1600 |         self._prep_K_w(verbatim = False)
1601 | 
1602 |         # Define covariance matrix of Y and C~
1603 |         L2T_K_x_xv = self._calc_L2T(self.X_training)
1604 |         L1L2T_K_xv_xv = self._calc_L1L2()
1605 |         Gamma = np.block([[self.K_w, L2T_K_x_xv], [L2T_K_x_xv.T, L1L2T_K_xv_xv + self.constr_likelihood*np.identity(n = L1L2T_K_xv_xv.shape[0])]])
1606 | 
1607 |         # Cholesky
1608 |         L = np.matrix(jitchol(Gamma))
1609 | 
1610 |         # Invert Gamma using the Cholesky factor
1611 |         Gamma_inv = chol_inv(L)
1612 | 
1613 |         # Compute Q
1614 |         Q = - (Gamma.shape[0]/2)*np.log(2*np.pi) - np.log(np.diag(L)).sum() - 0.5*traceprod(g, Gamma_inv)
1615 | 
1616 |         return Q
1617 | 
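    # Editorial sketch of how _EM_Q and _EM_update below fit together. The outer loop,
    # iteration count and tolerance are illustrative assumptions, not part of the
    # original API:
    #
    #     for it in range(max_iter):
    #         theta_old = np.array([model.likelihood] + list(model.kernel.get_params()))
    #         model._EM_update(n = 1000, opt_method = 'L-BFGS-B')  # one E- and M-step
    #         theta_new = np.array([model.likelihood] + list(model.kernel.get_params()))
    #         if np.abs(theta_new - theta_old).max() < tol: break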
1618 |     def _EM_update(self, fix_likelihood = False, bounds = None, n = 1000, opt_method = 'L-BFGS-B', opt_args = {}, verbatim = False):
1619 |         """
1620 |         Run one iteration of the EM algorithm and update hyperparameters
1621 | 
1622 |         1. Estimate truncated moments using current hyperparameters
1623 |         2. Optimize current hyperparameters
1624 | 
1625 |         fix_likelihood = True -> Don't optimize GP likelihood parameter self.likelihood
1626 |         n = number of samples used to estimate moments
1627 |         opt_method = 'differential_evolution', 'basinhopping', 'shgo', 'L-BFGS-B'
1628 |         opt_args = dict with additional arguments to optimizer
1629 |         """
1630 | 
1631 |         assert opt_method in ['differential_evolution', 'basinhopping', 'shgo', 'L-BFGS-B'], 'unknown opt_method = ' + opt_method
1632 | 
1633 |         t0 = time.time()
1634 | 
1635 |         # 1. Compute truncated moments and the matrix g(theta)
1636 |         if verbatim: print('..Running calculation of g(theta~) ...', end = '')
1637 |         g = self._EM_g(n = n)
1638 |         if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t0)))
1639 | 
1640 |         # Initial guess (not used for some global optimizers)
1641 |         if fix_likelihood:
1642 |             theta = np.array(self.kernel.get_params())
1643 |         else:
1644 |             theta = np.array([self.likelihood] + list(self.kernel.get_params()))
1645 | 
1646 |         # Define bounds
1647 |         # theta = [(likelihood), kernel_var, kernel_len_1, ...]
1648 |         if bounds is None:
1649 |             bound_min = 1e-6
1650 |             num_params = self.kernel.dim + 2
1651 |             if fix_likelihood: num_params -= 1
1652 |             bounds = [(bound_min, None)]*num_params
1653 | 
1654 |         # 2. Define function to optimize Q(theta, theta~)
1655 |         def optfun(theta, *args):
1656 |             self.reset()
1657 |             self.__setparams(theta, not args[0])
1658 |             return -self._EM_Q(g)
1659 | 
1660 |         # Run optimization
1661 |         t1 = time.time()
1662 |         if verbatim: print('..Running optimization ({}) ...'.format(opt_method), end = '')
1663 | 
1664 |         args = (fix_likelihood,)
1665 | 
1666 |         if opt_method == 'differential_evolution':
1667 |             res = optimize.differential_evolution(optfun, bounds = bounds, args = args, **opt_args)
1668 | 
1669 |         if opt_method == 'basinhopping':
1670 |             res = optimize.basinhopping(optfun, theta, minimizer_kwargs = {'args':args, 'bounds': bounds}, **opt_args)
1671 |             res = res.lowest_optimization_result
1672 | 
1673 |         if opt_method == 'shgo':
1674 |             res = optimize.shgo(optfun, bounds = bounds, args = args, **opt_args)
1675 | 
1676 |         if opt_method == 'L-BFGS-B':
1677 |             res = optimize.minimize(optfun, theta, args = args, bounds=bounds, method = 'L-BFGS-B', **opt_args)
1678 | 
1679 |         if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t1)))
1680 | 
1681 |         # Save results
1682 |         self.__setparams(res.x, not fix_likelihood)
1683 | 
1684 |     def __setparams(self, theta, includes_likelihood):
1685 |         """
1686 |         Set model parameters from single array theta
1687 |         """
1688 |         if includes_likelihood:
1689 |             self.likelihood = theta[0]
1690 |             self.kernel.set_params(theta[1:])
1691 |         else:
1692 |             self.kernel.set_params(theta)
1693 | 
1694 |     def _prep_K_w(self, verbatim = False):
1695 |         """
1696 |         Calculate K_w = K(X, X) + likelihood*I
1697 | 
1698 |         *** Need to run this if one of the following arrays is changed : ***
1699 |         - X_training
1700 | 
1701 |         """
1702 | 
1703 |         if verbatim: print('..Running calculation of K_w ...', end = '')
1704 | 
1705 |         if self.K_w is None:
1706 | 
1707 |             # Start timer
1708 |             t0 = time.time()
1709 | 
1710 |             n = len(self.X_training)
1711 |             self.K_w = np.matrix(self.kernel.K(self.X_training, self.X_training) + self.likelihood*np.identity(n))
1712 | 
1713 |             if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t0)))
1714 | 
1715 |         else:
1716 |             pass
1717 |             if verbatim: print(' SKIP - (cached)')
1718 | 
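    # Editorial note on the caching pattern used by the _prep_* methods (such as
    # _prep_K_w above and _prep_K_w_factor below): each is a no-op when its cached
    # attribute (e.g. self.K_w) is already set, so the methods can be called repeatedly
    # at negligible cost. The flip side is that any change to the training data or
    # hyperparameters must invalidate the cache first - which is what the calls to
    # self.reset() inside the optimization routines take care of.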
1719 |     def _prep_K_w_factor(self, verbatim = False):
1720 |         """
1721 |         Calculate matrix L s.t. L*L.T = K_w
1722 | 
1723 |         *** Need to run this if one of the following arrays is changed : ***
1724 |         - X_training
1725 |         """
1726 | 
1727 |         if verbatim: print('..Running calculation of Cholesky factor for K_w ...', end = '')
1728 | 
1729 |         if self.K_w_chol is None:
1730 | 
1731 |             # Start timer
1732 |             t0 = time.time()
1733 | 
1734 |             # Cholesky
1735 |             self.K_w_chol = np.matrix(jitchol(self.K_w))
1736 | 
1737 |             if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t0)))
1738 | 
1739 |         else:
1740 |             if verbatim: print(' SKIP - (cached)')
1741 | 
1742 | 
1743 |     def _prep_LLY(self):
1744 |         """
1745 |         Calculate LLY = L.T \ L \ Y_centered
1746 | 
1747 |         *** Need to run this if one of the following arrays is changed : ***
1748 |         - X_training
1749 |         - Y_training
1750 | 
1751 |         """
1752 | 
1753 |         if self.LLY is None:
1754 |             # Run calculation
1755 |             self.LLY = mulinv_solve(self.K_w_chol, self.Y_centered, triang = True)
1756 | 
1757 |     def _prep_Y_centered(self):
1758 |         """
1759 |         Calculate Y_centered
1760 |         """
1761 |         if self.Y_centered is None: self.Y_centered = self.Y_training.reshape(-1, 1) - self.mean
1762 | 
1763 |     def _prep_1(self, XS, verbatim = False):
1764 |         """
1765 |         Preparation step 1 - calculate matrices depending only on (XS, X)
1766 | 
1767 |         Updates self.v2, self.A2, self.B2
1768 |         """
1769 | 
1770 |         if verbatim: print('..Running preparation step 1 - dependence on (XS, X) ...', end = '')
1771 | 
1772 |         # Start timer
1773 |         t0 = time.time()
1774 | 
1775 |         K_x_xs = np.matrix(self.kernel.K(self.X_training, XS))
1776 |         K_xs_xs = np.matrix(self.kernel.K(XS, XS))
1777 | 
1778 |         self.v2 = triang_solve(self.K_w_chol, K_x_xs)
1779 |         self.B2 = K_xs_xs - self.v2.T*self.v2
1780 |         self.A2 = triang_solve(self.K_w_chol, self.v2, trans = True).T
1781 | 
1782 |         if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t0)))
1783 | 
1784 |     def _prep_2(self, verbatim = False):
1785 |         """
1786 |         Preparation step 2 - calculate matrices depending only on (XV, X)
1787 | 
1788 |         Updates self.v1, self.A1, self.B1
1789 |         """
1790 | 
1791 |         if verbatim: print('..Running preparation step 2 - dependence on (XV, X) ...', end = '')
1792 | 
1793 |         if self._p2 == False:
1794 | 
1795 |             # Start timer
1796 |             t0 = time.time()
1797 | 
1798 |             # Calculate kernel matrices
1799 |             L2T_K_x_xv = self._calc_L2T(self.X_training)
1800 |             L1L2T_K_xv_xv = self._calc_L1L2()
1801 | 
1802 |             # Calculate v1, A1 and B1
1803 |             self.v1 = triang_solve(self.K_w_chol, L2T_K_x_xv)
1804 |             self.A1 = triang_solve(self.K_w_chol, self.v1, trans = True).T
1805 | 
1806 |             n = L1L2T_K_xv_xv.shape[0]
1807 |             self.B1 = L1L2T_K_xv_xv + self.constr_likelihood*np.identity(n) - self.v1.T*self.v1
1808 | 
1809 |             self._p2 = True
1810 |             if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t0)))
1811 | 
1812 |         else:
1813 |             if verbatim: print(' SKIP - (cached)')
1814 | 
1815 | 
1816 |     def _prep_3(self, XS, verbatim = False):
1817 |         """
1818 |         Preparation step 3 - calculate A, B and Sigma
1819 | 
1820 |         Updates self.L_1, self.v3, self.B3, self.A, self.B, and self.Sigma
1821 |         """
1822 | 
1823 |         if verbatim: print('..Running preparation step 3 - dependence on (XS, XV, X) ...', end = '')
1824 | 
1825 |         # Start timer
1826 |         t0 = time.time()
1827 | 
1828 |         self._prep_L1() # Compute L_1
1829 | 
1830 |         L2T_K_xs_xv = self._calc_L2T(XS)
1831 |         self.B3 = L2T_K_xs_xv - self.v2.T*self.v1
1832 | 
1833 |         self.v3 = triang_solve(self.L_1, self.B3.T)
1834 | 
1835 |         self.A = triang_solve(self.L_1, self.v3, trans = True).T
1836 |         self.B = self.A2 - self.A*self.A1
1837 | 
1838 | 
self.Sigma = self.B2 - self.v3.T*self.v3 1839 | 1840 | if verbatim: print(' DONE - time: {}'.format(formattime(time.time() - t0))) 1841 | 1842 | 1843 | def _prep_L1(self): 1844 | """ Cholesky factorization of B1 """ 1845 | 1846 | if self.L_1 is None: 1847 | self.L_1 = np.matrix(jitchol(self.B1)) 1848 | 1849 | def _constr_prep_1(self, XS, i): 1850 | """ 1851 | Return c_v2, c_A2 and c_B2 for constraint distribution 1852 | """ 1853 | 1854 | if i == 0: 1855 | # Boundedness 1856 | L2T_K_X_XS = np.matrix(self.kernel.K(self.X_training, XS)) 1857 | L1L2T_K_XS_XS = np.matrix(self.kernel.K(XS, XS)) 1858 | 1859 | else: 1860 | L2T_K_X_XS = np.matrix(self.kernel.Ki0(XS, self.X_training, i-1)).T 1861 | L1L2T_K_XS_XS = np.matrix(self.kernel.Kij(XS, XS, i-1, i-1)) 1862 | 1863 | c_v2 = triang_solve(self.K_w_chol, L2T_K_X_XS) 1864 | c_A2 = triang_solve(self.K_w_chol, c_v2, trans = True).T 1865 | c_B2 = L1L2T_K_XS_XS - c_v2.T*c_v2 1866 | 1867 | return c_v2, c_A2, c_B2 1868 | 1869 | def _constr_prep_2(self, XS, i, c_v2, c_A2, c_B2): 1870 | """ 1871 | Return c_A, c_B and c_Sigma for constraint distribution 1872 | """ 1873 | 1874 | L1L2T_XS_XV = self._calc_FiL2T(XS, i) 1875 | 1876 | c_B3 = L1L2T_XS_XV - c_v2.T*self.v1 1877 | 1878 | self._prep_L1() # Compute L_1 1879 | c_v3 = triang_solve(self.L_1, c_B3.T) 1880 | 1881 | c_A = triang_solve(self.L_1, c_v3, trans = True).T 1882 | c_B = c_A2 - c_A*self.A1 1883 | 1884 | c_Sigma = c_B2 - c_v3.T*c_v3 1885 | 1886 | return c_A, c_B, c_Sigma 1887 | 1888 | 1889 | def _calc_constr_mean(self): 1890 | """ 1891 | Calculate mean of constraint distribution 1892 | 1893 | Returns 1894 | Lmu : Linear operator applied to GP mean 1895 | constr_mean : Mean of constraint distribution = Lmu + A_1(Y - mu) 1896 | """ 1897 | Lmu = self._Lmu() 1898 | 1899 | constr_mean = Lmu + self.A1*self.Y_centered 1900 | 1901 | return Lmu, constr_mean 1902 | 1903 | def _Lmu(self): 1904 | """ 1905 | Returns 1906 | Lmu : Linear operator applied to GP mean 1907 | """ 1908 | m_tot = self._num_virtuial_pts() 1909 | 1910 | if not self.__has_xv_bounded(): 1911 | # Only derivative constraint 1912 | Lmu = np.matrix(np.zeros(m_tot)).T 1913 | 1914 | else: 1915 | if not self.__has_xv_deriv(): 1916 | # Only boundedness constraint 1917 | Lmu = np.matrix(self.mean*np.ones(m_tot)).T 1918 | 1919 | else: 1920 | # Both constraints 1921 | m_0 = self.constr_bounded.Xv.shape[0] # Number of virtual points - boundedness 1922 | m_1 = m_tot - m_0 # Number of virtual points - derivatives 1923 | 1924 | # Operator applied to mean 1925 | Lmu = np.matrix(np.concatenate((self.mean*np.ones(m_0), np.zeros(m_1)), axis=0)).T 1926 | 1927 | return Lmu 1928 | 1929 | def _calc_constr_bounds(self): 1930 | """ Return lower/upper bounds for constraint """ 1931 | 1932 | if self.__has_xv_bounded(): 1933 | LB = [self.constr_bounded.LBXV()] 1934 | UB = [self.constr_bounded.UBXV()] 1935 | else: 1936 | LB = [] 1937 | UB = [] 1938 | 1939 | if self.constr_deriv is not None: 1940 | for constr in self.constr_deriv: 1941 | if constr.Xv is not None: 1942 | LB.append(constr.LBXV()) 1943 | UB.append(constr.UBXV()) 1944 | 1945 | return np.concatenate(LB), np.concatenate(UB) 1946 | 1947 | def calc_constr_bounds_subop(self, XS, i): 1948 | """ Return lower/upper bounds for the i-th suboperator only at XS """ 1949 | 1950 | if i == 0: 1951 | LB = self.constr_bounded.LB(XS) 1952 | UB = self.constr_bounded.UB(XS) 1953 | else: 1954 | LB = self.constr_deriv[i-1].LB(XS) 1955 | UB = self.constr_deriv[i-1].UB(XS) 1956 | 1957 | return LB, UB 1958 | 1959 | def 
_num_virtuial_pts(self): 1960 | """ Return total number of virtual points """ 1961 | 1962 | n = 0 1963 | if self.__has_xv_bounded(): 1964 | n = self.constr_bounded.Xv.shape[0] 1965 | 1966 | if self.constr_deriv is not None: 1967 | for constr in self.constr_deriv: 1968 | if constr.Xv is not None: 1969 | n = n + constr.Xv.shape[0] 1970 | 1971 | return n 1972 | 1973 | def _no_const(self): 1974 | """ 1975 | Returns TRUE if there are no constraints or only constraints with no virtual points 1976 | """ 1977 | return not self.__has_xv() 1978 | 1979 | def _calc_L2T(self, XX): 1980 | """ Calculate L2^T K_XX_XV for XX = X or XX = XS """ 1981 | 1982 | ls = [] # List of block matrices to concatenate 1983 | 1984 | if self.__has_xv_bounded(): 1985 | ls.append(np.matrix(self.kernel.K(XX, self.constr_bounded.Xv))) 1986 | 1987 | if self.__has_xv_deriv(): 1988 | i = 0 1989 | for constr in self.constr_deriv: 1990 | if constr.Xv is not None: 1991 | ls.append(np.matrix(self.kernel.Ki0(constr.Xv, XX, i)).T) 1992 | i+= 1 1993 | 1994 | return np.block(ls) 1995 | 1996 | def _calc_FiL2T(self, XS, i): 1997 | """ Calculate FiL2^T K_S_XV -- i.e. only the i-th row-block of L1L2^T K_XS_XV """ 1998 | 1999 | if i == 0: 2000 | return self._calc_L2T(XS) 2001 | 2002 | # i > 0 2003 | ls = [] # List of block matrices to concatenate 2004 | 2005 | if self.__has_xv_bounded(): 2006 | ls.append(np.matrix(self.kernel.Ki0(XS, self.constr_bounded.Xv, i-1))) 2007 | 2008 | if self.__has_xv_deriv(): 2009 | j = 0 2010 | for constr in self.constr_deriv: 2011 | if constr.Xv is not None: 2012 | ls.append(np.matrix(self.kernel.Kij(XS, constr.Xv, i-1, j))) 2013 | 2014 | j+= 1 2015 | 2016 | return np.block(ls) 2017 | 2018 | 2019 | def _calc_L1L2(self): 2020 | """ Calculate L1L2^T K_XV_XV """ 2021 | 2022 | if self.__has_xv_bounded(): 2023 | 2024 | # Calculate boundedness constraint matrix 2025 | K_xv = np.matrix(self.kernel.K(self.constr_bounded.Xv, self.constr_bounded.Xv)) 2026 | 2027 | if self.__has_xv_deriv(): 2028 | # Calculate cross terms 2029 | ls = [] 2030 | i = 0 2031 | for constr in self.constr_deriv: 2032 | if constr.Xv is not None: 2033 | ls.append(np.matrix(self.kernel.Ki0(constr.Xv, self.constr_bounded.Xv, i)).T) 2034 | i+= 1 2035 | 2036 | K01_xv = np.block(ls) 2037 | 2038 | else: 2039 | # Only boundedness constraint 2040 | return K_xv 2041 | 2042 | if self.__has_xv_deriv(): 2043 | # Calculate derivative constraint matrix 2044 | ls = [] 2045 | 2046 | for i in range(len(self.constr_deriv)): 2047 | if self.constr_deriv[i].Xv is not None: 2048 | ls_row = [] 2049 | for l in range(len(self.constr_deriv) - i): 2050 | j = l + i 2051 | if self.constr_deriv[j].Xv is not None: 2052 | ls_row.append(np.matrix(self.kernel.Kij(self.constr_deriv[i].Xv, self.constr_deriv[j].Xv, i, j))) 2053 | ls.append(ls_row) 2054 | 2055 | # Create blocks 2056 | blocks = [[np.block(ls[0])]] 2057 | n_cols = blocks[0][0].shape[1] 2058 | for i in range(len(ls) - 1): 2059 | tmp = np.block(ls[i+1]) 2060 | n_rows = tmp.shape[0] 2061 | blanks = np.matrix(np.zeros((n_rows, n_cols - tmp.shape[1]))) 2062 | blocks.append([np.block([blanks, tmp])]) 2063 | 2064 | K11_xv = np.block(blocks) 2065 | 2066 | if not self.__has_xv_bounded(): 2067 | # Only derivative constraints, return K11_xv 2068 | i_lower = np.tril_indices(K11_xv.shape[0], -1) 2069 | K11_xv[i_lower] = K11_xv.T[i_lower] 2070 | return K11_xv 2071 | 2072 | # Compute full matrix and return 2073 | blanks = np.matrix(np.zeros((K01_xv.shape[1], K01_xv.shape[0]))) 2074 | K = np.block([[K_xv, K01_xv], [blanks, K11_xv]]) 2075 
| i_lower = np.tril_indices(K.shape[0], -1) 2076 | K[i_lower] = K.T[i_lower] 2077 | return K 2078 | 2079 | def _delete_xv(self): 2080 | """ Delete all virtual points """ 2081 | if self.__has_xv_bounded(): 2082 | self.constr_bounded.Xv = None 2083 | 2084 | if self.__has_xv_deriv(): 2085 | for constr in self.constr_deriv: 2086 | constr.Xv = None 2087 | 2088 | def __has_xv_bounded(self): 2089 | """ Check if there are virtual points for boundedness constraint """ 2090 | if self.constr_bounded is None: 2091 | return False 2092 | else: 2093 | return False if self.constr_bounded.Xv is None else True 2094 | 2095 | def __has_xv_deriv(self): 2096 | """ Check if there are virtual points for derivative constraints """ 2097 | if self.constr_deriv is None: 2098 | return False 2099 | else: 2100 | for constr in self.constr_deriv: 2101 | if constr.Xv is not None: 2102 | return True 2103 | return False 2104 | 2105 | def __has_xv(self): 2106 | """ Check if there are any virtual points """ 2107 | return self.__has_xv_bounded() or self.__has_xv_deriv() 2108 | -------------------------------------------------------------------------------- /GPConstr/r_functions/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /GPConstr/r_functions/python_wrappers.py: -------------------------------------------------------------------------------- 1 | 2 | ### Python wrappers for R functions ### 3 | import rpy2.robjects as robjects 4 | import numpy as np 5 | import os 6 | 7 | # Set source 8 | r_source = robjects.r['source'] 9 | 10 | # Set working directory for R 11 | dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | dir_path = dir_path.replace('\\','/') 13 | robjects.r("setwd('{}')".format(dir_path)) 14 | 15 | # Import custom files and define functions using R 16 | r_source("r_gpsampler.R") 17 | r_rtmvnorm = robjects.r['rtmvnorm_w'] 18 | r_pmvnorm = robjects.r['pmvnorm_w'] 19 | r_mtmvnorm = robjects.r['mtmvnorm_w'] 20 | 21 | # Print R version 22 | print('Running R from rpy2: {}'.format(robjects.r('R.Version()$version.string')[0])) 23 | 24 | def param_py_to_r(mu, sigma, a, b): 25 | """ Convert to r objects """ 26 | 27 | # R vector 28 | r_mu = robjects.FloatVector(mu) 29 | r_a = robjects.FloatVector(a) 30 | r_b = robjects.FloatVector(b) 31 | 32 | # R matrix 33 | sigma_flat = sigma.flatten().tolist()[0] 34 | r_sigma = robjects.r['matrix'](robjects.FloatVector(sigma_flat), nrow = sigma.shape[0]) 35 | 36 | return r_mu, r_sigma, r_a, r_b 37 | 38 | def rtmvnorm(n, mu, sigma, a, b, algorithm = 'gibbs'): 39 | """ 40 | Create n samples from truncated multivariate normal with 41 | mean = mu and covariance matrix = sigma 42 | 43 | a = lower bound, b = upper bound 44 | 45 | n : integer 46 | mu : numpy array 47 | sigma : numpy matrix 48 | a : numpy array 49 | b : numpy array 50 | H (optional) : precision matrix (inverse of sigma) <-- removed this 51 | """ 52 | 53 | # Convert to R objects 54 | r_mu, r_sigma, r_a, r_b = param_py_to_r(mu, sigma, a, b) 55 | 56 | # Run R function and cast to numpy array 57 | X = np.array(r_rtmvnorm(n, r_mu, r_sigma, r_a, r_b, algorithm)) 58 | 59 | assert not np.isnan(np.min(X)), 'rtmvnorm returns nan' 60 | 61 | return X 62 | 63 | def pmvnorm(mu, sigma, a, b, algorithm, n = 1E4): 64 | """ Returns probability over rectangle given by [a, b] """ 65 | # Convert to R objects 66 | r_mu, r_sigma, r_a, r_b = param_py_to_r(mu, sigma, a, b) 67 | 68 | return 
np.array(r_pmvnorm(r_mu, r_sigma, r_a, r_b, algorithm, n))[0]
69 | 
70 | def mtmvnorm(mu, sigma, a, b):
71 |     """ Returns moments of truncated multivariate normal """
72 |     # Convert to R objects
73 |     r_mu, r_sigma, r_a, r_b = param_py_to_r(mu, sigma, a, b)
74 | 
75 |     moments = r_mtmvnorm(r_mu, r_sigma, r_a, r_b)
76 | 
77 |     return np.array(moments[0]), np.array(moments[1])
78 | 
79 | def moments_from_samples(n, mu, sigma, a, b, algorithm = 'minimax_tilting'):
80 |     """
81 |     Estimate moments of truncated normal from n samples
82 |     """
83 |     samples = rtmvnorm(n, mu, sigma, a, b, algorithm)
84 |     return samples.mean(axis = 0), np.cov(samples, rowvar = False)
--------------------------------------------------------------------------------
/GPConstr/r_functions/r_gpsampler.R:
--------------------------------------------------------------------------------
1 | 
2 | library(tmvtnorm)
3 | library(mvtnorm)
4 | library(TruncatedNormal)
5 | library(truncnorm)
6 | 
7 | #' Create n samples from truncated multivariate normal with
8 | #' mean = mu and covariance matrix = sigma
9 | rtmvnorm_w <- function(n, mu, sigma, a, b, algorithm = "gibbs"){
10 | 
11 |   # Some different available algorithms:
12 |   #'rejection' - rejection sampling (from mvtnorm)
13 |   #'gibbs' - Gibbs sampling (from mvtnorm)
14 |   #'minimax_tilting' - Minimax tilting (from TruncatedNormal)
15 | 
16 |   # For univariate distributions use rtruncnorm instead
17 |   if (length(mu) == 1) {
18 |     return(matrix(rtruncnorm(n = n, a = a, b = b, mean = mu, sd = sigma), ncol = 1))
19 |   }
20 | 
21 |   if (algorithm == "gibbs") {
22 |     return(rtmvnorm(n=n, mean=mu, sigma=sigma, lower=a, upper=b, burn.in.samples=100, algorithm=algorithm))
23 |   }
24 | 
25 |   if (algorithm == "rejection") {
26 |     return(rtmvnorm(n=n, mean=mu, sigma=sigma, lower=a, upper=b, algorithm=algorithm))
27 |   }
28 | 
29 |   if (algorithm == "minimax_tilting") {
30 |     Z <- mvrandn(a - mu, b - mu, sigma, n)
31 |     return(t(Z + mu))
32 |   }
33 | 
34 | }
35 | 
36 | # For calculating probability over rectangle given by [a, b]
37 | # (Same as acceptance rate for rejection sampling)
38 | pmvnorm_w <- function(mu, sigma, a, b, algorithm = "GenzBretz", n = 10^4) {
39 | 
40 |   # Some different available algorithms:
41 |   #'GenzBretz' - (from mvtnorm)
42 |   #'minimax_tilting' - Minimax tilting (from TruncatedNormal)
43 | 
44 |   # n = number of simulations
45 | 
46 |   rownames(sigma) <- colnames(sigma)
47 | 
48 |   if (algorithm == "GenzBretz") {
49 |     return(as.numeric(pmvnorm(lower=a, upper=b, mean=mu, sigma=sigma)))
50 |   }
51 | 
52 |   if (algorithm == "minimax_tilting") {
53 |     x=mvNcdf(a - mu, b - mu, sigma, n)
54 |     return(as.numeric(x$prob))
55 |   }
56 | 
57 | }
58 | 
59 | # For calculating moments of truncated multivariate normal
60 | mtmvnorm_w <- function(mu, sigma, a, b) {
61 | 
62 |   rownames(sigma) <- colnames(sigma)
63 | 
64 |   moments <- mtmvnorm(mean=mu, sigma=sigma, lower=a, upper=b)
65 |   return(moments)
66 | 
67 | }
--------------------------------------------------------------------------------
/GPConstr/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cagrell/gp_constr/d3e890ed5a01d4f40e53e1c1ecb9ca3365650618/GPConstr/util/__init__.py
--------------------------------------------------------------------------------
/GPConstr/util/div.py:
--------------------------------------------------------------------------------
1 | # Div util functions
2 | 
3 | # Time
4 | def formattime(sec):
5 |     """ Format time in seconds to h:m:s depending on sec """
6 |     m, s = divmod(sec, 60)
7 |     h, m = divmod(m, 60)
8 | 
9 |     if h > 0: return '{} hours {} minutes {} seconds'.format('%.0f'%h, '%.0f'%m, '%.0f'%s)
10 |     if m > 0: return '{} minutes {} seconds'.format('%.0f'%m, '%.1f'%s)
11 |     return '{} seconds'.format('%.3f'%s)
12 | 
13 | # Other random stuff
14 | def len_none(x):
15 |     return 0 if x is None else len(x)
--------------------------------------------------------------------------------
/GPConstr/util/linalg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | from numpy.core.umath_tests import inner1d
4 | 
5 | def is_symPD_svd(M, verbatim = False, sym_tol = 1E-5, psd_tol = 1E-5):
6 |     """ Check if matrix is symmetric and positive definite up to some error using SVD """
7 | 
8 |     # Symmetric
9 |     symdiff = abs(M - M.T).max()
10 |     is_sym = symdiff < sym_tol
11 | 
12 |     # Eigenvalues
13 |     eigvals = np.linalg.eigvals(M)
14 |     eig_real_max, eig_real_min = eigvals.real.max(), eigvals.real.min()
15 |     eig_imag_max, eig_imag_min = eigvals.imag.max(), eigvals.imag.min()
16 | 
17 |     # SVD
18 |     # For M = U*S*V.T check if M = V*S*V.T also
19 |     (u, s, vh) = sp.linalg.svd(M)
20 |     vsvt = np.dot(vh.T * s, vh)
21 |     svddiff = abs(M - vsvt).max()
22 |     is_psd = svddiff < psd_tol
23 | 
24 |     if verbatim:
25 |         if is_sym:
26 |             print('Symmetric: error = {} < {}, OK'.format(symdiff, sym_tol))
27 |         else:
28 |             print('WARNING! Symmetric: error = {} > {}, NOT OK'.format(symdiff, sym_tol))
29 | 
30 |         print('Eigenvalues real part: min = {}, max = {}'.format(eig_real_min, eig_real_max))
31 |         print('Eigenvalues imag part: min = {}, max = {}'.format(eig_imag_min, eig_imag_max))
32 | 
33 |         if is_psd:
34 |             print('Positive definite, M - VSV.T where M = USV.T (SVD): error = {} < {}, OK'.format(svddiff, psd_tol))
35 |         else:
36 |             print('WARNING! Positive definite, M - VSV.T where M = USV.T (SVD): error = {} > {}, NOT OK'.format(svddiff, psd_tol))
37 | 
38 |     return is_sym & is_psd
39 | 
40 | def isPD_chol(B):
41 |     """Returns true when input is positive-definite, via Cholesky"""
42 |     try:
43 |         _ = sp.linalg.cholesky(B)
44 |         return True
45 |     except sp.linalg.LinAlgError:
46 |         return False
47 | 
48 | def try_jitchol(B):
49 |     """Returns true AND cholesky factor when input is positive-definite, via Cholesky"""
50 |     try:
51 |         L = jitchol(B)
52 |         return True, L
53 |     except sp.linalg.LinAlgError:
54 |         return False, None
55 | 
56 | def isPD_det(B):
57 |     """Returns true when input is positive-definite, via Determinant"""
58 |     if sp.linalg.det(B) <= 0: return False
59 |     return True
60 | 
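# Editorial example (not part of the original module): a minimal sketch of how the
# PD checks above and nearestPD below are typically used together - repair a
# covariance matrix that is not positive definite, then verify the result.
def _example_nearest_pd():
    # A valid-looking correlation matrix that is in fact not positive definite
    A = np.array([[1.0, 0.9, 0.3],
                  [0.9, 1.0, 0.9],
                  [0.3, 0.9, 1.0]])
    assert not isPD_chol(A)

    # Project to the nearest PD matrix and confirm Cholesky now succeeds
    A_fixed = nearestPD(A, check = 'chol')
    assert isPD_chol(A_fixed)
    return A_fixed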
61 | def nearestPD(A, check = 'chol'):
62 |     """Find the nearest positive-definite matrix to input
63 | 
64 |     A Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
65 |     credits [2].
66 | 
67 |     [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd
68 | 
69 |     [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
70 |     matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6
71 | 
72 |     *** Modified code to include different checks for PD depending on use ***
73 |     Can check using
74 |     Cholesky - check = 'chol'
75 |     SVD - check = 'svd'
76 |     Determinant - check = 'det'
77 |     *************************************************************************
78 |     """
79 | 
80 |     assert check in ['chol', 'svd', 'det'], 'Unknown checking function'
81 | 
82 |     B = (A + A.T) / 2
83 |     _, s, V = sp.linalg.svd(B)
84 | 
85 |     H = np.dot(V.T, np.dot(np.diag(s), V))
86 | 
87 |     A2 = (B + H) / 2
88 | 
89 |     A3 = (A2 + A2.T) / 2
90 | 
91 |     # Checking function
92 |     if check == 'chol':
93 |         isPD = isPD_chol
94 |     elif check == 'svd':
95 |         isPD = is_symPD_svd
96 |     else:
97 |         isPD = isPD_det
98 | 
99 |     if isPD(A3):
100 |         return A3
101 | 
102 |     spacing = np.spacing(np.linalg.norm(A))
103 |     I = np.eye(A.shape[0])
104 |     k = 1
105 |     while not isPD(A3):
106 |         mineig = np.min(np.real(sp.linalg.eigvals(A3)))
107 |         A3 += I * (-mineig * k**2 + spacing)
108 |         k += 1
109 | 
110 |     return A3
111 | 
112 | 
113 | def jitchol(A, maxtries = 5):
114 |     """ Cholesky with jitter """
115 |     A = np.ascontiguousarray(A)
116 |     L, info = sp.linalg.lapack.dpotrf(A, lower=1)
117 |     if info == 0:
118 |         return L
119 |     else:
120 |         diagA = np.diag(A)
121 |         if np.any(diagA <= 0.):
122 |             raise sp.linalg.LinAlgError("not pd: non-positive diagonal elements")
123 |         jitter = diagA.mean() * 1e-6
124 |         num_tries = 1
125 |         while num_tries <= maxtries and np.isfinite(jitter):
126 |             try:
127 |                 L = sp.linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
128 |                 print('(!chol jitter added : {})'.format(jitter), end = '')
129 |                 return L
130 |             except:
131 |                 jitter *= 10
132 |             finally:
133 |                 num_tries += 1
134 |         raise sp.linalg.LinAlgError("not positive definite, even with jitter.")
135 |     import traceback
136 |     try: raise
137 |     except:
138 |         print('\n'.join(['Added jitter of {:.10e}'.format(jitter),
139 |             '  in '+traceback.format_list(traceback.extract_stack(limit=3)[-2:-1])[0][2:]]))
140 |     return L
141 | 
142 | def triang_solve(A, B, lower = True, trans = False, unitdiag = False):
143 |     """
144 |     Wrapper for lapack dtrtrs function
145 |     DTRTRS solves a triangular system of the form
146 |         A * X = B  or  A**T * X = B,
147 |     where A is a triangular matrix of order N, and B is an N-by-NRHS
148 |     matrix. A check is made to verify that A is nonsingular.
149 |     :param A: Matrix A(triangular)
150 |     :param B: Matrix B
151 |     :param lower: is matrix lower (true) or upper (false)
152 |     :param trans: calculate A**T * X = B (true) or A * X = B (false)
153 | 
154 |     :returns: Solution to A * X = B or A**T * X = B
155 |     """
156 | 
157 |     lower_num = 1 if lower else 0
158 |     trans_num = 1 if trans else 0
159 |     unitdiag_num = 1 if unitdiag else 0
160 | 
161 |     A = np.asfortranarray(A)
162 |     #Note: B does not seem to need to be F ordered!
163 |     return np.matrix(sp.linalg.lapack.dtrtrs(A, B, lower=lower_num, trans=trans_num, unitdiag=unitdiag_num)[0])
164 | 
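# Editorial example (not part of the original module): a minimal sketch showing how
# triang_solve above and mulinv_solve below combine a Cholesky factor with two
# triangular solves to compute A^{-1} b without ever forming A^{-1} explicitly.
def _example_cholesky_solve():
    rng = np.random.RandomState(0)
    X = rng.randn(5, 5)
    A = X.dot(X.T) + 5 * np.eye(5)   # symmetric positive definite matrix
    b = rng.randn(5, 1)

    L = jitchol(A)                   # lower triangular factor, A = L L^T
    x = mulinv_solve(L, b)           # two triangular solves: L y = b, then L^T x = y

    assert np.allclose(np.dot(A, x), b)
    return x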
165 | def mulinv_solve(F, B, triang = True):
166 |     """
167 |     Returns C = A^{-1} * B where A = F*F^{T}
168 | 
169 |     triang = True -> when F is LOWER triangular. This gives faster calculation
170 | 
171 |     """
172 |     if triang:
173 |         tmp = triang_solve(F, B)                 # F*tmp = B
174 |         C = triang_solve(F, tmp, trans = True)   # F.T*C = tmp
175 | 
176 |     else:
177 |         tmp = np.matrix(sp.linalg.solve(F, B))   # F*tmp = B
178 |         C = np.matrix(sp.linalg.solve(F.T, tmp)) # F.T*C = tmp
179 | 
180 |     return C
181 | 
182 | def mulinv_solve_rev(F, B, triang = True):
183 |     """
184 |     Reversed version of mulinv_solve
185 | 
186 |     Returns C = B * A^{-1} where A = F*F^{T}
187 | 
188 |     triang = True -> when F is LOWER triangular. This gives faster calculation
189 | 
190 |     """
191 |     return mulinv_solve(F, B.T, triang).T
192 | 
193 | 
194 | def symmetrify(A, upper=False):
195 |     """ Create symmetric matrix from triangular matrix """
196 |     triu = np.triu_indices_from(A,k=1)
197 |     if upper:
198 |         A.T[triu] = A[triu]
199 |     else:
200 |         A[triu] = A.T[triu]
201 | 
202 | def chol_inv(L):
203 |     """
204 |     Return inverse of matrix A = L*L.T where L is lower triangular
205 |     Uses LAPACK function dpotri
206 |     """
207 |     A_inv, info = sp.linalg.lapack.dpotri(L, lower=1)
208 |     A_inv = np.matrix(A_inv)
209 |     symmetrify(A_inv)
210 |     return A_inv
211 | 
212 | def traceprod(A, B):
213 |     """
214 |     Calculate trace(A*B) for two matrices A and B
215 |     """
216 |     return np.sum(inner1d(np.array(A), np.array(B).T))
--------------------------------------------------------------------------------
/GPConstr/util/stats.py:
--------------------------------------------------------------------------------
1 | from sklearn.neighbors import KernelDensity
2 | import numpy as np
3 | import scipy as sp
4 | 
5 | def norm_cdf_int(mu, std, LB, UB):
6 |     """ Return P(LB < X < UB) for X Normal(mu, std) """
7 |     rv = sp.stats.norm(mu, std)
8 |     return rv.cdf(UB) - rv.cdf(LB)
9 | 
10 | def norm_cdf_int_approx(mu, std, LB, UB):
11 |     """
12 |     Return P(LB < X < UB) for X Normal(mu, std) using approximation of Normal CDF
13 | 
14 |     Input: All inputs as 1-D arrays
15 |     """
16 |     l = normal_cdf_approx((LB - mu)/std)
17 |     u = normal_cdf_approx((UB - mu)/std)
18 |     return u - l
19 | 
20 | def normal_cdf_approx(x):
21 |     """
22 |     Approximation of standard normal CDF
23 | 
24 |     Input: x = array
25 | 
26 |     Polynomial approximation from Abramowitz and Stegun p. 
932 27 | http://people.math.sfu.ca/~cbm/aands/frameindex.htm 28 | 29 | Absolute error < 7.5*10^-8 30 | """ 31 | p = 0.2316419 32 | b = [0.319381530, -0.356563782, 1.781477937, -1.821255978, 1.330274429] 33 | 34 | xx = abs(x) # Approximation only works for x > 0, return 1 - p otherwise 35 | 36 | t = 1/(1 + p*xx) 37 | Z = (1/(np.sqrt(2*np.pi)))*np.exp(-(x*x)/2) 38 | pol = b[0]*t + b[1]*(t**2) + b[2]*(t**3) + b[3]*(t**4) + b[4]*(t**5) 39 | 40 | prob = 1 - Z*pol # For x > 0 41 | prob[x < 0] = 1 - prob[x < 0] # Change when x < 0 42 | 43 | return prob 44 | 45 | def mode_from_samples(samples, bandwidth_fraction = 0.1): 46 | """ 47 | Compute the mode for each set of samples in 'samples' 48 | 49 | Using kernel density estimation 50 | 51 | Input: 52 | samples -- m x n array with n samples for each of m univariate random variables 53 | bandwidth_fraction -- kde will use bandwidth = [range of dataseries (max - min)] * bandwidth_fraction 54 | """ 55 | 56 | # Function to optimize for finding the mode 57 | # -- the kernel density estimator 58 | def optfun(x, *args): 59 | kde = args[0] 60 | return -kde.score_samples(x.reshape(-1, 1)) 61 | 62 | mode = np.zeros(samples.shape[0]) 63 | 64 | for i in range(samples.shape[0]): 65 | data = samples[i].T 66 | min_x, max_x = data.min(), data.max() 67 | bandwidth = bandwidth_fraction*(max_x - min_x) 68 | 69 | # Fit kde 70 | kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(data) 71 | 72 | # Find argmax of density 73 | args = (kde, ) 74 | bounds = [(min_x, max_x)] 75 | 76 | res = sp.optimize.differential_evolution(optfun, bounds = bounds, args = args) 77 | 78 | mode[i] = res.x[0] 79 | 80 | return mode 81 | 82 | def trunc_norm_moments_approx_corrfree(mu, sigma, LB, UB, inf_num = 1E100): 83 | """ 84 | Correlation free approximation of truncated moments of multivariate Gaussian 85 | 86 | If X~N(mu, sigma), compute expectation and variance of X | LB <= X <= UB 87 | 88 | Input: 89 | mu, LB, UB : 1D numpy arrays 90 | sigma : numpy matrix 91 | inf_num : inf values are replaced with this number in calculations 92 | 93 | Returns: 94 | tmu, tvar (expectation and variance of truncated variable) 95 | """ 96 | 97 | s2 = np.diag(sigma) 98 | s = np.sqrt(s2) 99 | a = (LB - mu )/s 100 | b = (UB - mu )/s 101 | 102 | # Replace inf and -inf by numbers 103 | a[a == float('inf')] = inf_num 104 | a[a == float('-inf')] = -inf_num 105 | b[b == float('inf')] = inf_num 106 | b[b == float('-inf')] = -inf_num 107 | 108 | phi_a = sp.stats.norm.pdf(a) 109 | phi_b = sp.stats.norm.pdf(b) 110 | PHI_diff = normal_cdf_approx(b) - normal_cdf_approx(a) 111 | 112 | tmu = mu + s*(phi_a - phi_b)/PHI_diff 113 | tvar = s2*(1 + (a*phi_a - b*phi_b)/PHI_diff - ((phi_a - phi_b)/PHI_diff)**2) 114 | 115 | return tmu, tvar -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Christian Agrell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies 
or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GPConstr - Gaussian Process regression with linear operator constraints
2 | Python module for constrained GP regression.
3 | 
4 | Code based on the paper [_C. Agrell (2019) Gaussian processes with linear operator inequality constraints_](https://arxiv.org/abs/1901.03134). The current implementation covers boundedness of the function to estimate, combined with bounds on its first order partial derivatives, using the RBF or Matérn5/2 kernel.
5 | 
6 | ### Prerequisites
7 | Besides the standard numpy/scipy libraries, [rpy2](https://pypi.org/project/rpy2/) is used to access some useful R packages for working with the truncated multivariate normal distribution. The code has been tested with the following requirements:
8 | 
9 | __Python 3 (3.6.3 64bit)__
10 | - __numpy (1.14.0)__
11 | - __scipy (1.1.0)__
12 | - __pandas (0.22.0)__
13 | - __sklearn (0.19.1)__ _Only uses the function sklearn.metrics.pairwise.euclidean_distances from this package for fast computation of Gram matrices (and could easily be replaced by custom code if needed)_
14 | - __rpy2 (2.8.6)__ _Used to access R for computation involving the truncated multivariate normal. See the Python wrapper in '/GPConstr/r_functions/' for details_
15 | 
16 | __R (3.4.3)__
17 | - __tmvtnorm (1.4.10)__
18 | - __mvtnorm (1.0.7)__
19 | - __TruncatedNormal (1.0)__
20 | - __truncnorm (1.0.8)__
21 | 
22 | ### Examples
23 | Some examples are given in jupyter notebooks. [pyDOE](https://pythonhosted.org/pyDOE/) is used to generate training data, and plots are created using [plotly](https://github.com/plotly/plotly.py) with some [custom plotting functions](https://github.com/cagrell/gp_plotly) for GPs.
24 | - [__Example_1a.ipynb__](https://github.com/cagrell/gp_constr/blob/master/Example_1a.ipynb) _1D example of boundedness and monotonicity constraints_
25 | - [__Example_1b.ipynb__](https://github.com/cagrell/gp_constr/blob/master/Example_1b.ipynb) _1D example of boundedness and monotonicity constraints - with noise_
26 | - [__Example_2.ipynb__](https://github.com/cagrell/gp_constr/blob/master/Example_2.ipynb) _Emulation in 4D with derivative information_
27 | - [__Example_3.ipynb__](https://github.com/cagrell/gp_constr/blob/master/Example_3.ipynb) _Regression in 5D with derivative information_
28 | 
29 | ### Further work
30 | We will be including other types of constraints and kernels as needed, either building on the current implementation or on a suitable GP library with good functionality for kernel manipulation, such as [GPflow](https://github.com/GPflow/GPflow).
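### Usage sketch
A minimal end-to-end sketch (editorial addition), following the unconstrained-optimization flow in `test_py.py` - see the notebooks above for how constraints and virtual observation locations are added:

```python
import numpy as np
from GPConstr.model import GPmodel
from GPConstr.kern import kernel_RBF

X = np.random.rand(5, 2)                 # training inputs (n x d)
y = (X[:, 0]*2)**2 + np.sin(X[:, 1]*5)   # training targets (1d array)

ker = kernel_RBF(variance = 1, lengthscale = [1, 1])
model = GPmodel(kernel = ker, likelihood = 1e-6, mean = 0)
model.X_training = X
model.Y_training = y

model.optimize(include_constraint = False, fix_likelihood = True)
print(model)
```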
31 | 
--------------------------------------------------------------------------------
/runJupyter.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | jupyter notebook
--------------------------------------------------------------------------------
/test_py.py:
--------------------------------------------------------------------------------
1 | 
2 | # For checking that importing the model with R dependency works ok
3 | 
4 | from GPConstr.model import GPmodel, Constraint
5 | from GPConstr.kern import kernel_RBF
6 | 
7 | import pyDOE
8 | import numpy as np
9 | 
10 | # Function to emulate
11 | def fun(x1, x2):
12 |     return (x1*2)**2 + np.sin(x2*5)
13 | 
14 | # Define a model and print it
15 | def main():
16 |     print('testing..')
17 | 
18 |     # Design data
19 |     n_samples = 5
20 |     x_design = pyDOE.lhs(2, samples = n_samples, criterion = 'maximin', iterations = 1000)
21 |     y_design = np.array([fun(x[0], x[1]) for x in x_design])
22 | 
23 |     # Initial parameters
24 |     gp_mean = 0 # Constant mean function
25 |     gp_likelihood = 0.000001 # Gaussian noise
26 |     kernel_variance = 1
27 |     kernel_lengthscale = [1, 1]
28 | 
29 |     # Set up model
30 |     ker = kernel_RBF(variance = kernel_variance, lengthscale = kernel_lengthscale)
31 |     model = GPmodel(kernel = ker, likelihood = gp_likelihood, mean = gp_mean)
32 | 
33 |     # Training data
34 |     model.X_training = x_design
35 |     model.Y_training = y_design
36 | 
37 |     # Optimize
38 |     model.optimize(include_constraint = False, fix_likelihood = True)
39 | 
40 |     print(model)
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     main()
--------------------------------------------------------------------------------