├── battery_sampler_dict.pkl
├── requirements.txt
├── README.md
├── LICENSE
├── data
│   └── anonymized_battery_data.csv
├── .gitignore
├── battery_func.py
├── environment.py
├── figure_plotting.py
├── experiments.py
├── sampling.py
├── plotting.py
├── functions.py
└── gp_utils.py

/battery_sampler_dict.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpfolch/MFBoom/HEAD/battery_sampler_dict.pkl
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file may be used to create an environment using:
2 | # $ conda create --name <env> --file <this file>
3 | # platform: osx-64
4 | botorch==0.6.0
5 | gpytorch==1.6.0
6 | matplotlib==3.5.1
7 | numpy==1.22.2
8 | pillow==9.0.1
9 | python==3.8.2
10 | scikit-learn==1.0.2
11 | scipy==1.8.0
12 | torch==1.10.2
13 | pandas==1.4.1
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Combining Multi-Fidelity Modelling and Asynchronous Batch Bayesian Optimization
2 |
3 | GitHub repository containing all the code used in the research paper:
4 |
5 | Folch, J.P., Lee, R.M., Shafei, B., Walz, D., Tsay, C., van der Wilk, M., & Misener, R. (2023). "Combining Multi-Fidelity Modelling and Asynchronous Batch Bayesian Optimization". Computers & Chemical Engineering, 172, p.108194.
6 |
7 | ```
8 | @article{folch2023combining,
9 |   title={Combining multi-fidelity modelling and asynchronous batch Bayesian Optimization},
10 |   author={Folch, Jose Pablo and Lee, Robert M and Shafei, Behrang and Walz, David and Tsay, Calvin and van der Wilk, Mark and Misener, Ruth},
11 |   journal={Computers \& Chemical Engineering},
12 |   pages={108194},
13 |   year={2023},
14 |   volume={172},
15 |   publisher={Elsevier}
16 | }
17 | ```
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2022, Jose Pablo Folch
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /data/anonymized_battery_data.csv: -------------------------------------------------------------------------------- 1 | dop1,dop2,dop3,dop4,dop5,response2 2 | 0,0,0.52,0,0.48,0.811348731 3 | 1,0,0,0,0,0.883845141 4 | 0.52,0.48,0,0,0,0.857591584 5 | 0,0.34,0.32,0.34,0,0.973082276 6 | 0.32,0.34,0.34,0,0,0.862317462 7 | 0,0.5,0,0,0.5,0.786308968 8 | 0,0,0,0,0.52,0.806092828 9 | 0,0,0.52,0,0,0.786821611 10 | 0.5,0,0,0,0,1 11 | 0,0,0,0,0.5,0.801110149 12 | 0,0.34,0,0.32,0.34,0.745494166 13 | 0,0.52,0,0,0,0.956892888 14 | 0.34,0,0,0.32,0.34,0.694336762 15 | 0.32,0.34,0,0,0.34,0.730425227 16 | 0,0,0,0.52,0,0.854910178 17 | 0.34,0,0.32,0,0.34,0.795309673 18 | 0.34,0,0.34,0.32,0,0.93780564 19 | 0.34,0.32,0.34,0,0,0.807549845 20 | 0,0.5,0,0,0,0.90994496 21 | 0,0.34,0.34,0.32,0,0.850993776 22 | 0,0.5,0,0.5,0,0.764095614 23 | 0.34,0,0,0.34,0.32,0.730522873 24 | 0,0,0,1,0,0.777536772 25 | 0.32,0.34,0,0.34,0,0.823210672 26 | 0.34,0,0.32,0.34,0,0.877146681 27 | 0,0,0.5,0.5,0,0.817506456 28 | 0.5,0.5,0,0,0,0.896721788 29 | 0,0,0.5,0,0,0.885462455 30 | 0.5,0,0,0,0.5,0.702300432 31 | 0,0,0,0.48,0.52,0.808774157 32 | 0.5,0,0,0.5,0,0.772615018 33 | 0.34,0.32,0,0,0.34,0.860270359 34 | 0,0,1,0,0,0.814200082 35 | 0.34,0.32,0,0.34,0,0.769171228 36 | 0,0,0,0.5,0,0.885747697 37 | 0,0,0.34,0.34,0.32,0.706207725 38 | 0,0,0,0.5,0.5,0.735899516 39 | 0,0,0.5,0,0.5,0.746392461 40 | 0,0.34,0.34,0,0.32,0.748644498 41 | 0,0,0.52,0.48,0,0.728060217 42 | 0,0.5,0.5,0,0,0.814112006 43 | 0,1,0,0,0,0.851659896 44 | 0.5,0,0.5,0,0,0.888203561 45 | 0.52,0,0,0,0,0.946166746 46 | 0,0.34,0.32,0,0.34,0.9170068 47 | 0,0.34,0,0.34,0.32,0.761772657 48 | 0,0,0,0,0,0.977106787 49 | 0.34,0,0.34,0,0.32,0.846587128 50 | 0,0,0,0,1,0.791386607 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # apple stuff 132 | .DS_Store 133 | 134 | # experiments 135 | experiment_results/ 136 | 137 | # HPC submit 138 | submit.pbs 139 | 140 | # Figures 141 | Figures/ 142 | 143 | # local stuff 144 | nn_utils.py 145 | bnn_utils.py 146 | bnn_utils/ 147 | svm_test.py 148 | testing.py -------------------------------------------------------------------------------- /battery_func.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | import pandas as pd 4 | from sklearn.preprocessing import PolynomialFeatures 5 | from sklearn.linear_model import LinearRegression 6 | from gp_utils import BoTorchGP 7 | from sampling import EfficientThompsonSampler 8 | import torch 9 | import pickle 10 | 11 | """ 12 | This script can be used to create new benchmarks based on the Battery Dataset. 13 | 14 | A Gaussian Process is fit to the data, and a sample is created using the method described in Wilson et. al (2020) [https://arxiv.org/pdf/2002.09309.pdf] 15 | 16 | All the required parameters are saved in a dictionary. See 'functions.py' for an example of how to use the dictionary to create a function. 
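
For reference, a minimal sketch of rebuilding the sampler from the saved dictionary
(the same keys are used by Battery.__init__ in 'functions.py'):

    with open('battery_sampler_dict.pkl', 'rb') as inpt:
        d = pickle.load(inpt)
    model = BoTorchGP(lengthscale_dim = 6)
    model.fit_model(d['X'], d['Y'])
    model.set_hyperparams(d['model_hyperparams'])
    sampler = EfficientThompsonSampler(model, num_of_samples = 2, num_of_multistarts = 1)
    sampler.biases, sampler.thetas = d['biases'], d['thetas']
    sampler.weights, sampler.Phi = d['weights'], d['Phi']

Querying 'sampler.query_sample(x)' then reproduces the fixed benchmark function.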
17 | """
18 |
19 | battery_data = pd.read_csv('data/anonymized_battery_data.csv').values
20 |
21 | X = battery_data[:, :6] # five doping fractions, plus a sixth column that is overwritten below
22 | X[:, 5] = 1 - np.sum(X[:, :5], axis = 1) # sixth mixture component, so that each row sums to one
23 |
24 | Y = (battery_data[:, 5:] - 0.5) * 2 # rescale the response column(s) from [0, 1] to [-1, 1]; note X is a view of battery_data, so column 5 here was already overwritten above, and Y[:, 1] below assumes the original (non-anonymized) data had two response columns
25 |
26 | model = BoTorchGP(lengthscale_dim = 6)
27 | model.fit_model(X, Y)
28 | model.set_hyperparams((0.5, torch.tensor([0.2 for _ in range(6)]), .001, 0.75))
29 | model.define_noise_constraints(noise_ub = 0.01)
30 | model.define_constraints(0.3, 0.5, 0.5)
31 | model.optim_hyperparams(num_of_epochs = 250, verbose = True)
32 |
33 | sampler = EfficientThompsonSampler(model, num_of_samples = 2, num_of_multistarts = 1)
34 | sampler.create_sample()
35 |
36 | test_x1 = torch.tensor([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]).reshape(1, -1)
37 | test_x2 = torch.tensor([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]).reshape(1, -1)
38 | test_x3 = torch.tensor([0.25, 0.87, 0.2, 0.45, 0.45, 0.45]).reshape(1, -1)
39 | test_y1 = sampler.query_sample(test_x1)
40 | test_y2 = sampler.query_sample(test_x2)
41 | test_y3 = sampler.query_sample(test_x3)
42 |
43 | print(test_y1)
44 | print(test_y2)
45 | print(test_y3)
46 |
47 |
48 | model_hypers = model.current_hyperparams()
49 | biases = sampler.biases.clone()
50 | thetas = sampler.thetas.clone()
51 | weights = sampler.weights.clone()
52 | Phi = sampler.Phi.clone()
53 |
54 | model2 = BoTorchGP(lengthscale_dim = 6) # rebuild the model and sampler from the saved state, to check that the sample is reproduced exactly
55 | model2.fit_model(X, Y)
56 | model2.set_hyperparams(model_hypers)
57 |
58 | sampler2 = EfficientThompsonSampler(model2, num_of_samples = 2, num_of_multistarts = 1)
59 | sampler2.biases = biases
60 | sampler2.thetas = thetas
61 | sampler2.weights = weights
62 | sampler2.Phi = Phi
63 |
64 | test_x1 = torch.tensor([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]).reshape(1, -1)
65 | test_x2 = torch.tensor([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]).reshape(1, -1)
66 | test_x3 = torch.tensor([0.25, 0.87, 0.2, 0.45, 0.45, 0.45]).reshape(1, -1)
67 | test_y1 = sampler2.query_sample(test_x1)
68 | test_y2 = sampler2.query_sample(test_x2)
69 | test_y3 = sampler2.query_sample(test_x3)
70 |
71 | print(test_y1)
72 | print(test_y2)
73 | print(test_y3)
74 |
75 | sampler_dict = {}
76 |
77 | sampler_dict['X'] = X
78 | sampler_dict['Y'] = Y[:, 1]
79 | sampler_dict['model_hyperparams'] = model_hypers
80 | sampler_dict['biases'] = biases
81 | sampler_dict['thetas'] = thetas
82 | sampler_dict['weights'] = weights
83 | sampler_dict['Phi'] = Phi
84 |
85 |
86 | with open('battery_sampler_dict.pkl', 'wb') as outp:
87 |     pickle.dump(sampler_dict, outp, pickle.HIGHEST_PROTOCOL)
88 |
89 | with open('battery_sampler_dict.pkl', 'rb') as inpt:
90 |     sampler_dict_loaded = pickle.load(inpt)
--------------------------------------------------------------------------------
/environment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class mfBatchEnvironment():
5 |     '''
6 |     Environment for multi-fidelity, asynchronous batch Bayesian Optimization.
7 |
8 |     Defined by the function being evaluated.
Methods required:
9 |     func.evaluate(x, m) - returns an observation at fidelity m and query x
10 |     func.eval_times(M) - takes as input an array of fidelities, and returns the evaluation time of the function for each one
11 |     '''
12 |     def __init__(self, func):
13 |         # add function
14 |         self.func = func
15 |         self.dim = func.dim
16 |         self.num_of_fidelities = func.num_of_fidelities
17 |         self.initialize_env()
18 |
19 |     def initialize_env(self):
20 |         # initialize time
21 |         self.current_time = 0
22 |         # initialize query, fidelity and times array
23 |         self.query_list = np.empty((0, self.dim))
24 |         self.fidelities_list = np.empty((0, 1))
25 |         self.query_times = np.empty((0, 1))
26 |
27 |     def step(self, new_queries, fidelities):
28 |         # add new queries and fidelities to list
29 |         self.query_list = np.concatenate((self.query_list, new_queries))
30 |         self.fidelities_list = np.concatenate((self.fidelities_list, fidelities))
31 |         # calculate evaluation times and join them, subtract one
32 |         new_eval_times = self.func.eval_times(fidelities)
33 |         self.query_times = np.concatenate((self.query_times, new_eval_times)) - 1
34 |         # check if we have queries to return
35 |         queries_finished = (self.query_times == 0).reshape(-1)
36 |         # choose finished queries from query list
37 |         queries_out = self.query_list[queries_finished, :]
38 |         fidelities_out = self.fidelities_list[queries_finished, :]
39 |         # initialize observation list
40 |         obs = []
41 |         for query, fidelity in zip(queries_out, fidelities_out):
42 |             query = query.reshape(1, -1)
43 |             fidelity = int(fidelity)
44 |             obs_out = self.func.evaluate(query, fidelity)
45 |             obs.append(obs_out)
46 |
47 |         if len(obs) > 0:
48 |             obs = np.array(obs).reshape(-1, 1)
49 |             if queries_out.shape[0] > len(obs):
50 |                 print('Warning: more finished queries than returned observations!')
51 |         else:
52 |             obs = None
53 |
54 |         # redefine the queries being evaluated
55 |         self.query_list = self.query_list[np.invert(queries_finished), :]
56 |         self.fidelities_list = self.fidelities_list[np.invert(queries_finished), :]
57 |         self.query_times = self.query_times[np.invert(queries_finished), :]
58 |
59 |         # increase time-step
60 |         self.current_time += 1
61 |
62 |         return queries_out, fidelities_out, obs
63 |
64 |     def finished_with_optim(self):
65 |         number_of_queries_left = self.query_list.shape[0]
66 |         obs = []
67 |         for i in range(number_of_queries_left):
68 |             query = self.query_list[i, :].reshape(1, -1)
69 |             fid = self.fidelities_list[i, :].reshape(1, 1)
70 |             obs.append(self.func.evaluate(query, fid))
71 |
72 |         if len(obs) > 0:
73 |             obs = np.array(obs).reshape(-1, 1)
74 |
75 |         return self.query_list, self.fidelities_list, obs
--------------------------------------------------------------------------------
/figure_plotting.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from gp_utils import BoTorchGP
4 | import torch
5 |
6 | """
7 | Script used to create the illustrations in the paper.
8 |
9 | It provides illustrations of Local Penalization [Alvi et al. 2019] and MF-GP-UCB [Kandasamy et al.
2016] 10 | 11 | """ 12 | 13 | 14 | plot_objective = False 15 | plot_ucb_low_fid = False 16 | plot_ucb_low_fid_plus_bias = False 17 | plot_ucb_high_fid = False 18 | plot_observations = True 19 | plot_min_ucb = True 20 | first_penalized_af = True 21 | plot_batch = True 22 | plot_max_bias = False 23 | 24 | bias = 0.3 25 | lipschitz_constant = 20 26 | max_val_observed = 3.46 27 | 28 | def test_func(x, m): 29 | if m == 0: 30 | out1 = np.cos(10 * x) 31 | out2 = np.exp(-(x - 0.65)**2) 32 | return out1 * out2 + 2.5 33 | 34 | if m == 1: 35 | out1 = np.cos(10.5 * x) 36 | out2 = np.exp(-(x - 0.6)**2) 37 | return 0.75 * out1 * out2 + 2.5 38 | 39 | x_grid = np.linspace(0, 1, 1001).reshape(-1, 1) 40 | 41 | x_train_low_fid = np.array([0.02, 0.05, 0.1, 0.25, 0.57, 0.59, 0.62, 0.77, 0.9]).reshape(-1, 1) 42 | x_train_high_fid = np.array([0.021, 0.55, 0.57, 0.61]).reshape(-1, 1) 43 | 44 | y_train_low_fid = test_func(x_train_low_fid, 1).reshape(-1, 1) 45 | y_train_high_fid = test_func(x_train_high_fid, 0).reshape(-1, 1) 46 | 47 | eval_batch = [0.378] 48 | 49 | model_low_fid = BoTorchGP() 50 | model_high_fid = BoTorchGP() 51 | 52 | model_low_fid.fit_model(x_train_low_fid, y_train_low_fid) 53 | model_high_fid.fit_model(x_train_high_fid, y_train_high_fid) 54 | 55 | model_low_fid.set_hyperparams((1, 0.1, 1e-5, 2)) 56 | model_high_fid.set_hyperparams((1, 0.1, 1e-5, 2)) 57 | 58 | with torch.no_grad(): 59 | mean_low_fid, std_low_fid = model_low_fid.posterior(x_grid) 60 | mean_high_fid, std_high_fid = model_high_fid.posterior(x_grid) 61 | 62 | y_high_fid_objective = test_func(x_grid, 0) 63 | y_low_fid_objective = test_func(x_grid, 1) 64 | 65 | fig, ax = plt.subplots() 66 | 67 | fig.set_figwidth(10) 68 | fig.set_figheight(6) 69 | 70 | 71 | if plot_objective is True: 72 | # ax.plot(x_grid, y_low_fid_objective, color = 'b') 73 | ax.plot(x_grid, y_high_fid_objective, color = 'k', label = 'objective') 74 | 75 | if plot_ucb_high_fid is True: 76 | # ax.plot(x_grid, mean_high_fid, color = 'r', label = 'GP high fidelity') 77 | ax.plot(x_grid, mean_high_fid, color = 'r') 78 | ax.fill_between(x_grid.reshape(-1), mean_high_fid - 1.96 * std_high_fid, mean_high_fid + 1.96 * std_high_fid, color = 'r', alpha = 0.2) 79 | # ax.plot(x_grid, mean_high_fid + 1.96 * std_high_fid, linestyle = '--', color = 'r') 80 | 81 | if plot_ucb_low_fid is True: 82 | if plot_ucb_low_fid_plus_bias is True: 83 | bias_plot = bias 84 | else: 85 | bias_plot = 0 86 | # ax.plot(x_grid, mean_low_fid, color = 'b', label = 'GP low fidelity') 87 | ax.plot(x_grid, mean_low_fid, color = 'b') 88 | ax.fill_between(x_grid.reshape(-1), mean_low_fid - 1.96 * std_low_fid, mean_low_fid + 1.96 * std_low_fid, color = 'b', alpha = 0.2) 89 | # ax.plot(x_grid.reshape(-1), mean_low_fid + 1.96 * std_low_fid + bias, linestyle = '--', color = 'b') 90 | 91 | if plot_observations is True: 92 | plt.scatter(x_train_low_fid, y_train_low_fid, c = 'b') 93 | plt.scatter(x_train_high_fid, y_train_high_fid, c = 'r') 94 | 95 | if plot_min_ucb is True: 96 | ucb_low_fid = mean_low_fid + 1.96 * std_low_fid + bias 97 | ucb_high_fid = mean_high_fid + 1.96 * std_high_fid 98 | min_ucb = torch.minimum(ucb_low_fid, ucb_high_fid) 99 | ax.plot(x_grid, min_ucb, linestyle = '--', color = 'g', label = 'Acquisition Function') 100 | 101 | if first_penalized_af is True: 102 | # calculate penalizer 103 | pen_point = np.array(eval_batch[0]).reshape(1, 1) 104 | with torch.no_grad(): 105 | pen_point_mean, pen_point_std = model_low_fid.posterior(pen_point) 106 | pen_point_mean = pen_point_mean 107 | pen_point_std 
= pen_point_std
108 |
109 |     r_j = (max_val_observed - pen_point_mean) / lipschitz_constant
110 |     denominator = r_j + pen_point_std / lipschitz_constant
111 |     norm = torch.norm(torch.tensor(x_grid) - pen_point, dim = 1)
112 |     penalizer = torch.min(norm / denominator, torch.tensor(1))
113 |     # previous acquisition function
114 |     ucb_low_fid = mean_low_fid + 1.96 * std_low_fid + bias
115 |     ucb_high_fid = mean_high_fid + 1.96 * std_high_fid
116 |     min_ucb = torch.minimum(ucb_low_fid * penalizer, ucb_high_fid)
117 |     # new acquisition function
118 |     new_af = min_ucb
119 |     plt.plot(x_grid, new_af, color = 'g', label = 'Penalized AF')
120 |
121 | # plt.plot(x_grid, new_af)
122 |
123 | if plot_batch is True:
124 |     plt.vlines(eval_batch, ymin = -0.1, ymax = 5, color = 'k', label = 'Batch point')
125 |
126 | if plot_max_bias is True:
127 |     idx = 415
128 |     arrow_color = 'b'
129 |     ucb_low_fid = mean_low_fid + 1.96 * std_low_fid
130 |     ax.arrow(x_grid[idx], ucb_low_fid[idx], 0, bias * 0.95, length_includes_head = True, \
131 |         head_width = .015, head_length = 0.1, overhang = 0.25, color = arrow_color)
132 |     ax.scatter([], [], marker = r'$\longrightarrow$', color = arrow_color, label = 'low fidelity bias', s = 750)
133 |
134 | ax.legend(fontsize = 20, loc = 'lower right')
135 | # ax.legend(handles=[ppfad,paufp,ppfeil],loc='lower left')
136 | ax.tick_params(axis='both', labelsize = 20)
137 | ax.set_xlim(0, 1)
138 | ax.set_ylim(-0.1, 4.5)
139 | ax.set_xlabel('x', fontsize = 20)
140 | ax.set_ylabel('y', fontsize = 20)
141 |
142 | plt.show() # note: saving after plt.show() can produce an empty file with some backends
143 |
144 | fig_name = 'PenalizedAF'
145 | save_name = 'Figures/' + fig_name + '.pdf'
146 | fig.savefig(save_name, bbox_inches = 'tight')
--------------------------------------------------------------------------------
/experiments.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch
4 | from gp_utils import BoTorchGP
5 | from functions import CurrinExp2D, BadCurrinExp2D, Hartmann3D, Hartmann6D, Park4D, Borehole8D, Ackley40D, Battery
6 | from bayes_op import mfLiveBatch, UCBwILP, mfUCB, simpleUCB, MultiTaskUCBwILP, MF_MES, MF_TuRBO, TuRBO
7 | from environment import mfBatchEnvironment
8 | import numpy as np
9 | import sys
10 | import os
11 |
12 | '''
13 | Script to run experiments on the HPC
14 | '''
15 |
16 | arg1 = sys.argv[1]
17 | arg2 = sys.argv[2]
18 | arg3 = sys.argv[3]
19 | arg4 = sys.argv[4]
20 | arg5 = sys.argv[5]
21 |
22 | # take arguments from terminal
23 | method = str(arg1)
24 | function_number = int(float(arg2))
25 | run_num = int(arg3)
26 | fidelity_choice = str(arg4)
27 | alpha = float(arg5)
28 |
29 | # method = 'MultiTaskUCBwILP'
30 | # function_number = 8
31 | # run_num = 4
32 | # fidelity_choice = 'V'
33 |
34 | if fidelity_choice == 'I':
35 |     fidelity_choice = 'information_based'
36 | elif fidelity_choice == 'V':
37 |     fidelity_choice = 'variance_thresholds'
38 |
39 | if method in ['MultiTaskUCBwILP', 'MF-TuRBO']:
40 |     pass
41 | else:
42 |     fidelity_choice = 'no_fid_choice'
43 |
44 | print("Method: ", method)
45 | print("With Fidelity Choice: ", fidelity_choice)
46 | print("Function number: ", function_number)
47 | print("Run Number: ", run_num)
48 | print("Battery Alpha: ", alpha)
49 |
50 | # Make sure problem is well defined
51 | assert method in ['mfLiveBatch', 'UCBwILP', 'mfUCB', 'simpleUCB', 'MultiTaskUCBwILP', 'MF-MES', 'MF-TuRBO', 'TuRBO'], 'Method must be a string in [mfLiveBatch, UCBwILP, mfUCB, simpleUCB, MultiTaskUCBwILP, MF-MES, MF-TuRBO, TuRBO]'
52 | assert
function_number in range(8), 'Function must be an integer between 0 and 7'
53 | assert fidelity_choice in ['variance_thresholds', 'information_based', 'no_fid_choice']
54 |
55 | battery_alpha = alpha
56 | # Define the benchmark function
57 | functions = [CurrinExp2D(), BadCurrinExp2D(), Hartmann3D(), Hartmann6D(), Park4D(), Borehole8D(), Ackley40D(), Battery(alpha = battery_alpha)]
58 | func = functions[function_number]
59 |
60 | hp_update_frequency = 20
61 | num_of_starts = 75
62 | beta = None
63 |
64 | batch_size = 4
65 | budget = int(200 * func.expected_costs[0] / batch_size)
66 |
67 | if function_number == 6:
68 |     batch_size = 20
69 |     budget = int(500 * func.expected_costs[0] / batch_size)
70 |
71 | if function_number == 7:
72 |     batch_size = 20
73 |     budget = int(300 * func.expected_costs[0] / (batch_size / func.fidelity_costs[0]))
74 |     num_of_starts = 10
75 |
76 | # Define seed, sample initialisation points
77 | seed = run_num + function_number * 505
78 | torch.manual_seed(seed)
79 | np.random.seed(seed)
80 |
81 | dim = func.dim
82 |
83 | x_init_size = int(80 * np.log(dim))
84 |
85 | if func.name == 'Battery':
86 |     x_train = func.gen_search_grid(grid_size = int(x_init_size / 10))
87 | else:
88 |     x_train = np.random.uniform(0, 1, size = (x_init_size, dim))
89 | y_train = []
90 | for i in range(0, x_train.shape[0]):
91 |     y_train.append(func.evaluate(x_train[i, :].reshape(1, -1), func.num_of_fidelities - 1))
92 |     print('Generating pre-training samples, finished with: ', i + 1)
93 |
94 | y_train = np.array(y_train)
95 |
96 | # train and set an educated guess of the hyper-parameters
97 | gp_model = BoTorchGP(lengthscale_dim = dim)
98 |
99 | gp_model.fit_model(x_train, y_train)
100 | gp_model.set_hyperparams((0.5, torch.tensor([0.2 for _ in range(dim)]), .1, 0))
101 |
102 | gp_model.optim_hyperparams(num_of_epochs = 150)
103 |
104 | hypers = gp_model.current_hyperparams()
105 |
106 | # define the environment
107 | env = mfBatchEnvironment(func)
108 |
109 | fidelity_thresholds = [0.1 for _ in range(func.num_of_fidelities)]
110 |
111 | # Choose the correct method
112 | if method == 'mfLiveBatch':
113 |     init_bias = 0.05
114 |     mod = mfLiveBatch(env, budget = budget, hp_update_frequency = hp_update_frequency, cost_budget = batch_size, num_of_optim_epochs = 15, initial_bias = init_bias, fidelity_thresholds = fidelity_thresholds, num_of_starts = num_of_starts, beta = beta)
115 | elif method == 'UCBwILP':
116 |     mod = UCBwILP(env, budget = budget, hp_update_frequency = hp_update_frequency, cost_budget = batch_size, num_of_starts = num_of_starts, beta = beta)
117 | elif method == 'mfUCB':
118 |     mod = mfUCB(env, budget = budget, hp_update_frequency = hp_update_frequency, cost_budget = batch_size, fidelity_thresholds = fidelity_thresholds, num_of_starts = num_of_starts, beta = beta)
119 | elif method == 'simpleUCB':
120 |     mod = simpleUCB(env, budget = budget, hp_update_frequency = hp_update_frequency, cost_budget = batch_size, num_of_starts = num_of_starts, beta = beta)
121 | elif method == 'MultiTaskUCBwILP':
122 |     mod = MultiTaskUCBwILP(env, budget = budget, hp_update_frequency = hp_update_frequency, cost_budget = batch_size, fidelity_choice = fidelity_choice, fidelity_thresholds = fidelity_thresholds, num_of_starts = num_of_starts, beta = beta)
123 | elif method == 'MF-MES':
124 |     mod = MF_MES(env, budget = budget, cost_budget = batch_size, hp_update_frequency = hp_update_frequency, num_of_starts = num_of_starts)
125 | elif method == 'MF-TuRBO':
126 |     mod = MF_TuRBO(env, budget = budget, cost_budget = batch_size,
hp_update_frequency = hp_update_frequency, fidelity_thresholds = fidelity_thresholds, fidelity_choice = fidelity_choice, num_of_starts = num_of_starts)
127 | elif method == 'TuRBO':
128 |     mod = TuRBO(env, budget = budget, cost_budget = batch_size, hp_update_frequency = hp_update_frequency, fidelity_thresholds = fidelity_thresholds, num_of_starts = num_of_starts)
129 |
130 | mod.set_hyperparams(constant = hypers[0], lengthscale = hypers[1], noise = hypers[2], mean_constant = hypers[3], \
131 |     constraints = False)
132 |
133 | # run optimization
134 | X, Y, T = mod.run_optim(verbose = True)
135 |
136 | # print results
137 | print(X)
138 | print(Y)
139 |
140 | folder_inputs = 'experiment_results/' + method + '_' + fidelity_choice + '/' + func.name + f'/batch_size{batch_size}' + '/inputs/'
141 | folder_outputs = 'experiment_results/' + method + '_' + fidelity_choice + '/' + func.name + f'/batch_size{batch_size}' + '/outputs/'
142 | folder_timestamps = 'experiment_results/' + method + '_' + fidelity_choice + '/' + func.name + f'/batch_size{batch_size}' + '/time_stamps/'
143 | file_name = f'run_{run_num}'
144 |
145 | if func.name == 'Battery':
146 |     folder_inputs = 'experiment_results/' + method + '_' + fidelity_choice + '/' + func.name + f'/batch_size{batch_size}' + f'/alpha_{battery_alpha}' + '/inputs/'
147 |     folder_outputs = 'experiment_results/' + method + '_' + fidelity_choice + '/' + func.name + f'/batch_size{batch_size}' + f'/alpha_{battery_alpha}' + '/outputs/'
148 |     folder_timestamps = 'experiment_results/' + method + '_' + fidelity_choice + '/' + func.name + f'/batch_size{batch_size}' + f'/alpha_{battery_alpha}' + '/time_stamps/'
149 |     file_name = f'run_{run_num}'
150 |
151 | # create the directories if they do not exist yet
152 | os.makedirs(folder_inputs, exist_ok = True)
153 | os.makedirs(folder_outputs, exist_ok = True)
154 | os.makedirs(folder_timestamps, exist_ok = True)
155 |
156 | np.save(folder_inputs + file_name, X)
157 | np.save(folder_outputs + file_name, Y)
158 | np.save(folder_timestamps + file_name, T)
--------------------------------------------------------------------------------
/sampling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from math import pi
4 |
5 | class EfficientThompsonSampler():
6 |     def __init__(self, model, num_of_multistarts = 5, num_of_bases = 1024, num_of_samples = 1):
7 |         '''
8 |         Implementation of 'Efficiently Sampling From Gaussian Process Posteriors' by Wilson et al. (2020). It allows
9 |         us to create approximate samples of the GP posterior, which we can optimise using gradient methods. We do this
10 |         to generate candidates using Thompson Sampling. Link to the paper: https://arxiv.org/pdf/2002.09309.pdf .
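
The key identity behind the method is Matheron's rule: an approximate posterior sample is a
random-feature prior sample plus a data-driven update,

    f(x) ~ sum_i w_i * phi_i(x) + k(x, X_train) @ V,

where the phi_i are random Fourier features of the kernel, w are the sampled weights, and
V = (K_nn + sigma * I_n)^{-1} (y_train - (Phi @ w + noise)). In this class, 'sample_prior'
computes the first term, 'posterior_update' the second, and 'query_sample' returns their sum.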
11 | ''' 12 | # GP model 13 | self.model = model 14 | # inputs 15 | if type(self.model.train_x) == torch.Tensor: 16 | self.train_x = self.model.train_x 17 | else: 18 | self.train_x = torch.tensor(self.model.train_x) 19 | self.x_dim = torch.tensor(self.train_x.shape[1]) 20 | self.train_y = self.model.train_y 21 | self.num_of_train_inputs = self.model.train_x.shape[0] 22 | # thompson sampling parameters 23 | self.num_of_multistarts = num_of_multistarts 24 | self.num_of_bases = num_of_bases 25 | self.num_of_samples = num_of_samples 26 | # optimisation parameters 27 | self.learning_rate = 0.01 28 | self.num_of_epochs = 10 * self.x_dim 29 | # obtain the kernel parameters 30 | self.sigma = self.model.model.likelihood.noise.item() 31 | self.lengthscale = self.model.model.covar_module.base_kernel.lengthscale.detach().float() 32 | self.outputscale = self.model.model.covar_module.outputscale.item() 33 | # obtain the kernel 34 | self.kernel = self.model.model.covar_module 35 | # define the Knn matrix 36 | with torch.no_grad(): 37 | self.Knn = self.kernel(self.train_x) 38 | self.Knn = self.Knn.evaluate() 39 | # precalculate matrix inverse 40 | self.inv_mat = torch.inverse(self.Knn + self.sigma * torch.eye(self.num_of_train_inputs)) 41 | 42 | self.create_fourier_bases() 43 | self.calculate_phi() 44 | 45 | def create_fourier_bases(self): 46 | # sample thetas 47 | self.thetas = torch.randn(size = (self.num_of_bases, self.x_dim)) / self.lengthscale 48 | # sample biases 49 | self.biases = torch.rand(self.num_of_bases) * 2 * pi 50 | 51 | def create_sample(self): 52 | # sample weights 53 | self.weights = torch.randn(size = (self.num_of_samples, self.num_of_bases)).float() 54 | 55 | def calculate_phi(self): 56 | ''' 57 | From the paper, we are required to calculate a matrix which includes the evaluation of the training set, X_train, 58 | at the fourier frequencies. This function calculates that matrix, Phi. 59 | ''' 60 | # we take the dot product by element-wise multiplication followed by summation 61 | thetas = self.thetas.repeat(self.num_of_train_inputs, 1, 1) 62 | prod = thetas * self.train_x.unsqueeze(1) 63 | dot = torch.sum(prod, axis = -1) 64 | # add biases and take cosine to obtain fourier representations 65 | ft = torch.cos(dot + self.biases.unsqueeze(0)) 66 | # finally, multiply by corresponding constants (see paper) 67 | self.Phi = (self.outputscale * np.sqrt(2 / self.num_of_bases) * ft).float() 68 | 69 | def calculate_V(self): 70 | ''' 71 | From the paper, to give posterior updates we need to calculate the vector V. Since we are doing multiple samples 72 | at the same time, V will be a matrix. We can pre-calculate it, since its value does not depend on the query locations. 
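
Concretely, matching the code below and the 'inv_mat' pre-computed in __init__,

    V = (K_nn + sigma * I_n)^{-1} (y_train - (Phi @ w + eps)),

where Phi is the feature matrix from 'calculate_phi', w are the sampled weights, and eps is
a random noise vector scaled by 'self.sigma'.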
73 |         '''
74 |         # multiply phi matrix by weights
75 |         # PhiW: num_of_train x num_of_samples
76 |         PhiW = torch.matmul(self.Phi, self.weights.T)
77 |         # add noise (see paper)
78 |         PhiW = PhiW + torch.randn(size = PhiW.shape) * self.sigma
79 |         # subtract from training outputs
80 |         mat1 = self.train_y - PhiW
81 |         # calculate V matrix by premultiplication by inv_mat = (K_nn + I_n*sigma)^{-1}
82 |         # V: num_of_train x num_of_samples
83 |         self.V = torch.matmul(self.inv_mat, mat1)
84 |
85 |     def calculate_fourier_features(self, x):
86 |         '''
87 |         Calculate the Fourier Features evaluated at some input x
88 |         '''
89 |         # evaluation using fourier features
90 |         self.posterior_update(x) # (result unused here; the update is added separately in query_sample)
91 |         # calculate the dot product between the frequencies, theta, and the new query points
92 |         dot = x.matmul(self.thetas.T)
93 |         # calculate the fourier frequency by adding bias and cosine
94 |         ft = torch.cos(dot + self.biases.unsqueeze(0))
95 |         # apply the normalising constants and return the output
96 |         return self.outputscale * np.sqrt(2 / self.num_of_bases) * ft
97 |
98 |     def sample_prior(self, x):
99 |         '''
100 |         Create a sample from the prior, evaluate it at x
101 |         '''
102 |         if type(x) is not torch.Tensor:
103 |             x = torch.tensor(x)
104 |         # calculate the fourier features evaluated at the query points
105 |         out1 = self.calculate_fourier_features(x)
106 |         # extend the weights so that we can use element-wise multiplication
107 |         weights = self.weights.repeat(self.num_of_multistarts, 1, 1)
108 |         # return the prior
109 |         return torch.sum(weights * out1, axis = -1)
110 |
111 |     def posterior_update(self, x):
112 |         '''
113 |         Calculate the posterior update at a location x
114 |         '''
115 |         if type(x) is not torch.Tensor:
116 |             x = torch.tensor(x)
117 |         # x: num_of_multistarts x num_of_samples x dim
118 |         self.calculate_V()
119 |         # train x: num_of_multistarts x num_of_train x dim
120 |         train_x = self.train_x.repeat(self.num_of_multistarts, 1, 1)
121 |         # z: num_of_multistarts x num_of_train x num_of_samples
122 |         # z: kernel evaluation between new query points and training set
123 |         z = self.kernel(train_x, x)
124 |         z = z.evaluate()
125 |         # we now repeat V the number of times necessary so that we can use element-wise multiplication
126 |         V = self.V.repeat(self.num_of_multistarts, 1, 1)
127 |         out = z * V
128 |         return out.sum(axis = 1) # we return the sum across the number of training points, as per the paper
129 |
130 |     def query_sample(self, x):
131 |         '''
132 |         Query the sample at a location
133 |         '''
134 |         prior = self.sample_prior(x)
135 |         update = self.posterior_update(x)
136 |         return prior + update
137 |
138 |     def generate_candidates(self):
139 |         '''
140 |         Generate the Thompson Samples; this function optimizes the samples.
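
Sketch of the procedure implemented below: draw 'num_of_multistarts' random starting points per
sample in [0, 1]^d, run Adam on the negative sample value for 10 * dim epochs, clamp every
coordinate back into the unit box after each step, and finally keep the best multistart for
each sample.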
141 | ''' 142 | # we are always working on [0, 1]^d 143 | bounds = torch.stack([torch.zeros(self.x_dim), torch.ones(self.x_dim)]) 144 | # initialise randomly - there is definitely much better ways of doing this 145 | X = torch.rand(self.num_of_multistarts, self.num_of_samples, self.x_dim) 146 | X.requires_grad = True 147 | # define optimiser 148 | optimiser = torch.optim.Adam([X], lr = self.learning_rate) 149 | 150 | for _ in range(self.num_of_epochs): 151 | # set zero grad 152 | optimiser.zero_grad() 153 | # evaluate loss and backpropagate 154 | losses = - self.query_sample(X) 155 | loss = losses.sum() 156 | loss.backward() 157 | # take step 158 | optimiser.step() 159 | 160 | # make sure we are still within the bounds 161 | for j, (lb, ub) in enumerate(zip(*bounds)): 162 | X.data[..., j].clamp_(lb, ub) # need to do this on the data not X itself 163 | # check the final evaluations 164 | final_evals = self.query_sample(X) 165 | # choose the best one for each sample 166 | best_idx = torch.argmax(final_evals, axis = 0) 167 | # return the best one for each sample, without gradients 168 | X_out = X[best_idx, range(0, self.num_of_samples), :] 169 | return X_out.detach() -------------------------------------------------------------------------------- /plotting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from functions import CurrinExp2D, BadCurrinExp2D, Hartmann3D, Park4D, Borehole8D, Hartmann6D, Ackley40D, Battery 4 | 5 | ''' 6 | This script can be used to generate the plots in the paper, given the folder of results, which can be generated using experiments.py 7 | ''' 8 | 9 | function_list = [CurrinExp2D(), BadCurrinExp2D(), Hartmann3D(), Hartmann6D(), Park4D(), Borehole8D(), Ackley40D(), Battery()] 10 | function_list = [Battery()] 11 | 12 | fid_frequency = True 13 | 14 | for func_idx, func in enumerate(function_list): 15 | func_name = func.name 16 | optim = func.optimum 17 | 18 | batch_size = 4 19 | if func.name == 'Ackley40D': 20 | batch_size = 20 21 | final_time = int(500 * func.expected_costs[0] / batch_size) 22 | elif func.name in ['Battery']: 23 | batch_size = 20 24 | final_time = int(300 * func.expected_costs[0] / (batch_size / func.fidelity_costs[0])) 25 | else: 26 | final_time = int(200 * func.expected_costs[0] / batch_size) 27 | time_range = range(1, final_time + 1) 28 | if func.name in ['Ackley40D']: 29 | methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 30 | 'MultiTaskUCBwILP_variance_thresholds', 'TuRBO_no_fid_choice', \ 31 | 'MF-TuRBO_variance_thresholds', 'MF-TuRBO_information_based', 'MF-MES_no_fid_choice'] 32 | colors = ['r', 'b', 'b', 'r', 'green', 'k', 'orange', 'orange', 'purple'] 33 | styles = ['solid', 'solid', 'dashed', 'dashed', 'solid', 'solid', 'solid', 'dashed', 'dashed'] 34 | elif func.name in ['Hartmann3D', 'Hartmann6D']: 35 | methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 36 | 'MultiTaskUCBwILP_variance_thresholds', 'MultiTaskUCBwILP_information_based', 'TuRBO_no_fid_choice', \ 37 | 'MF-TuRBO_variance_thresholds', 'MF-TuRBO_information_based'] 38 | colors = ['r', 'b', 'b', 'r', 'green', 'green', 'k', 'orange', 'orange'] 39 | styles = ['solid', 'solid', 'dashed', 'dashed', 'solid', 'dashed', 'solid', 'solid', 'dashed'] 40 | elif func.name in ['Detergent']: 41 | #methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 
'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 42 | # 'MultiTaskUCBwILP_variance_thresholds', 'MultiTaskUCBwILP_information_based', 'MF-MES_no_fid_choice'] 43 | methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', \ 44 | 'MultiTaskUCBwILP_variance_thresholds', 'MultiTaskUCBwILP_information_based', 'MF-MES_no_fid_choice'] 45 | colors = ['r', 'b', 'b', 'green', 'green', 'purple'] 46 | styles = ['solid', 'solid', 'dashed', 'solid', 'dashed', 'dashed'] 47 | elif func.name in ['Battery']: 48 | #methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 49 | # 'MultiTaskUCBwILP_variance_thresholds', 'MultiTaskUCBwILP_information_based', 'MF-MES_no_fid_choice'] 50 | methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 51 | 'MultiTaskUCBwILP_variance_thresholds', 'MultiTaskUCBwILP_information_based'] 52 | #methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 53 | # 'MultiTaskUCBwILP_variance_thresholds'] 54 | colors = ['r', 'b', 'b', 'r', 'green', 'green'] 55 | styles = ['solid', 'solid', 'dashed', 'dashed', 'solid', 'dashed'] 56 | else: 57 | methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 58 | 'MultiTaskUCBwILP_variance_thresholds', 'MultiTaskUCBwILP_information_based', 'TuRBO_no_fid_choice', \ 59 | 'MF-TuRBO_variance_thresholds', 'MF-TuRBO_information_based', 'MF-MES_no_fid_choice'] 60 | colors = ['r', 'b', 'b', 'r', 'green', 'green', 'k', 'orange', 'orange', 'purple'] 61 | styles = ['solid', 'solid', 'dashed', 'dashed', 'solid', 'dashed', 'solid', 'solid', 'dashed', 'dashed'] 62 | 63 | if func_name == 'Battery' and fid_frequency == False: 64 | for alpha in [1.00]: 65 | 66 | regret_dic = {} 67 | 68 | best_magic_svm = 0 69 | 70 | for method in methods: 71 | filename = 'experiment_results/' + method + '/' + func_name + f'/batch_size{batch_size}' + f'/alpha_{alpha}' 72 | 73 | regrets_outer = [] 74 | 75 | run_list = range(1, 11) 76 | 77 | for run_num in run_list: 78 | 79 | Ys = filename + '/outputs/run_' + str(run_num) + '.npy' 80 | Xs = filename + '/inputs/run_' + str(run_num) + '.npy' 81 | Ts = filename + '/time_stamps/run_' + str(run_num) + '.npy' 82 | 83 | Ys = np.load(Ys, allow_pickle = True) 84 | Xs = np.load(Xs, allow_pickle = True) 85 | Ts = np.load(Ts, allow_pickle = True) 86 | regret = [] 87 | 88 | best_obs = np.array(0) 89 | 90 | for t in time_range: 91 | time_index = [i < t for i in Ts[0]] 92 | Ys_time_filtered = np.array(Ys[0])[time_index].reshape(-1, 1) 93 | # Xs_time_filtered = np.array(Xs[0])[time_index].reshape(-1, 1) 94 | if Ys_time_filtered.shape[0] == 0: 95 | best_obs = -3 96 | regret.append(np.log(optim - best_obs)) 97 | # regret.append(optim - best_obs) 98 | else: 99 | best_obs = np.max(Ys_time_filtered) 100 | if best_obs > best_magic_svm: 101 | best_idx = np.argmax(Ys_time_filtered) 102 | # X_best = Xs_time_filtered[best_idx, :] 103 | best_magic_svm = max(best_obs, best_magic_svm) 104 | if best_obs > optim: 105 | print('Best observation better than optimum!') 106 | regret.append(np.log(optim - best_obs)) 107 | # regret.append(optim - best_obs) 108 | if regret[-2] < regret[-1]: 109 | print('Regret is increasing!') 110 | 111 | regrets_outer.append(regret) 112 | 113 | regret_dic[method] = np.array(regrets_outer) 114 | print(best_obs) 115 | 116 | fig, ax = plt.subplots() 117 | 
fig.set_figheight(6) 118 | fig.set_figwidth(8) 119 | 120 | std_beta = 0.5 121 | 122 | for i, method in enumerate(methods): 123 | reg = regret_dic[method] 124 | 125 | # methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 126 | # 'MultiTaskUCBwILP_variance_thresholds', 'TuRBO_no_fid_choice', 'MF-TuRBO_variance_thresholds'] 127 | 128 | if method == 'simpleUCB_no_fid_choice': 129 | method = 'UCB' 130 | elif method == 'mfUCB_no_fid_choice': 131 | method = 'MF-GP-UCB' 132 | elif method == 'UCBwILP_no_fid_choice': 133 | method = 'PLAyBOOK' 134 | elif method == 'mfLiveBatch_no_fid_choice': 135 | method = 'MF-GP-UCB w LP' 136 | elif method == 'MultiTaskUCBwILP_variance_thresholds': 137 | method = 'UCB-V-LP' 138 | elif method == 'MultiTaskUCBwILP_information_based': 139 | method = 'UCB-I-LP' 140 | elif method == 'TuRBO_no_fid_choice': 141 | method = 'TuRBO-TS' 142 | elif method == 'MF-TuRBO_variance_thresholds': 143 | method = 'TuRBO-V-TS' 144 | elif method == 'MF-TuRBO_information_based': 145 | method = 'TuRBO-I-TS' 146 | elif method == 'MF-MES_no_fid_choice': 147 | method = 'MF-MES' 148 | 149 | mean = np.nanmean(reg, axis = 0) 150 | std = np.nanstd(reg, axis = 0) 151 | 152 | lb = mean - std_beta * std 153 | ub = mean + std_beta * std 154 | 155 | init_idx = int(0 * len(mean)) 156 | 157 | ax.plot(time_range[init_idx:], mean[init_idx:], label = method, color = colors[i], linestyle = styles[i]) 158 | ax.fill_between(time_range[init_idx:], lb[init_idx:], ub[init_idx:], color = colors[i], alpha = 0.2) 159 | 160 | 161 | ax.tick_params(axis='both', labelsize = 20) 162 | ax.grid() 163 | 164 | init_time = 0 165 | ax.set_ylim(ymax = 0.25) 166 | 167 | ax.set_xlim(init_time, final_time) 168 | ax.set_xlabel('Time-step', fontsize = 20) 169 | ax.set_ylabel('log(Regret)', fontsize = 20) 170 | expected_costs = func.expected_costs 171 | expected_costs.reverse() 172 | ax.set_title('Evaluation Times = ' + str(expected_costs), fontsize = 20) 173 | ax.legend(fontsize = 12) 174 | #plt.show() 175 | 176 | save_name = 'Figures/' + func_name + f'_alpha_{alpha}' + '.pdf' 177 | fig.savefig(save_name, bbox_inches = 'tight') 178 | 179 | elif func_name == 'Battery' and fid_frequency == True: 180 | 181 | methods = ['MultiTaskUCBwILP_variance_thresholds', 'MultiTaskUCBwILP_information_based'] 182 | colors = ['green', 'green'] 183 | styles = ['solid', 'dashed'] 184 | 185 | for alpha in [1.00]: 186 | 187 | Ts_dic = {} 188 | 189 | best_magic_svm = 0 190 | 191 | for method in methods: 192 | filename = 'experiment_results/' + method + '/' + func_name + f'/batch_size{batch_size}' + f'/alpha_{alpha}' 193 | 194 | all_Ts = [] 195 | 196 | run_list = range(1, 11) 197 | 198 | all_Ts = [[], []] 199 | 200 | for run_num in run_list: 201 | 202 | Ys = filename + '/outputs/run_' + str(run_num) + '.npy' 203 | Xs = filename + '/inputs/run_' + str(run_num) + '.npy' 204 | Ts = filename + '/time_stamps/run_' + str(run_num) + '.npy' 205 | 206 | Ys = np.load(Ys, allow_pickle = True) 207 | Xs = np.load(Xs, allow_pickle = True) 208 | Ts = np.load(Ts, allow_pickle = True) 209 | 210 | all_Ts[0] = all_Ts[0] + Ts[0] 211 | all_Ts[1] = all_Ts[1] + Ts[1] 212 | 213 | Ts_dic[method] = all_Ts 214 | 215 | fig, ax = plt.subplots(ncols = 2) 216 | fig.set_figheight(5.5) 217 | fig.set_figwidth(13) 218 | 219 | for i, method in enumerate(methods): 220 | 221 | # methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 222 | # 
'MultiTaskUCBwILP_variance_thresholds', 'TuRBO_no_fid_choice', 'MF-TuRBO_variance_thresholds'] 223 | 224 | if method == 'MultiTaskUCBwILP_variance_thresholds': 225 | method_name = 'Variance-based fidelity choice' 226 | elif method == 'MultiTaskUCBwILP_information_based': 227 | method_name = 'Information-based fidelity choice' 228 | 229 | ax[i].hist(Ts_dic[method][1], label = 'Low Fidelity', bins = 6, alpha = 0.5, color = 'blue') 230 | ax[i].hist(Ts_dic[method][0], label = 'High Fidelity', bins = 6, alpha = 0.5, color = 'red') 231 | 232 | ax[i].tick_params(axis='both', labelsize = 20) 233 | ax[i].grid() 234 | ax[i].set_ylim(ymin = 0, ymax = 650) 235 | 236 | ax[i].set_xlabel('Time-step', fontsize = 20) 237 | if i == 0: 238 | ax[i].set_ylabel('Frequency of querying', fontsize = 20) 239 | if i == 1: 240 | ax[i].legend(fontsize = 20, loc = 'lower left') 241 | expected_costs = func.expected_costs 242 | expected_costs.reverse() 243 | ax[i].set_title(method_name, fontsize = 20) 244 | 245 | save_name = 'Figures/BatteryQueryingFrequency.pdf' 246 | fig.tight_layout(rect=[0, 0.03, 1, 0.95]) 247 | fig.savefig(save_name, bbox_inches = 'tight') 248 | #plt.show() 249 | 250 | else: 251 | regret_dic = {} 252 | 253 | best_magic_svm = 0 254 | 255 | for method in methods: 256 | 257 | filename = 'experiment_results/' + method + '/' + func_name + f'/batch_size{batch_size}' 258 | 259 | regrets_outer = [] 260 | 261 | run_list = range(1, 11) 262 | 263 | for run_num in run_list: 264 | 265 | Ys = filename + '/outputs/run_' + str(run_num) + '.npy' 266 | Xs = filename + '/inputs/run_' + str(run_num) + '.npy' 267 | Ts = filename + '/time_stamps/run_' + str(run_num) + '.npy' 268 | 269 | Ys = np.load(Ys, allow_pickle = True) 270 | Xs = np.load(Xs, allow_pickle = True) 271 | Ts = np.load(Ts, allow_pickle = True) 272 | regret = [] 273 | 274 | best_obs = np.array(0) 275 | 276 | for t in time_range: 277 | time_index = [i < t for i in Ts[0]] 278 | Ys_time_filtered = np.array(Ys[0])[time_index].reshape(-1, 1) 279 | # Xs_time_filtered = np.array(Xs[0])[time_index].reshape(-1, 1) 280 | if Ys_time_filtered.shape[0] == 0: 281 | best_obs = -3 282 | regret.append(np.log(optim - best_obs)) 283 | # regret.append(optim - best_obs) 284 | else: 285 | best_obs = np.max(Ys_time_filtered) 286 | if best_obs > best_magic_svm: 287 | best_idx = np.argmax(Ys_time_filtered) 288 | # X_best = Xs_time_filtered[best_idx, :] 289 | best_magic_svm = max(best_obs, best_magic_svm) 290 | if best_obs > optim: 291 | print('Best observation is better than optimum!') 292 | regret.append(np.log(optim - best_obs)) 293 | # regret.append(optim - best_obs) 294 | if regret[-2] < regret[-1]: 295 | print('Regret is increasing!') 296 | 297 | regrets_outer.append(regret) 298 | 299 | regret_dic[method] = np.array(regrets_outer) 300 | print(best_obs) 301 | 302 | fig, ax = plt.subplots() 303 | fig.set_figheight(6) 304 | fig.set_figwidth(8) 305 | 306 | std_beta = 0.5 307 | 308 | for i, method in enumerate(methods): 309 | reg = regret_dic[method] 310 | 311 | methods = ['mfLiveBatch_no_fid_choice', 'UCBwILP_no_fid_choice', 'simpleUCB_no_fid_choice', 'mfUCB_no_fid_choice', \ 312 | 'MultiTaskUCBwILP_variance_thresholds', 'TuRBO_no_fid_choice', 'MF-TuRBO_variance_thresholds'] 313 | 314 | if method == 'simpleUCB_no_fid_choice': 315 | method = 'UCB' 316 | elif method == 'mfUCB_no_fid_choice': 317 | method = 'MF-GP-UCB' 318 | elif method == 'UCBwILP_no_fid_choice': 319 | method = 'PLAyBOOK' 320 | elif method == 'mfLiveBatch_no_fid_choice': 321 | method = 'MF-GP-UCB w LP' 322 | elif 
method == 'MultiTaskUCBwILP_variance_thresholds': 323 | method = 'UCB-V-LP' 324 | elif method == 'MultiTaskUCBwILP_information_based': 325 | method = 'UCB-I-LP' 326 | elif method == 'TuRBO_no_fid_choice': 327 | method = 'TuRBO-TS' 328 | elif method == 'MF-TuRBO_variance_thresholds': 329 | method = 'TuRBO-V-TS' 330 | elif method == 'MF-TuRBO_information_based': 331 | method = 'TuRBO-I-TS' 332 | elif method == 'MF-MES_no_fid_choice': 333 | method = 'MF-MES' 334 | 335 | mean = np.nanmean(reg, axis = 0) 336 | std = np.nanstd(reg, axis = 0) 337 | 338 | lb = mean - std_beta * std 339 | ub = mean + std_beta * std 340 | 341 | init_idx = int(0 * len(mean)) 342 | 343 | ax.plot(time_range[init_idx:], mean[init_idx:], label = method, color = colors[i], linestyle = styles[i]) 344 | ax.fill_between(time_range[init_idx:], lb[init_idx:], ub[init_idx:], color = colors[i], alpha = 0.2) 345 | 346 | 347 | ax.tick_params(axis='both', labelsize = 20) 348 | ax.grid() 349 | 350 | init_time = 0 351 | 352 | if func.name == 'Park4D': 353 | final_time = 150 354 | ax.set_ylim(ymax = 0.25) 355 | elif func.name == 'Hartmann6D': 356 | final_time = 2700 357 | ax.set_ylim(ymax = 0.25) 358 | elif func.name == 'Hartmann3D': 359 | final_time = 3250 360 | ax.set_ylim(ymax = 0.25) 361 | elif func.name == 'CurrinExp2D': 362 | final_time = 420 363 | ax.set_ylim(ymax = -2) 364 | elif func.name == 'Borehole8D': 365 | final_time = 420 366 | ax.set_ylim(ymax = 0.25) 367 | elif func.name == 'BadCurrinExp2D': 368 | final_time = 270 369 | ax.set_ylim(ymax = -2) 370 | elif func.name == 'Ackley40D': 371 | final_time = 270 372 | ax.set_ylim(ymax = 1, ymin = -1.1) 373 | 374 | ax.set_xlim(init_time, final_time) 375 | ax.set_xlabel('Time-step', fontsize = 20) 376 | ax.set_ylabel('log(Regret)', fontsize = 20) 377 | expected_costs = func.expected_costs 378 | expected_costs.reverse() 379 | ax.set_title('Evaluation Times = ' + str(expected_costs), fontsize = 20) 380 | if func.name in ['Hartmann6D', 'Ackley40D', 'Battery']: 381 | ax.legend(fontsize = 12) 382 | # plt.show() 383 | 384 | save_name = 'Figures/' + func_name + '.pdf' 385 | fig.savefig(save_name, bbox_inches = 'tight') -------------------------------------------------------------------------------- /functions.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import numpy as np 3 | import torch 4 | import random 5 | import pandas as pd 6 | import math 7 | from sklearn.svm import SVC 8 | from sklearn.ensemble import RandomForestClassifier 9 | from sklearn.metrics import accuracy_score 10 | from sklearn.preprocessing import PolynomialFeatures 11 | from sklearn.linear_model import LinearRegression 12 | from opti.problems import Detergent 13 | from itertools import combinations 14 | import pickle 15 | from gp_utils import BoTorchGP 16 | from sampling import EfficientThompsonSampler 17 | 18 | ''' 19 | File containing all the benchmark functions 20 | ''' 21 | 22 | class CurrinExp2D(): 23 | def __init__(self): 24 | self.dim = 2 25 | self.optimum = 1.379872441291809 26 | self.num_of_fidelities = 2 27 | self.name = 'CurrinExp2D' 28 | self.require_transform = False 29 | self.fidelity_costs = [1, 1] 30 | self.expected_costs = [10, 1] 31 | self.grid_search = False 32 | 33 | def draw_new_function(self): 34 | pass 35 | 36 | def evaluate_target(self, x1, x2): 37 | prod1 = 1 - np.exp(- 1 / (2 * (x2 + 1e-5))) 38 | prod2 = (2300 * x1**3 + 1900 * x1**2 + 2092 * x1 + 60) / (100 * x1**3 + 500 * x1**2 + 4 * x1 + 20) 39 | 40 | return prod1 * prod2 / 10 41 | 
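    # Note: fidelity m = 0 in 'evaluate' further below is the exact (rescaled) target, while
    # the cheap fidelity m = 1 averages four evaluations at inputs perturbed by +-0.05, a
    # standard low-fidelity variant of the Currin function in the multi-fidelity literature;
    # 'query_function_torch' is a torch re-implementation of the target, for use where
    # gradients are required.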
42 | def query_function_torch(self, x): 43 | x1 = x[:, 0] 44 | x2 = x[:, 1] 45 | 46 | prod1 = 1 - torch.exp(- 1 / (2 * (x2 + 1e-5))) 47 | prod2 = (2300 * x1**3 + 1900 * x1**2 + 2092 * x1 + 60) / (100 * x1**3 + 500 * x1**2 + 4 * x1 + 20) 48 | return prod1 * prod2 / 10 49 | 50 | def evaluate(self, x, m): 51 | 52 | assert m in [0, 1], 'CurrinExp2D only has two fidelities' 53 | 54 | x1 = x[:, 0] 55 | x2 = x[:, 1] 56 | 57 | if m == 0: 58 | return self.evaluate_target(x1, x2) 59 | 60 | elif m == 1: 61 | s1 = self.evaluate_target(x1 + 0.05, x2 + 0.05) 62 | s2 = self.evaluate_target(x1 + 0.05, np.maximum(0, x2 - 0.05)) 63 | s3 = self.evaluate_target(x1 - 0.05, x2 + 0.05) 64 | s4 = self.evaluate_target(x1 - 0.05, np.maximum(0, x2 - 0.05)) 65 | return (s1 + s2 + s3 + s4) / 4 66 | 67 | def eval_times(self, M): 68 | # returns evaluation times for each query 69 | times = [] 70 | for m in M: 71 | if m == 0: 72 | times.append(10) 73 | else: 74 | times.append(1) 75 | return np.array(times).reshape(-1, 1) 76 | 77 | class BadCurrinExp2D(): 78 | def __init__(self): 79 | self.dim = 2 80 | self.optimum = 1.379872441291809 81 | self.num_of_fidelities = 2 82 | self.name = 'BadCurrinExp2D' 83 | self.require_transform = False 84 | self.fidelity_costs = [1, 1] 85 | self.expected_costs = [10, 1] 86 | self.grid_search = False 87 | 88 | def draw_new_function(self): 89 | pass 90 | 91 | def evaluate_target(self, x1, x2): 92 | prod1 = 1 - np.exp(- 1 / (2 * (x2 + 1e-5))) 93 | prod2 = (2300 * x1**3 + 1900 * x1**2 + 2092 * x1 + 60) / (100 * x1**3 + 500 * x1**2 + 4 * x1 + 20) 94 | 95 | return prod1 * prod2 / 10 96 | 97 | def query_function_torch(self, x): 98 | x1 = x[:, 0] 99 | x2 = x[:, 1] 100 | 101 | prod1 = 1 - torch.exp(- 1 / (2 * (x2 + 1e-5))) 102 | prod2 = (2300 * x1**3 + 1900 * x1**2 + 2092 * x1 + 60) / (100 * x1**3 + 500 * x1**2 + 4 * x1 + 20) 103 | return prod1 * prod2 / 10 104 | 105 | def evaluate(self, x, m): 106 | 107 | assert m in [0, 1], 'CurrinExp2D only has two fidelities' 108 | 109 | x1 = x[:, 0] 110 | x2 = x[:, 1] 111 | 112 | if m == 0: 113 | return self.evaluate_target(x1, x2) 114 | 115 | elif m == 1: 116 | return - self.evaluate_target(x1, x2) 117 | 118 | def eval_times(self, M): 119 | # returns evaluation times for each query 120 | times = [] 121 | for m in M: 122 | if m == 0: 123 | times.append(10) 124 | else: 125 | times.append(1) 126 | return np.array(times).reshape(-1, 1) 127 | 128 | class Park4D(): 129 | def __init__(self): 130 | self.dim = 4 131 | self.optimum = 2.558925151824951 132 | self.num_of_fidelities = 2 133 | self.name = 'Park4D' 134 | self.require_transform = False 135 | self.fidelity_costs = [1, 1] 136 | self.expected_costs = [10, 1] 137 | self.grid_search = False 138 | 139 | def draw_new_function(self): 140 | pass 141 | 142 | def evaluate_target(self, x1, x2, x3, x4): 143 | sum1 = x1 / 2 * (np.sqrt(1 + (x2 + x3**2) * x4 / (x1**2 + 1e-5)) - 1) 144 | sum2 = (x1 + 3 * x4) * np.exp(np.sin(x3) + 1) 145 | return sum1 + sum2 146 | 147 | def query_function_torch(self, x): 148 | x1 = x[:, 0] 149 | x2 = x[:, 1] 150 | x3 = x[:, 2] 151 | x4 = x[:, 3] 152 | 153 | sum1 = x1 / 2 * (torch.sqrt(1 + (x2 + x3**2) * x4 / (x1**2 + 1e-5)) - 1) 154 | sum2 = (x1 + 3 * x4) * torch.exp(torch.sin(x3) + 1) 155 | return (sum1 + sum2) / 10 156 | 157 | def evaluate(self, x, m): 158 | 159 | assert m in [0, 1], 'Park4D only has two fidelities' 160 | 161 | x1 = x[:, 0] 162 | x2 = x[:, 1] 163 | x3 = x[:, 2] 164 | x4 = x[:, 3] 165 | 166 | if m == 0: 167 | return self.evaluate_target(x1, x2, x3, x4) / 10 168 | 169 | elif m 
== 1: 170 | s1 = (1 + np.sin(x1) / 10) * self.evaluate_target(x1, x2, x3, x4) 171 | return (s1 - 2 * x1**2 + x2**2 + x3**2 + 0.5) / 10 172 | 173 | def eval_times(self, M): 174 | # returns evaluation times for each query 175 | times = [] 176 | for m in M: 177 | if m == 0: 178 | times.append(10) 179 | else: 180 | times.append(1) 181 | return np.array(times).reshape(-1, 1) 182 | 183 | class Hartmann3D(): 184 | def __init__(self): 185 | self.dim = 3 186 | self.optimum = 3.8627800941467285 187 | self.num_of_fidelities = 3 188 | self.name = 'Hartmann3D' 189 | self.require_transform = False 190 | self.fidelity_costs = [1, 1, 1] 191 | self.expected_costs = [100, 10, 1] 192 | self.grid_search = False 193 | 194 | self.A = np.array([[3, 10, 30], [0.1, 10, 35], [3, 10, 30], [0.1, 10, 35]]) 195 | self.P = (1e-4) * np.array([[3689, 1170, 2673], [4699, 4387, 7470], [1091, 8732, 5547], [381, 5743, 8828]]) 196 | self.alpha = np.array([1, 1.2, 3, 3.2]) 197 | self.delta = np.array([0.01, -0.01, -0.1, 0.1]) 198 | 199 | def draw_new_function(self): 200 | pass 201 | 202 | def query_function_torch(self, x): 203 | sum1 = 0 204 | for i in range(0, 4): 205 | sum2 = 0 206 | for j in range(0, self.dim): 207 | sum2 = sum2 + self.A[i, j] * (x[:, j] - self.P[i, j])**2 208 | sum1 = sum1 + self.alpha[i] * torch.exp(-1 * sum2) 209 | return sum1 210 | 211 | def evaluate(self, x, m): 212 | 213 | assert m in [0, 1, 2], 'Hartmann3D only has three fidelities' 214 | 215 | sum1 = 0 216 | for i in range(0, 4): 217 | sum2 = 0 218 | for j in range(0, self.dim): 219 | sum2 = sum2 + self.A[i, j] * (x[:, j] - self.P[i, j])**2 220 | sum1 = sum1 + (self.alpha[i] + m * self.delta[i])* np.exp(-1 * sum2) 221 | return sum1 222 | 223 | def eval_times(self, M): 224 | # returns evaluation times for each query 225 | times = [] 226 | for m in M: 227 | if m == 0: 228 | times.append(100) 229 | elif m == 1: 230 | times.append(10) 231 | else: 232 | times.append(1) 233 | return np.array(times).reshape(-1, 1) 234 | 235 | class Hartmann6D(): 236 | def __init__(self): 237 | self.dim = 6 238 | self.optimum = 3.3223681449890137 239 | self.num_of_fidelities = 3 240 | self.name = 'Hartmann6D' 241 | self.require_transform = False 242 | self.fidelity_costs = [1, 1, 1] 243 | self.expected_costs = [100, 10, 1] 244 | self.grid_search = False 245 | 246 | self.A = np.array([[10, 3, 17, 3.5, 1.7, 8], [0.05, 10, 17, 0.1, 8, 14], [3, 3.5, 1.7, 10, 17, 8], [17, 8, 0.05, 10, 0.1, 14]]) 247 | self.P = (1e-4) * np.array([[1312, 1696, 5569, 124, 8283, 5886], [2329, 4135, 8307, 3736, 1004, 9991], [2348, 1451, 3522, 2883, 3047, 6650], [4047, 8828, 8732, 5743, 1091, 381]]) 248 | self.alpha = np.array([1, 1.2, 3, 3.2]) 249 | self.delta = np.array([0.01, -0.01, -0.1, 0.1]) 250 | 251 | def draw_new_function(self): 252 | pass 253 | 254 | def query_function_torch(self, x): 255 | sum1 = 0 256 | for i in range(0, 4): 257 | sum2 = 0 258 | for j in range(0, self.dim): 259 | sum2 = sum2 + self.A[i, j] * (x[:, j] - self.P[i, j])**2 260 | sum1 = sum1 + self.alpha[i] * torch.exp(-1 * sum2) 261 | return sum1 262 | 263 | def evaluate(self, x, m): 264 | 265 | assert m in [0, 1, 2], 'Hartmann6D only has three fidelities' 266 | 267 | sum1 = 0 268 | for i in range(0, 4): 269 | sum2 = 0 270 | for j in range(0, self.dim): 271 | sum2 = sum2 + self.A[i, j] * (x[:, j] - self.P[i, j])**2 272 | sum1 = sum1 + (self.alpha[i] + m * self.delta[i])* np.exp(-1 * sum2) 273 | return sum1 274 | 275 | def eval_times(self, M): 276 | # returns evaluation times for each query 277 | times = [] 278 | for m in M: 279 
| if m == 0: 280 | times.append(100) 281 | elif m == 1: 282 | times.append(10) 283 | elif m == 2: 284 | times.append(1) 285 | return np.array(times).reshape(-1, 1) 286 | 287 | class Borehole8D(): 288 | def __init__(self): 289 | self.dim = 8 290 | self.optimum = 3.0957562923431396 291 | self.num_of_fidelities = 2 292 | self.name = 'Borehole8D' 293 | self.require_transform = False 294 | self.fidelity_costs = [1, 1] 295 | self.expected_costs = [10, 1] 296 | self.grid_search = False 297 | 298 | def draw_new_function(self): 299 | pass 300 | 301 | def query_function_torch(self, x): 302 | x1 = x[:, 0] * 0.1 + 0.05 303 | x2 = x[:, 1] * (50000 - 100) + 100 304 | x3 = (x[:, 2] * (115.6 - 63.07) + 63.07) * 1000 305 | x4 = x[:, 3] * (1110 - 990) + 990 306 | x5 = x[:, 4] * (116 - 63.1) + 63.1 307 | x6 = x[:, 5] * (820 - 700) + 700 308 | x7 = x[:, 6] * (1680 - 1120) + 1120 309 | x8 = x[:, 7] * (12045 - 9855) + 9855 310 | 311 | numerator = 2 * np.pi * x3 * (x4 - x6) 312 | denominator = torch.log(x2 / (x1 + 1e-5)) * (1 + (2 * x7 * x3) / (torch.log(x2 / (x1 + 1e-5)) * x1**2 * x8 + 1e-5) + x3 / (x5 + 1e-5)) 313 | 314 | return numerator / denominator / 100 315 | 316 | def evaluate(self, x, m): 317 | 318 | x1 = x[:, 0] * 0.1 + 0.05 319 | x2 = x[:, 1] * (50000 - 100) + 100 320 | x3 = (x[:, 2] * (115.6 - 63.07) + 63.07) * 1000 321 | x4 = x[:, 3] * (1110 - 990) + 990 322 | x5 = x[:, 4] * (116 - 63.1) + 63.1 323 | x6 = x[:, 5] * (820 - 700) + 700 324 | x7 = x[:, 6] * (1680 - 1120) + 1120 325 | x8 = x[:, 7] * (12045 - 9855) + 9855 326 | 327 | assert m in [0, 1], 'Borehole8D only has two fidelities' 328 | 329 | if m == 0: 330 | numerator = 2 * np.pi * x3 * (x4 - x6) 331 | denominator = np.log(x2 / (x1 + 1e-5)) * (1 + (2 * x7 * x3) / (np.log(x2 / (x1 + 1e-5)) * x1**2 * x8 + 1e-5) + x3 / (x5 + 1e-5)) 332 | 333 | elif m == 1: 334 | numerator = 5 * x3 * (x4 - x6) 335 | denominator = np.log(x2 / (x1 + 1e-5)) * (1.5 + (2 * x7 * x3) / (np.log(x2 / (x1 + 1e-5)) * x1**2 * x8 + 1e-5) + x3 / (x5 + 1e-5)) 336 | 337 | return numerator / denominator / 100 338 | 339 | def eval_times(self, M): 340 | # returns evaluation times for each query 341 | times = [] 342 | for m in M: 343 | if m == 0: 344 | times.append(10) 345 | else: 346 | times.append(1) 347 | return np.array(times).reshape(-1, 1) 348 | 349 | class Ackley40D(): 350 | def __init__(self): 351 | self.dim = 40 352 | self.optimum = 0 353 | self.num_of_fidelities = 2 354 | self.name = 'Ackley40D' 355 | self.require_transform = True 356 | self.fidelity_costs = [1, 1] 357 | self.expected_costs = [10, 1] 358 | self.grid_search = False 359 | 360 | self.a = 20 361 | self.b = 0.2 362 | self.c = 2 * np.pi 363 | 364 | def draw_new_function(self): 365 | pass 366 | 367 | def query_function_torch(self, x): 368 | x = x * 9 - 4 369 | s1 = torch.sum(x**2, axis = 1) / self.dim 370 | s2 = torch.sum(torch.cos(self.c * x), axis = 1) / self.dim 371 | return (self.a * torch.exp(-self.b * torch.sqrt(s1)) + torch.exp(s2) - self.a - torch.exp(torch.tensor(1))) / 6 372 | 373 | def evaluate(self, x, m): 374 | if m == 0: 375 | x = x * 9 - 4 376 | s1 = np.sum(x**2, axis = 1) / self.dim 377 | s2 = np.sum(np.cos(self.c * x), axis = 1) / self.dim 378 | return (self.a * np.exp(-self.b * np.sqrt(s1)) + np.exp(s2) - self.a - np.exp(1)) / 6 379 | elif m == 1: 380 | x = x * 9 - 3.8 381 | s1 = np.sum(x**2, axis = 1) / (self.dim + 5) 382 | s2 = np.sum(np.cos(self.c * x), axis = 1) / (self.dim + 3) 383 | return (self.a * np.exp(-self.b * np.sqrt(s1)) + np.exp(s2) - self.a - np.exp(1)) / 6 384 | 385 | def 
eval_times(self, M): 386 | # returns evaluation times for each query 387 | times = [] 388 | for m in M: 389 | if m == 0: 390 | times.append(10) 391 | else: 392 | times.append(1) 393 | return np.array(times).reshape(-1, 1) 394 | 395 | class Battery(): 396 | def __init__(self, alpha = 0.1): 397 | self.dim = 6 398 | self.optimum = 0.8749 399 | self.num_of_fidelities = 2 400 | self.name = 'Battery' 401 | self.require_transform = False 402 | self.fidelity_costs = [2, 1] 403 | self.expected_costs = [10, 1] 404 | self.grid_search = True 405 | self.alpha = alpha 406 | 407 | # load the pre-fitted Thompson sampler state for the battery surrogate 408 | with open('battery_sampler_dict.pkl', 'rb') as inpt: 409 | sampler_dict = pickle.load(inpt) 410 | 411 | X = sampler_dict['X'] 412 | Y = sampler_dict['Y'] 413 | model_hypers = sampler_dict['model_hyperparams'] 414 | biases = sampler_dict['biases'].double() 415 | thetas = sampler_dict['thetas'].double() 416 | weights = sampler_dict['weights'].double() 417 | Phi = sampler_dict['Phi'].double() 418 | 419 | model = BoTorchGP(lengthscale_dim = 6) 420 | model.fit_model(X, Y) 421 | model.set_hyperparams(model_hypers) 422 | 423 | self.sampler = EfficientThompsonSampler(model, num_of_samples = 2, num_of_multistarts = 1) 424 | self.sampler.biases = biases 425 | self.sampler.thetas = thetas 426 | self.sampler.weights = weights 427 | self.sampler.Phi = Phi 428 | 429 | def evaluate(self, x, m): 430 | x_tensor = torch.tensor(x) 431 | with torch.no_grad(): 432 | y_vec = self.sampler.query_sample(x_tensor) 433 | y0 = y_vec.reshape(-1)[0] 434 | y1 = y_vec.reshape(-1)[1] 435 | 436 | if m == 0: 437 | output = y0 438 | elif m == 1: 439 | output = (1 - self.alpha) * y0 + self.alpha * y1 440 | # convert the sampled battery output to numpy 441 | output = output.numpy() 442 | # corrupt the battery output with Gaussian observation noise 443 | output = output + np.random.normal(scale = 0.1 / 3, size = output.shape) 444 | return output 445 | 446 | def eval_times(self, M): 447 | # returns evaluation times for each query 448 | times = [] 449 | for m in M: 450 | if m == 0: 451 | times.append(10) 452 | elif m == 1: 453 | times.append(1) 454 | return np.array(times).reshape(-1, 1) 455 | 456 | def gen_search_grid(self, grid_size): 457 | with torch.no_grad(): 458 | # all idxs due to n choose k constraint 459 | all_idxs = [list(c) for c in combinations([0, 1, 2, 3, 4, 5], 3)] 460 | # generate a single sobol sequence 461 | sobol_gen = torch.quasirandom.SobolEngine(2, scramble = True) 462 | X_sobol_2d = sobol_gen.draw(2 * grid_size).double() 463 | X_sobol_2d = X_sobol_2d[X_sobol_2d.sum(dim = 1) < 1, :] 464 | # calculate third component 465 | valid_grid_size = X_sobol_2d.shape[0] 466 | X_sobol_3d = torch.zeros(size = (valid_grid_size, 3)).double() 467 | X_sobol_3d[:, :2] = X_sobol_2d 468 | X_sobol_3d[:, 2] = 1 - X_sobol_2d.sum(axis = 1) 469 | # define large zero vector, multiplied by 20 = (6 choose 3), the number of index combinations 470 | X_out = torch.zeros(size = (valid_grid_size * 20, 6)).double() 471 | # add sobol sequence to larger grid 472 | for i, idx_comb in enumerate(all_idxs): 473 | X_out[i * valid_grid_size: (i+1) * valid_grid_size, idx_comb] = X_sobol_3d.clone() 474 | return X_out 475 | 476 | def find_optimum(func, n_starts = 25, n_epochs = 100): 477 | # find dimension 478 | dim = func.dim 479 | # define bounds 480 | bounds = torch.stack([torch.zeros(dim), torch.ones(dim)]) 481 | # random multistart 482 | X = torch.rand(n_starts, dim) 483 | X.requires_grad = True 484 | optimiser = torch.optim.Adam([X], lr = 0.01) 485 | 486 | for i in range(n_epochs): 487 | # set zero grad 488 |
optimiser.zero_grad() 489 | # losses for optimiser 490 | losses = - func.query_function_torch(X) 491 | loss = losses.sum() 492 | loss.backward() 493 | # optim step 494 | optimiser.step() 495 | 496 | # make sure we are still within the bounds 497 | for j, (lb, ub) in enumerate(zip(*bounds)): 498 | X.data[..., j].clamp_(lb, ub) # need to do this on the data not X itself 499 | 500 | final_evals = func.query_function_torch(X) 501 | best_eval = torch.max(final_evals) 502 | best_start = torch.argmax(final_evals) 503 | best_input = X[best_start, :].detach() 504 | 505 | return best_input, best_eval 506 | 507 | # this last part finds the optimum of a function via gradient-based multistart optimisation, for cases where the optimum is not already known 508 | if __name__ == '__main__': 509 | func = Park4D() 510 | best_input, best_eval = find_optimum(func, n_starts = 100000, n_epochs = 1000) 511 | print(best_input) 512 | print(float(best_eval.detach())) -------------------------------------------------------------------------------- /gp_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from gpytorch.priors import SmoothedBoxPrior 4 | import gpytorch 5 | from botorch.models import SingleTaskGP 6 | from gpytorch.constraints import GreaterThan 7 | from gpytorch.mlls import ExactMarginalLogLikelihood 8 | from gpytorch.likelihoods import Likelihood, _GaussianLikelihoodBase 9 | from gpytorch.likelihoods.noise_models import MultitaskHomoskedasticNoise, HomoskedasticNoise 10 | from torch.optim import Adam 11 | from typing import Any 12 | from torch import Tensor, dtype 13 | from gpytorch.distributions import MultivariateNormal, base_distributions 14 | import matplotlib.pyplot as plt 15 | 16 | ''' 17 | This Python file defines the Gaussian Process classes which are used in all optimization methods. 18 | ''' 19 | 20 | class BoTorchGP(): 21 | ''' 22 | Our GP implementation using GPyTorch. 23 | ''' 24 | def __init__(self, kernel = None, lengthscale_dim = None): 25 | # initialize kernel 26 | if kernel is None: 27 | self.kernel = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims = lengthscale_dim)) 28 | else: 29 | self.kernel = kernel 30 | # initialize if we should set constraints and if we have a multi-dimensional lengthscale 31 | self.constraints_set = False 32 | self.noise_constraint = False 33 | self.lengthscale_dim = lengthscale_dim 34 | self.model = None 35 | 36 | def fit_model(self, train_x, train_y, train_hyperparams = False, previous_hyperparams = None): 37 | ''' 38 | This function fits the GP model with the given data.
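A minimal usage sketch (the 2-D toy data below is illustrative only, not from the paper; numpy is imported above as np):

    gp = BoTorchGP(lengthscale_dim = 2)
    train_x = np.random.uniform(size = (20, 2))        # inputs are assumed scaled to the unit cube
    train_y = list(np.sin(3 * train_x).sum(axis = 1))  # toy observations
    gp.fit_model(train_x, train_y, train_hyperparams = True)
    mean, std = gp.posterior(np.random.uniform(size = (5, 2)))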
39 | ''' 40 | # transform data to tensors 41 | self.train_x = torch.tensor(train_x) 42 | train_y = np.array(train_y) 43 | self.train_y = torch.tensor(train_y).reshape(-1, 1) 44 | # define model 45 | self.model = SingleTaskGP(train_X = self.train_x, train_Y = self.train_y, \ 46 | covar_module = self.kernel) 47 | self.model.likelihood.noise_covar.register_constraint("raw_noise", GreaterThan(1e-5)) 48 | 49 | # marginal likelihood 50 | self.mll = ExactMarginalLogLikelihood(likelihood = self.model.likelihood, model = self.model) 51 | 52 | # check if we should set hyper-parameters or if we should optimize them 53 | if previous_hyperparams is not None: 54 | self.outputscale = float(previous_hyperparams[0]) 55 | self.lengthscale = previous_hyperparams[1].detach() 56 | self.noise = float(previous_hyperparams[2]) 57 | self.mean_constant = float(previous_hyperparams[3]) 58 | self.set_hyperparams() 59 | 60 | if train_hyperparams: 61 | self.optim_hyperparams() 62 | 63 | def define_constraints(self, init_lengthscale, init_mean_constant, init_outputscale, init_noise = None): 64 | ''' 65 | This method defines constraints on hyper-parameters as defined in the Appendix of the paper. 66 | ''' 67 | # define lengthscale bounds 68 | self.lengthscale_ub = 2 * init_lengthscale 69 | self.lengthscale_lb = init_lengthscale / 2 70 | # define mean_constant bounds 71 | self.mean_constant_ub = init_mean_constant + 0.25 * init_outputscale 72 | self.mean_constant_lb = init_mean_constant - init_outputscale 73 | # define outputscale bounds 74 | self.outputscale_ub = 3 * init_outputscale 75 | self.outputscale_lb = init_outputscale / 3 76 | 77 | self.constraints_set = True 78 | 79 | if init_noise is not None: 80 | self.noise_ub = 3 * init_noise 81 | self.noise_lb = init_noise / 3 82 | self.noise_constraint = True 83 | else: 84 | self.noise_constraint = False 85 | 86 | def define_noise_constraints(self, noise_lb = 1e-5, noise_ub = 0.2): 87 | ''' 88 | This method defines constraints on the noise hyper-parameter only, as defined in the Appendix of the paper. 89 | ''' 90 | self.noise_ub = noise_ub 91 | self.noise_lb = noise_lb 92 | self.noise_constraint = True 93 | 94 | def optim_hyperparams(self, num_of_epochs = 25, verbose = False, train_only_outputscale_and_noise = False): 95 | ''' 96 | We can optimize the hyper-parameters by maximizing the marginal log-likelihood.
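For example (a sketch, reusing the `gp` object from the `fit_model` example above):

    gp.define_noise_constraints(noise_lb = 1e-4, noise_ub = 0.1)  # optional: box in the noise first
    gp.optim_hyperparams(num_of_epochs = 50, verbose = True)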
97 | ''' 98 | # for lengthscale 99 | lengthscale_lb = torch.tensor([0.025 for _ in range(self.lengthscale_dim)]) 100 | lengthscale_ub = torch.tensor([0.6 for _ in range(self.lengthscale_dim)]) 101 | prior_lengthscale = SmoothedBoxPrior(lengthscale_lb, lengthscale_ub, 0.1) 102 | self.model.covar_module.base_kernel.register_prior('Smoothed Box Prior', prior_lengthscale, "lengthscale") 103 | # for outputscale 104 | prior_outputscale = SmoothedBoxPrior(0.05, 2, 0.1) 105 | self.model.covar_module.register_prior('Smoothed Box Prior', prior_outputscale, "outputscale") 106 | # for mean constant 107 | prior_constant = SmoothedBoxPrior(-1, 1, 0.1) 108 | self.model.mean_module.register_prior('Smoothed Box Prior', prior_constant, "constant") 109 | # for noise constraint 110 | if self.noise_constraint == True: 111 | noise_lb = torch.tensor(self.noise_lb) 112 | noise_ub = torch.tensor(self.noise_ub) 113 | prior_noise = SmoothedBoxPrior(noise_lb, noise_ub, 0.1) 114 | else: 115 | prior_noise = SmoothedBoxPrior(1e-5, 0.2, 0.1) 116 | self.model.likelihood.register_prior('Smoothed Box Prior', prior_noise, "noise") 117 | 118 | if train_only_outputscale_and_noise: 119 | current_hyperparameters = self.current_hyperparams() 120 | # for lengthscale 121 | lengthscale_lb = current_hyperparameters[1] - 1e-4 122 | lengthscale_ub = current_hyperparameters[1] + 1e-4 123 | prior_lengthscale = SmoothedBoxPrior(lengthscale_lb, lengthscale_ub, 0.00001) 124 | self.model.covar_module.base_kernel.register_prior('Smoothed Box Prior', prior_lengthscale, "lengthscale") 125 | # for mean constant 126 | mean_constant_lb = current_hyperparameters[3] - 1e-4 127 | mean_constant_ub = current_hyperparameters[3] + 1e-4 128 | prior_constant = SmoothedBoxPrior(mean_constant_lb, mean_constant_ub, 0.00001) 129 | self.model.mean_module.register_prior('Smoothed Box Prior', prior_constant, "constant") 130 | 131 | # define optimiser 132 | optimiser = Adam([{'params': self.model.parameters()}], lr=0.01) 133 | 134 | self.model.train() 135 | 136 | for epoch in range(num_of_epochs): 137 | # obtain output 138 | output = self.model(self.train_x) 139 | # calculate loss 140 | loss = - self.mll(output, self.train_y.view(-1)) 141 | # optim step 142 | optimiser.zero_grad() 143 | loss.backward() 144 | optimiser.step() 145 | 146 | if ((epoch + 1) % 10 == 0) & (verbose): 147 | print( 148 | f"Epoch {epoch+1:>3}/{num_of_epochs} - Loss: {loss.item()} " 149 | f"outputscale: {self.model.covar_module.outputscale.item()} " 150 | f"lengthscale: {self.model.covar_module.base_kernel.lengthscale.detach()} " 151 | f"noise: {self.model.likelihood.noise.item()} " 152 | f"mean constant: {self.model.mean_module.constant.item()}" 153 | ) 154 | 155 | def current_hyperparams(self): 156 | ''' 157 | Returns the current values of the hyper-parameters. 158 | ''' 159 | noise = self.model.likelihood.noise.item() 160 | lengthscale = self.model.covar_module.base_kernel.lengthscale.detach() 161 | outputscale = self.model.covar_module.outputscale.item() 162 | mean_constant = self.model.mean_module.constant.item() 163 | return (outputscale, lengthscale, noise, mean_constant) 164 | 165 | def set_hyperparams(self, hyperparams = None): 166 | ''' 167 | This function allows us to set the hyper-parameters. 
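A typical pattern is warm-starting a refitted model with the previous hyper-parameters, e.g. (sketch, reusing `train_x` / `train_y` from the example above):

    hypers = gp.current_hyperparams()  # (outputscale, lengthscale, noise, mean_constant)
    new_gp = BoTorchGP(lengthscale_dim = 2)
    new_gp.fit_model(train_x, train_y, previous_hyperparams = hypers)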
168 | ''' 169 | if hyperparams is None: 170 | hypers = { 171 | 'likelihood.noise_covar.noise': torch.tensor(self.noise), 172 | 'covar_module.base_kernel.lengthscale': self.lengthscale, 173 | 'covar_module.outputscale': torch.tensor(self.outputscale), 174 | 'mean_module.constant': torch.tensor(self.mean_constant) 175 | } 176 | else: 177 | hypers = { 178 | 'likelihood.noise_covar.noise': torch.tensor(hyperparams[2]).float(), 179 | 'covar_module.base_kernel.lengthscale': hyperparams[1], 180 | 'covar_module.outputscale': torch.tensor(hyperparams[0]).float(), 181 | 'mean_module.constant': torch.tensor(hyperparams[3]).float() 182 | } 183 | self.model.initialize(**hypers) 184 | 185 | def posterior(self, test_x): 186 | ''' 187 | Calculates the posterior of the GP, returning the mean and standard deviation at a corresponding set of points. 188 | ''' 189 | if not isinstance(test_x, torch.Tensor): 190 | test_x = torch.tensor(test_x).double() 191 | self.model.eval() 192 | model_posterior = self.model(test_x) 193 | mean = model_posterior.mean 194 | std = model_posterior.stddev 195 | return mean, std 196 | 197 | class MultiTaskBoTorchGP(): 198 | ''' 199 | Our MultiTask GP implementation using GPyTorch. 200 | ''' 201 | def __init__(self, num_of_tasks, num_of_latents = 2, ranks = [2, 2], lengthscale_dim = None): 202 | # initialize if we should set constraints and if we have a multi-dimensional lengthscale 203 | self.constraints_set = False 204 | self.lengthscale_dim = lengthscale_dim 205 | self.model = None 206 | # multitask parameters 207 | self.num_of_tasks = num_of_tasks 208 | self.num_of_latents = num_of_latents 209 | self.latent_ranks = ranks 210 | # initialize noise constraint 211 | self.noise_constraint = False 212 | 213 | def fit_model(self, train_x, train_y, train_hyperparams = False, previous_hyperparams = None): 214 | ''' 215 | This function fits the GP model with the given data.
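Here `train_x` and `train_y` are lists of per-fidelity lists: entry `m` holds the inputs and observations gathered at fidelity `m`. A sketch with two fidelities and 2-dimensional inputs (toy numbers, purely illustrative):

    mt_gp = MultiTaskBoTorchGP(num_of_tasks = 2, lengthscale_dim = 2)
    train_x = [[[0.1, 0.2], [0.7, 0.4]], [[0.3, 0.9]]]  # two queries at fidelity 0, one at fidelity 1
    train_y = [[0.50, 0.80], [0.21]]
    mt_gp.fit_model(train_x, train_y, train_hyperparams = True)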
216 | ''' 217 | # find dimension 218 | if train_x[-1] == []: 219 | dim = len(train_x[0][0]) 220 | else: 221 | dim = len(train_x[-1][0]) 222 | # train_x is a list of lists, need to transform it into large vector form 223 | num_task_0_obs = len(train_x[0]) 224 | 225 | train_x_init = np.array(train_x[0]).reshape(num_task_0_obs, dim) 226 | train_i_init = np.full(shape = (num_task_0_obs, 1), fill_value = 0) 227 | train_y_init = np.array(train_y[0]).reshape(num_task_0_obs, 1) 228 | for task_num in range(1, self.num_of_tasks): 229 | # find the number of observations corresponding to task 230 | num_task_obs = len(train_x[task_num]) 231 | # obtain task observations and reshape 232 | x_train_task = np.array(train_x[task_num]).reshape(num_task_obs, dim) 233 | train_x_init = np.concatenate((train_x_init, x_train_task), axis = 0) 234 | # create long vector containing task numbers 235 | train_i_task = np.full(shape = (num_task_obs, 1), fill_value = task_num) 236 | train_i_init = np.concatenate((train_i_init, train_i_task), axis = 0) 237 | # create long vector containing observations 238 | train_y_task = np.array((train_y[task_num])).reshape(num_task_obs, 1) 239 | train_y_init = np.concatenate((train_y_init, train_y_task), axis = 0) 240 | # transform data to tensors 241 | self.train_x = torch.tensor(train_x_init).double() 242 | self.train_i = torch.tensor(train_i_init).int() 243 | train_y_init = np.array(train_y_init) 244 | self.train_y = torch.tensor(train_y_init).reshape(-1).double() 245 | # define model 246 | self.likelihood = MultitaskGaussianLikelihood(num_of_tasks = self.num_of_tasks, train_i = self.train_i) 247 | self.model = MultitaskGPModel(train_x = (self.train_x, self.train_i), train_y = self.train_y, likelihood = self.likelihood, num_tasks = self.num_of_tasks, rank = self.latent_ranks, num_of_latents = self.num_of_latents, lengthscale_dim = self.lengthscale_dim) 248 | # marginal likelihood 249 | self.mll = ExactMarginalLogLikelihood(likelihood = self.model.likelihood, model = self.model) 250 | # change model dtype 251 | self.model.double() 252 | 253 | # check if we should set hyper-parameters or if we should optimize them 254 | if previous_hyperparams is not None: 255 | self.set_hyperparams(hyperparams = previous_hyperparams) 256 | 257 | if train_hyperparams: 258 | self.optim_hyperparams() 259 | 260 | def define_constraints(self, init_lengthscale, init_mean_constant, init_outputscale, init_noise = None): 261 | ''' 262 | This method defines constraints on hyper-parameters as defined in the Appendix of the paper. 263 | ''' 264 | # define lengthscale bounds 265 | self.lengthscale_ub = 2 * init_lengthscale 266 | self.lengthscale_lb = init_lengthscale / 2 267 | # define mean_constant bounds 268 | self.mean_constant_ub = init_mean_constant + 0.25 * init_outputscale 269 | self.mean_constant_lb = init_mean_constant - init_outputscale 270 | # define outputscale bounds 271 | self.outputscale_ub = 3 * init_outputscale 272 | self.outputscale_lb = init_outputscale / 3 273 | 274 | self.constraints_set = True 275 | 276 | if init_noise is not None: 277 | self.noise_ub = 3 * init_noise 278 | self.noise_lb = init_noise / 3 279 | self.noise_constraint = True 280 | 281 | else: 282 | self.noise_constraint = False 283 | def define_noise_constraints(self, noise_lb = 1e-5, noise_ub = 0.2): 284 | ''' 285 | This method defines constraints on the noise hyper-parameter only, as defined in the Appendix of the paper.
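For example (sketch), `mt_gp.define_noise_constraints(noise_lb = 1e-4, noise_ub = 0.1)` bounds the learned noise of every fidelity to [1e-4, 0.1] the next time `optim_hyperparams` is called. Similarly, passing e.g. `init_lengthscale = 0.2` and `init_outputscale = 0.9` to `define_constraints` above gives boxes of [0.1, 0.4] for the lengthscale and [0.3, 2.7] for the outputscale.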
286 | ''' 287 | self.noise_ub = noise_ub 288 | self.noise_lb = noise_lb 289 | self.noise_constraint = True 290 | 291 | def optim_hyperparams(self, num_of_epochs = 75, verbose = False, train_only_outputscale_and_noise = False): 292 | ''' 293 | We can optimize the hype-parameters by maximizing the marginal log-likelihood. 294 | ''' 295 | # for lengthscale 296 | for latent_num in range(self.num_of_latents): 297 | lengthscale_lb = torch.tensor([0.025 for _ in range(self.lengthscale_dim)]) 298 | lengthscale_ub = torch.tensor([0.6 for _ in range(self.lengthscale_dim)]) 299 | prior_lengthscale = SmoothedBoxPrior(lengthscale_lb, lengthscale_ub, 0.1) 300 | exec(f'self.model.covar_module_{latent_num}.register_prior("Smoothed Box Prior", prior_lengthscale, "lengthscale")') 301 | # for outputscale 302 | for latent_num in range(self.num_of_latents): 303 | outputscale_lb = torch.tensor([0.05 for _ in range(self.num_of_tasks)]) 304 | outputscale_ub = torch.tensor([2 for _ in range(self.num_of_tasks)]) 305 | prior_var = SmoothedBoxPrior(outputscale_lb, outputscale_ub, 0.1) 306 | exec(f'self.model.task_covar_module_{latent_num}.register_prior("Smoothed Box Prior", prior_var, "var")') 307 | # for mean constant 308 | prior_constant = SmoothedBoxPrior(-1, 1, 0.1) 309 | self.model.mean_module.register_prior('Smoothed Box Prior', prior_constant, "constant") 310 | # for noise constraint 311 | if self.noise_constraint == True: 312 | noise_lb = torch.tensor([self.noise_lb for _ in range(self.num_of_tasks)]) 313 | noise_ub = torch.tensor([self.noise_ub for _ in range(self.num_of_tasks)]) 314 | prior_noise = SmoothedBoxPrior(noise_lb, noise_ub, 0.1) 315 | else: 316 | noise_lb = torch.tensor([1e-5 for _ in range(self.num_of_tasks)]) 317 | noise_ub = torch.tensor([0.2 for _ in range(self.num_of_tasks)]) 318 | prior_noise = SmoothedBoxPrior(noise_lb, noise_ub, 0.1) 319 | self.model.likelihood.register_prior('Smoothed Box Prior', prior_noise, "noise") 320 | 321 | # define optimiser 322 | optimiser = Adam(self.model.parameters(), lr=0.1) 323 | 324 | self.model.train() 325 | self.likelihood.train() 326 | 327 | for epoch in range(num_of_epochs): 328 | # obtain output 329 | output = self.model(self.train_x, self.train_i) 330 | # calculate loss 331 | loss = - self.mll(output, self.train_y) 332 | # optim step 333 | optimiser.zero_grad() 334 | loss.backward() 335 | optimiser.step() 336 | 337 | if ((epoch + 1) % 10 == 0) & (verbose): 338 | print( 339 | f"Epoch {epoch+1:>3}/{num_of_epochs} - Loss: {loss.item()} " 340 | f"Hyper-parameter value display not implemented yet." 341 | ) 342 | 343 | def current_hyperparams(self): 344 | ''' 345 | Returns the current values of the hyper-parameters. 346 | ''' 347 | params_dict = {} 348 | for param in self.model.named_parameters(): 349 | params_dict[param[0]] = torch.tensor(param[1]) 350 | return params_dict 351 | 352 | def set_hyperparams(self, hyperparams = None): 353 | ''' 354 | This function allows us to set the hyper-parameters. 
355 | ''' 356 | if hyperparams is None: 357 | hypers = {} 358 | for latent in range(self.num_of_latents): 359 | parameter_key_lengthscale = 'covar_module_' + str(latent) + '.lengthscale' 360 | parameter_key_variance = 'task_covar_module_' + str(latent) + '.var' 361 | hypers[parameter_key_lengthscale] = self.lengthscale.clone() 362 | hypers[parameter_key_variance] = torch.tensor([self.outputscale for _ in range(self.num_of_tasks)]) 363 | hypers['likelihood.noise'] = torch.tensor([self.noise for _ in range(self.num_of_tasks)]) 364 | hypers['mean_module.constant'] = torch.tensor(self.mean_constant) 365 | else: 366 | hypers = hyperparams 367 | self.model.initialize(**hypers) 368 | 369 | def posterior(self, test_x, test_i, with_likelihood = False): 370 | ''' 371 | Calculates the posterior of the GP, returning the mean and standard deviation at a corresponding set of points. 372 | ''' 373 | if not isinstance(test_x, torch.Tensor): 374 | test_x = torch.tensor(test_x).double() 375 | if not isinstance(test_i, torch.Tensor): 376 | test_i = torch.tensor(test_i).double() 377 | 378 | self.model.eval() 379 | self.model.likelihood.eval() 380 | 381 | model_posterior = self.model(test_x, test_i) 382 | mean = model_posterior.mean 383 | # check if we add noise to posterior prediction 384 | if with_likelihood is False: 385 | std = model_posterior.stddev 386 | else: 387 | y_pred = self.model.likelihood(model_posterior, test_i) 388 | std = y_pred.stddev 389 | 390 | return mean, std 391 | 392 | def generate_samples(self, X, fidelity = 0, num_of_samples = 1): 393 | posterior_points = X.shape[0] 394 | i = torch.full(size = (posterior_points,), fill_value = fidelity).reshape(-1, 1).int() 395 | self.model.eval() 396 | posterior_distribution = self.model(X, i) 397 | samples = posterior_distribution.sample(torch.Size((num_of_samples,))) 398 | return samples 399 | 400 | # For MultiTask we need to define a new model within GPyTorch, which implements the linear model of coregionalization (LMC); a single-latent intrinsic coregionalization model (ICM) is defined further below 401 | 402 | class MultitaskGPModel(gpytorch.models.ExactGP): 403 | def __init__(self, train_x, train_y, likelihood, num_tasks = 2, rank = [2, 2], num_of_latents = 2, lengthscale_dim = 1): 404 | assert num_of_latents == len(rank), 'Length of rank list should equal number of latents' 405 | super(MultitaskGPModel, self).__init__(train_x, train_y, likelihood) 406 | self.mean_module = gpytorch.means.ConstantMean() 407 | # set up kernels 408 | self.num_of_latents = num_of_latents 409 | self.num_of_tasks = num_tasks 410 | self.ranks = rank 411 | self.lengthscale_dim = lengthscale_dim 412 | # set up one data kernel per latent, stored as numbered attributes 413 | for task_num in range(num_of_latents): 414 | if self.lengthscale_dim == 1: 415 | setattr(self, f'covar_module_{task_num}', gpytorch.kernels.RBFKernel(active_dims = None)) 416 | else: 417 | setattr(self, f'covar_module_{task_num}', gpytorch.kernels.RBFKernel(ard_num_dims = self.lengthscale_dim)) 418 | # we learn an IndexKernel over the tasks for each latent 419 | for task_num in range(num_of_latents): 420 | setattr(self, f'task_covar_module_{task_num}', gpytorch.kernels.IndexKernel(num_tasks = num_tasks, rank = rank[task_num])) 421 | 422 | def forward(self, x, i): 423 | mean_x = self.mean_module(x) 424 | # Get input-input covariance 425 | covar_x = self.covar_module_0(x) 426 | # Get task-task covariance 427 | covar_i = self.task_covar_module_0(i) 428 | # Multiply the two together to get the covariance we want 429 | covar = covar_x.mul(covar_i) 430 | 431 | for latent in range(1, self.num_of_latents): 432 | # Get input-input covariance (via getattr, since exec cannot rebind local variables in Python 3) 433 | covar_x = getattr(self, f'covar_module_{latent}')(x) 434 | # Get task-task covariance 435 | covar_i = getattr(self, f'task_covar_module_{latent}')(i) 436 | # add the new covariance 437 | covar = covar + covar_x.mul(covar_i) 438 | 439 | return gpytorch.distributions.MultivariateNormal(mean_x, covar) 440 | 441 | def covariance_matrix(self, x, i): 442 | # Get input-input covariance 443 | covar_x = self.covar_module_0(x) 444 | # Get task-task covariance 445 | covar_i = self.task_covar_module_0(i) 446 | # Multiply the two together to get the covariance we want 447 | covar = covar_x.mul(covar_i) 448 | 449 | for latent in range(1, self.num_of_latents): 450 | # Get input-input covariance 451 | covar_x = getattr(self, f'covar_module_{latent}')(x) 452 | # Get task-task covariance 453 | covar_i = getattr(self, f'task_covar_module_{latent}')(i) 454 | # add the new covariance 455 | covar = covar + covar_x.mul(covar_i) 456 | 457 | return covar 458 | 459 | class MultitaskGPModelICM(gpytorch.models.ExactGP): 460 | def __init__(self, train_x, train_y, likelihood, num_tasks = 2, rank = 2): 461 | super(MultitaskGPModelICM, self).__init__(train_x, train_y, likelihood) 462 | # initialize mean module 463 | self.mean_module = gpytorch.means.ConstantMean() 464 | # initialize x covar module 465 | self.covar_module = gpytorch.kernels.RBFKernel() 466 | # initialize task covar module 467 | self.task_covar_module = gpytorch.kernels.IndexKernel(num_tasks = num_tasks, rank = rank) 468 | 469 | def forward(self, x, i): 470 | # THIS LIST STRUCTURE MIGHT BE KILLING LENGTH-SCALE LEARNING 471 | mean_x = self.mean_module(x) 472 | # Get input-input covariance 473 | covar_x = self.covar_module(x) 474 | # Get task-task covariance 475 | covar_i = self.task_covar_module(i) 476 | # Multiply the two together to get the covariance we want 477 | covar = covar_x.mul(covar_i) 478 | return gpytorch.distributions.MultivariateNormal(mean_x, covar) 479 | 480 | class MultitaskGaussianLikelihood(_GaussianLikelihoodBase): 481 | r""" 482 | Likelihood that is homoskedastic across inputs but heteroskedastic across tasks, i.e. we learn a different (constant) noise level for each fidelity.
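Internally, `_shaped_noise_covar` builds one homoskedastic diagonal per task, masks each diagonal with the indicator `active_i == task`, and sums over tasks, so every observation receives exactly the noise level of its own fidelity. A minimal construction (toy task indices, purely illustrative):

    train_i = torch.tensor([[0], [0], [1]]).int()
    likelihood = MultitaskGaussianLikelihood(num_of_tasks = 2, train_i = train_i)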
483 | 484 | To initialize: 485 | num_of_tasks : int 486 | train_i : vector of task indices for each training data-point 487 | noise_prior : any prior you want to put on the noise 488 | noise_constraint : constraint to put on the noise 489 | """ 490 | 491 | def __init__(self, num_of_tasks, train_i, noise_prior=None, noise_constraint=None, batch_shape=torch.Size(), **kwargs): 492 | noise_covar = MultitaskHomoskedasticNoise(num_tasks = num_of_tasks, 493 | noise_prior=noise_prior, noise_constraint=noise_constraint, batch_shape=batch_shape 494 | ) 495 | self.active_i = train_i 496 | self.num_tasks = num_of_tasks 497 | super().__init__(noise_covar=noise_covar) 498 | 499 | @property 500 | def noise(self) -> torch.Tensor: 501 | return self.noise_covar.noise 502 | 503 | @noise.setter 504 | def noise(self, value: torch.Tensor) -> None: 505 | self.noise_covar.initialize(noise=value) 506 | 507 | @property 508 | def raw_noise(self) -> torch.Tensor: 509 | return self.noise_covar.raw_noise 510 | 511 | @raw_noise.setter 512 | def raw_noise(self, value: torch.Tensor) -> None: 513 | self.noise_covar.initialize(raw_noise=value) 514 | 515 | def _shaped_noise_covar(self, base_shape: torch.Size, *params: Any, **kwargs: Any): 516 | # need to return 1 x num_obs x num_obs matrix 517 | noise_base_covar_matrix = self.noise_covar(*params, shape=base_shape, **kwargs) 518 | # initialize masking 519 | mask = torch.zeros(size = noise_base_covar_matrix.shape) 520 | # for each task create a masking 521 | for task_num in range(self.num_tasks): 522 | # create diagonal indicator matrix for this task's observations 523 | task_idx_diag = (self.active_i == task_num).int().reshape(-1).diag() 524 | mask[..., task_num, :, :] = task_idx_diag 525 | # multiply covar by masking 526 | # there seem to be problems when base_shape is singleton, so we need to squeeze 527 | if base_shape == torch.Size([1]): 528 | noise_base_covar_matrix = noise_base_covar_matrix.squeeze(-1).mul(mask.squeeze(-1)) 529 | noise_covar_matrix = noise_base_covar_matrix.unsqueeze(-1).sum(dim = 1) 530 | else: 531 | noise_covar_matrix = noise_base_covar_matrix.mul(mask).sum(dim = 1) 532 | return noise_covar_matrix 533 | 534 | def forward(self, function_samples: Tensor, test_i = None, *params: Any, **kwargs: Any) -> base_distributions.Normal: 535 | if test_i is not None: 536 | self.active_i = test_i[1] 537 | noise = self._shaped_noise_covar(function_samples.shape, *params, **kwargs).diag() 538 | return base_distributions.Normal(function_samples, noise.sqrt()) 539 | 540 | def marginal(self, function_dist: MultivariateNormal, test_i = None, *params: Any, **kwargs: Any) -> MultivariateNormal: 541 | if test_i is not None: 542 | self.active_i = test_i[1] 543 | mean, covar = function_dist.mean, function_dist.lazy_covariance_matrix 544 | noise_covar = self._shaped_noise_covar(mean.shape, *params, **kwargs).squeeze(0) 545 | full_covar = covar + noise_covar 546 | return function_dist.__class__(mean, full_covar) --------------------------------------------------------------------------------