├── LICENSE ├── gp_model.py ├── singlemes.py ├── README.md ├── benchmark_functions.py └── main.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 belakaria 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /gp_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Oct 29 14:34:01 2018 4 | 5 | @author: Syrine Belakaria 6 | """ 7 | import numpy as np 8 | from sklearn.gaussian_process import GaussianProcessRegressor 9 | from sklearn.gaussian_process.kernels import RBF 10 | 11 | 12 | class GaussianProcess: 13 | def __init__(self, dim): 14 | self.dim = dim 15 | self.kernel = RBF(length_scale=1, length_scale_bounds=(1e-3, 1e2)) 16 | self.beta = 1e6 17 | self.xValues = [] 18 | self.yValues = [] 19 | self.yValuesNorm = [] 20 | self.model = GaussianProcessRegressor( 21 | kernel=self.kernel, n_restarts_optimizer=5 22 | ) 23 | 24 | def fitNormal(self): 25 | y_mean = np.mean(self.yValues) 26 | y_std = self.getstd() 27 | self.yValuesNorm = (self.yValues - y_mean) / y_std 28 | self.model.fit(self.xValues, self.yValuesNorm) 29 | 30 | def fitModel(self): 31 | self.model.fit(self.xValues, self.yValues) 32 | 33 | def addSample(self, x, y): 34 | self.xValues.append(x) 35 | self.yValues.append(y) 36 | 37 | def getPrediction(self, x): 38 | mean, std = self.model.predict(x.reshape(1, -1), return_std=True) 39 | if std[0] == 0: 40 | std[0] = np.sqrt(1e-5) * self.getstd() 41 | return mean, std 42 | 43 | def getmean(self): 44 | return np.mean(self.yValues) 45 | 46 | def getstd(self): 47 | y_std = np.std(self.yValues) 48 | if y_std == 0: 49 | y_std = 1 50 | return y_std 51 | -------------------------------------------------------------------------------- /singlemes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Oct 29 14:34:01 2018 4 | 5 | @author: Syrine Belakaria 6 | """ 7 | import numpy as np 8 | from scipy.stats import norm 9 | from sklearn.kernel_approximation import RBFSampler 10 | 11 | 12 | class MaxvalueEntropySearch(object): 13 | def __init__(self, GPmodel): 14 | self.GPmodel = GPmodel 15 | self.y_max = max(GPmodel.yValues) 16 | self.d = GPmodel.dim 17 | 18 | def Sampling_RFM(self): 19 | self.rbf_features = RBFSampler( 20 | gamma=1 / (2 * self.GPmodel.kernel.length_scale**2), 21 | n_components=1000, 22 | random_state=1, 23 | ) 24 | X_train_features = self.rbf_features.fit_transform( 25 | np.asarray(self.GPmodel.xValues) 26 | ) 27 | 28 | A_inv = np.linalg.inv( 29 | (X_train_features.T).dot(X_train_features) 30 | + np.eye(self.rbf_features.n_components) / self.GPmodel.beta 31 | ) 32 | self.weights_mu = A_inv.dot(X_train_features.T).dot(self.GPmodel.yValues) 33 | weights_gamma = A_inv / self.GPmodel.beta 34 | self.L = np.linalg.cholesky(weights_gamma) 35 | 36 | def weigh_sampling(self): 37 | random_normal_sample = np.random.normal(0, 1, np.size(self.weights_mu)) 38 | self.sampled_weights = np.c_[self.weights_mu] + self.L.dot( 39 | np.c_[random_normal_sample] 40 | ) 41 | 42 | def f_regression(self, x): 43 | 44 | X_features = self.rbf_features.fit_transform(x.reshape(1, len(x))) 45 | return -(X_features.dot(self.sampled_weights)) 46 | 47 | def single_acq(self, x, maximum): 48 | mean, std = self.GPmodel.getPrediction(x) 49 | mean = mean[0] 50 | std = std[0] 51 | if maximum < max(self.GPmodel.yValues) + 5 / self.GPmodel.beta: 52 | maximum = max(self.GPmodel.yValues) + 5 / self.GPmodel.beta 53 | 54 | normalized_max = (maximum - mean) / std 55 | pdf = norm.pdf(normalized_max) 56 | cdf = norm.cdf(normalized_max) 57 | if cdf == 0: 58 | cdf = 1e-30 59 | return -(normalized_max * pdf) / (2 * cdf) + np.log(cdf) 60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Max-value Entropy Search for Multi-Objective Bayesian Optimization 2 | 3 | 4 | This repository contains the python implementation for MESMO from the Neurips 2019 paper "[Max-value Entropy Search for Multi-ObjectiveBayesian Optimization](https://papers.nips.cc/paper/8997-max-value-entropy-search-for-multi-objective-bayesian-optimization.pdf)". 5 | 6 | --- 7 | 8 | ## Requirements 9 | 10 | This code is implemented in Python and requires the following dependencies: 11 | 12 | * [`sobol_seq`](https://github.com/naught101/sobol_seq) – for generating Sobol sequences 13 | * [`platypus`](https://platypus.readthedocs.io/en/latest/getting-started.html#installing-platypus) – for multi-objective evolutionary algorithms 14 | * [`scikit-learn`](https://scikit-learn.org/stable/modules/gaussian_process.html) – specifically `sklearn.gaussian_process` for GP modeling 15 | * [`pygmo`](https://esa.github.io/pygmo2/install.html) – for parallel optimization algorithms 16 | 17 | You can install the required packages using: 18 | 19 | ```bash 20 | pip install sobol_seq platypus-opt scikit-learn pygmo 21 | ``` 22 | --- 23 | ## Running MESMO 24 | 25 | 26 | ```bash 27 | python main.py 28 | ``` 29 | 30 | Here's an example command you could run from bash: 31 | 32 | ```bash 33 | python main.py branin,Currin 2 0 5 100 10 34 | ``` 35 | 36 | Explanation of arguments: 37 | 38 | 1. `function_names`: names of the benchmark functions separated by a comma 39 | 2. `d`: number of input dimensions 40 | 3. `seed`: random seed 41 | 4. `initial_number`: number of initial of evaluations 42 | 5. `total_iterations`: number of BO iterations 43 | 6. `sample_number`: number of samples to use for entropy estimation 44 | 45 | --- 46 | ### Citation 47 | If you use this code please cite our papers: 48 | 49 | ```bibtex 50 | 51 | @inproceedings{belakaria2019max, 52 | title={Max-value entropy search for multi-objective bayesian optimization}, 53 | author={Belakaria, Syrine and Deshwal, Aryan}, 54 | booktitle={International Conference on Neural Information Processing Systems (NeurIPS)}, 55 | year={2019} 56 | } 57 | 58 | @article{belakaria2021output, 59 | title={Output Space Entropy Search Framework for Multi-Objective Bayesian Optimization}, 60 | author={Belakaria, Syrine and Deshwal, Aryan and Doppa, Janardhan Rao}, 61 | journal={Journal of Artificial Intelligence Research}, 62 | volume={72}, 63 | pages={667-715}, 64 | year={2021} 65 | } 66 | 67 | ```` -------------------------------------------------------------------------------- /benchmark_functions.py: -------------------------------------------------------------------------------- 1 | import math 2 | from copy import deepcopy 3 | 4 | import numpy as np 5 | 6 | 7 | def get_benchmark_functions(names="branin,Currin"): 8 | """ 9 | Returns a list of benchmark functions. 10 | """ 11 | names = names.split(",") 12 | available_functions = { 13 | "branin": branin, 14 | "Currin": Currin, 15 | "Powell": Powell, 16 | "Perm": Perm, 17 | "Dixon": Dixon, 18 | "ZAKHAROV": ZAKHAROV, 19 | "RASTRIGIN": RASTRIGIN, 20 | "SumSquares": SumSquares, 21 | "oka21": oka21, 22 | "oka22": oka22, 23 | "DTLZ14f_1": DTLZ14f_1, 24 | "DTLZ14f_2": DTLZ14f_2, 25 | "DTLZ14f_3": DTLZ14f_3, 26 | "DTLZ14f_4": DTLZ14f_4, 27 | # Add other functions here as needed 28 | } 29 | functions = [] 30 | for name in names: 31 | if name in available_functions: 32 | functions.append(available_functions[name]) 33 | else: 34 | raise ValueError(f"Function '{name}' is not available.") 35 | return functions 36 | 37 | 38 | def Currin(x, d): 39 | return -1 * float( 40 | ( 41 | (1 - math.exp(-0.5 * (1 / x[1]))) 42 | * ( 43 | (2300 * pow(x[0], 3) + 1900 * x[0] * x[0] + 2092 * x[0] + 60) 44 | / (100 * pow(x[0], 3) + 500 * x[0] * x[0] + 4 * x[0] + 20) 45 | ) 46 | ) 47 | ) 48 | 49 | 50 | def branin(x1, d): 51 | x = deepcopy(x1) 52 | x[0] = 15 * x[0] - 5 53 | x[1] = 15 * x[1] 54 | return -1 * float( 55 | np.square( 56 | x[1] 57 | - (5.1 / (4 * np.square(math.pi))) * np.square(x[0]) 58 | + (5 / math.pi) * x[0] 59 | - 6 60 | ) 61 | + 10 * (1 - (1.0 / (8 * math.pi))) * np.cos(x[0]) 62 | + 10 63 | ) 64 | 65 | 66 | def Powell(xx, d): 67 | 68 | vmin = -4 69 | vmax = 5 70 | 71 | x = [None] + list(vmin + np.asarray(xx) * (vmax - vmin)) 72 | f_original = 0 73 | for i in range(1, int(math.floor(d / 4) + 1)): 74 | f_original = ( 75 | f_original 76 | + pow(x[4 * i - 3] + 10 * x[4 * i - 2], 2) 77 | + 5 * pow(x[4 * i - 1] - x[4 * i], 2) 78 | + pow(x[4 * i - 2] - 2 * x[4 * i - 1], 4) 79 | + 10 * pow(x[4 * i - 3] - 2 * x[4 * i], 4) 80 | ) 81 | return -1 * float(f_original) 82 | 83 | 84 | def Perm(xx, d): 85 | vmin = -1 * d 86 | vmax = d 87 | beta = 10 88 | x = [None] + list(vmin + np.asarray(xx) * (vmax - vmin)) 89 | f_original = 0 90 | for i in range(1, d + 1): 91 | sum1 = 0 92 | for j in range(1, d + 1): 93 | sum1 = sum1 + (j + beta) * (x[j] - math.pow(j, -1 * i)) 94 | f_original = f_original + math.pow(sum1, 2) 95 | return -1 * f_original 96 | 97 | 98 | def Dixon(xx, d): 99 | vmin = -10 100 | vmax = 10 101 | x = [None] + list(vmin + np.asarray(xx) * (vmax - vmin)) 102 | f_original = 0 103 | for i in range(2, d + 1): 104 | f_original = f_original + i * math.pow(2 * math.pow(x[i], 2) - x[i - 1], 2) 105 | f_original = f_original + math.pow(x[1] - 1, 1) 106 | return -1 * f_original 107 | 108 | 109 | def ZAKHAROV(xx, d): 110 | vmin = -5 111 | vmax = 10 112 | x = [None] + list(vmin + np.asarray(xx) * (vmax - vmin)) 113 | term1 = 0 114 | term2 = 0 115 | for i in range(1, d + 1): 116 | term1 = term1 + x[i] ** 2 117 | term2 = term2 + 0.5 * i * x[i] 118 | f_original = term1 + math.pow(term2, 2) + math.pow(term2, 4) 119 | return -1 * f_original 120 | 121 | 122 | def RASTRIGIN(xx, d): 123 | vmin = -5.12 124 | vmax = 5.12 125 | x = [None] + list(vmin + np.asarray(xx) * (vmax - vmin)) 126 | f_original = 0 127 | for i in range(1, d + 1): 128 | f_original = f_original + (x[i] ** 2 - 10 * math.cos(2 * x[i] * math.pi)) 129 | f_original = f_original + 10 * d 130 | return -1 * f_original 131 | 132 | 133 | def SumSquares(xx, d): 134 | vmin = -5.12 135 | vmax = 5.12 136 | x = [None] + list(vmin + np.asarray(xx) * (vmax - vmin)) 137 | f_original = 0 138 | for i in range(1, d + 1): 139 | f_original = f_original + (i * math.pow(x[i], 2)) 140 | return -1 * f_original 141 | 142 | 143 | ############################################ 144 | # d=3 145 | def oka21(xx, d): 146 | x = deepcopy(xx) 147 | x[0] = x[0] * (2 * 3.14) - 3.14 148 | f_original = x[0] 149 | return -1 * f_original 150 | 151 | 152 | def oka22(xx, d): 153 | x = deepcopy(xx) 154 | x[0] = x[0] * (2 * 3.14) - 3.14 155 | x[1] = x[1] * (2 * 5) - 5 156 | x[2] = x[2] * (2 * 5) - 5 157 | f_original = ( 158 | 1 159 | - 1.0 / (4 * pow(math.pi, 2)) * pow(x[0] + math.pi, 2) 160 | + pow(np.abs(x[1] - 5 * math.cos(x[0])), 1.0 / 3) 161 | + pow(np.abs(x[2] - 5 * math.sin(x[0])), 1.0 / 3) 162 | ) 163 | return -1 * f_original 164 | 165 | 166 | ################################################ 167 | # d=5/d=3 168 | def DTLZ14f_1(x, d): 169 | g = 0 170 | for i in range(d): 171 | g = g + pow(x[i] - 0.5, 2) - math.cos(20 * math.pi * (x[i] - 0.5)) 172 | g = 100 * (d + g) 173 | y1 = (1 + g) * 0.5 * x[0] * x[1] * x[2] 174 | return -1 * y1 175 | 176 | 177 | def DTLZ14f_2(x, d): 178 | g = 0 179 | for i in range(d): 180 | g = g + pow(x[i] - 0.5, 2) - math.cos(20 * math.pi * (x[i] - 0.5)) 181 | g = 100 * (d + g) 182 | y2 = (1 + g) * 0.5 * (1 - x[2]) * x[0] * x[1] 183 | return -1 * y2 184 | 185 | 186 | def DTLZ14f_3(x, d): 187 | g = 0 188 | for i in range(d): 189 | g = g + pow(x[i] - 0.5, 2) - math.cos(20 * math.pi * (x[i] - 0.5)) 190 | g = 100 * (d + g) 191 | y3 = (1 + g) * 0.5 * (1 - x[1]) * x[0] 192 | return -1 * y3 193 | 194 | 195 | def DTLZ14f_4(x, d): 196 | g = 0 197 | for i in range(d): 198 | g = g + pow(x[i] - 0.5, 2) - math.cos(20 * math.pi * (x[i] - 0.5)) 199 | g = 100 * (d + g) 200 | y4 = (1 + g) * 0.5 * (1 - x[0]) 201 | return -1 * y4 202 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Oct 29 14:34:01 2018 4 | 5 | @author: Syrine Belakaria 6 | """ 7 | import os 8 | import sys 9 | 10 | import numpy as np 11 | import sobol_seq 12 | from GPmodel import GaussianProcess 13 | from platypus import NSGAII, Problem, Real 14 | from pygmo import hypervolume 15 | from scipy.optimize import minimize as scipyminimize 16 | 17 | from benchmark_functions import get_benchmark_functions 18 | from singlemes import MaxvalueEntropySearch 19 | 20 | ######################Algorithm input############################## 21 | 22 | args = sys.argv[1:] 23 | function_names = args[0] 24 | functions = get_benchmark_functions(function_names) 25 | d = int(args[1]) 26 | seed = int(args[2]) 27 | intial_number = int(args[3]) 28 | total_iterations = int(args[4]) 29 | sample_number = int(args[5]) 30 | paths = "." 31 | np.random.seed(seed) 32 | 33 | ###################Sobol Sequence Initialisation and search bounds########################## 34 | 35 | bound = [0, 1] 36 | Fun_bounds = [bound] * d 37 | grid = sobol_seq.i4_sobol_generate(d, 1000, np.random.randint(0, 100)) 38 | design_index = np.random.randint(0, grid.shape[0]) 39 | 40 | ###################GP Initialisation########################## 41 | 42 | GPs = [] 43 | Multiplemes = [] 44 | for i in range(len(functions)): 45 | GPs.append(GaussianProcess(d)) 46 | for k in range(intial_number): 47 | exist = True 48 | while exist: 49 | design_index = np.random.randint(0, grid.shape[0]) 50 | x_rand = list(grid[design_index : (design_index + 1), :][0]) 51 | if (any((x_rand == x).all() for x in GPs[0].xValues)) == False: 52 | exist = False 53 | for i in range(len(functions)): 54 | GPs[i].addSample(np.asarray(x_rand), functions[i](x_rand, d)) 55 | 56 | for i in range(len(functions)): 57 | GPs[i].fitModel() 58 | Multiplemes.append(MaxvalueEntropySearch(GPs[i])) 59 | 60 | 61 | #### write the initial points into file 62 | input_output = open(os.path.join(paths, "input_output.txt"), "a") 63 | for j in range(len(GPs[0].yValues)): 64 | input_output.write( 65 | str(GPs[0].xValues[j]) 66 | + "---" 67 | + str([GPs[i].yValues[j] for i in range(len(functions))]) 68 | + "\n" 69 | ) 70 | input_output.close() 71 | 72 | ##################### main loop ########## 73 | 74 | for l in range(total_iterations): 75 | 76 | for i in range(len(functions)): 77 | Multiplemes[i] = MaxvalueEntropySearch(GPs[i]) 78 | Multiplemes[i].Sampling_RFM() 79 | max_samples = [] 80 | for j in range(sample_number): 81 | for i in range(len(functions)): 82 | Multiplemes[i].weigh_sampling() 83 | cheap_pareto_front = [] 84 | 85 | def CMO(xi): 86 | xi = np.asarray(xi) 87 | y = [Multiplemes[i].f_regression(xi)[0][0] for i in range(len(GPs))] 88 | return y 89 | 90 | problem = Problem(d, len(functions)) 91 | problem.types[:] = Real(bound[0], bound[1]) 92 | problem.function = CMO 93 | algorithm = NSGAII(problem) 94 | algorithm.run(1500) 95 | cheap_pareto_front = [ 96 | list(solution.objectives) for solution in algorithm.result 97 | ] 98 | #########picking the max over the pareto: best case 99 | maxoffunctions = [-1 * min(f) for f in list(zip(*cheap_pareto_front))] 100 | max_samples.append(maxoffunctions) 101 | 102 | def mesmo_acq(x): 103 | multi_obj_acq_total = 0 104 | for j in range(sample_number): 105 | multi_obj_acq_sample = 0 106 | for i in range(len(functions)): 107 | multi_obj_acq_sample = multi_obj_acq_sample + Multiplemes[i].single_acq( 108 | x, max_samples[j][i] 109 | ) 110 | multi_obj_acq_total = multi_obj_acq_total + multi_obj_acq_sample 111 | return multi_obj_acq_total / sample_number 112 | 113 | # l-bfgs-b acquisation optimization 114 | x_tries = np.random.uniform(bound[0], bound[1], size=(1000, d)) 115 | y_tries = [mesmo_acq(x) for x in x_tries] 116 | sorted_indecies = np.argsort(y_tries) 117 | i = 0 118 | x_best = x_tries[sorted_indecies[i]] 119 | while any((x_best == x).all() for x in GPs[0].xValues): 120 | i = i + 1 121 | x_best = x_tries[sorted_indecies[i]] 122 | y_best = y_tries[sorted_indecies[i]] 123 | x_seed = list(np.random.uniform(low=bound[0], high=bound[1], size=(1000, d))) 124 | for x_try in x_seed: 125 | result = scipyminimize( 126 | mesmo_acq, 127 | x0=np.asarray(x_try).reshape(1, -1), 128 | method="L-BFGS-B", 129 | bounds=Fun_bounds, 130 | ) 131 | if not result.success: 132 | continue 133 | if (result.fun <= y_best) and (not (result.x in np.asarray(GPs[0].xValues))): 134 | x_best = result.x 135 | y_best = result.fun 136 | 137 | # ---------------Updating and fitting the GPs----------------- 138 | for i in range(len(functions)): 139 | GPs[i].addSample(x_best, functions[i](list(x_best), d)) 140 | GPs[i].fitModel() 141 | 142 | ############################ write Input output into file ################## 143 | input_output = open(os.path.join(paths, "input_output.txt"), "a") 144 | input_output.write( 145 | str(GPs[0].xValues[-1]) 146 | + "---" 147 | + str([GPs[i].yValues[-1] for i in range(len(functions))]) 148 | + "\n" 149 | ) 150 | input_output.close() 151 | 152 | ############################ write hypervolume into file################## 153 | referencePoint = [1e5] * len( 154 | functions 155 | ) # A point that is worse than any point in the Pareto front 156 | current_hypervolume = open(os.path.join(paths, "hypervolumes.txt"), "a") 157 | simple_pareto_front_evaluations = list( 158 | zip(*[GPs[i].yValues for i in range(len(functions))]) 159 | ) 160 | print( 161 | "hypervolume ", 162 | hypervolume(-1 * (np.asarray(simple_pareto_front_evaluations))).compute( 163 | referencePoint 164 | ), 165 | ) 166 | current_hypervolume.write( 167 | "%f \n" 168 | % hypervolume(-1 * (np.asarray(simple_pareto_front_evaluations))).compute( 169 | referencePoint 170 | ) 171 | ) 172 | current_hypervolume.close() 173 | --------------------------------------------------------------------------------