├── .gitignore
├── results
│   ├── 10.jpg
│   ├── 11.jpg
│   ├── 12.jpg
│   ├── 13.jpg
│   ├── 14.jpg
│   ├── 15.jpg
│   ├── 16.jpg
│   ├── 17.jpg
│   ├── 18.jpg
│   ├── 19.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   ├── 4.jpg
│   ├── 5.jpg
│   ├── 6.jpg
│   ├── 7.jpg
│   ├── 8.jpg
│   └── 9.jpg
├── hyperparameter.py
├── LICENSE
├── output_grapher.py
├── acquisition_functions.py
├── black_box_functions.py
├── covariance_functions.py
├── README.md
├── bbho_base.py
└── bbho.py

/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.swp
*.txt
test*.py

--------------------------------------------------------------------------------
/results/2.jpg ... /results/19.jpg:
--------------------------------------------------------------------------------
Binary image files (result plots), each available at
https://raw.githubusercontent.com/BlueHephaestus/bbho/HEAD/results/<n>.jpg

--------------------------------------------------------------------------------
/hyperparameter.py:
--------------------------------------------------------------------------------
class HyperParameter(object):
    def __init__(self, min, max):
        self.min = min
        self.max = max
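
# Usage sketch (values taken from the README's example; illustrative only):
# bbho.py builds its search space as a list of these, one per tunable parameter, e.g.
#   hps = [HyperParameter(0, 50), HyperParameter(0, 1)]
# where each pair is the (min, max) range that BBHO will discretize and search.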

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Blake Edwards / Dark Element

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/output_grapher.py:
--------------------------------------------------------------------------------
import numpy as np

from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import cm

def graph_output(plot_2d_results, plot_3d_results, bbf_evaluation_i, bbf_evaluation_n, domain_x, domain_y, detail_n, test_means, bbf_inputs, bbf_evaluations, val1, val2):

    #Set the filename
    fname = "results/%02d" % bbf_evaluation_i

    #Plot our updates
    if plot_2d_results:
        plt.plot(domain_x, test_means)
        #plt.plot(domain_x, test_variances, 'r')
        #plt.plot(bbf_inputs, bbf_evaluations, 'bo')
        plt.scatter(bbf_inputs, bbf_evaluations, marker='o', c='b', s=100.0, label="Function Evaluations")
        plt.plot(domain_x, val1, 'r')
        plt.plot(domain_x, val2, 'r')
        #plt.plot(domain_x, bbf(domain_x), 'y')

        #Label and style the plot *before* saving, so the labels, legend, and
        # axis limits actually appear in the saved image
        plt.xlabel("X-Axis")
        plt.ylabel("Y-Axis")
        plt.legend(bbox_to_anchor=(1, 1), loc=1, borderaxespad=0.)
        plt.axis([0, 10, 0, 2])

        plt.savefig("%s.jpg" % fname, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', papertype=None, format=None,
                transparent=False, bbox_inches='tight', pad_inches=0.1,
                frameon=None)
        #plt.show()
        plt.gcf().clear()

    elif plot_3d_results:
        #So we only render on the last one (just erase this if you want all of them)
        if bbf_evaluation_i == bbf_evaluation_n-1:
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            #X & Y have to be matrices of all vertices
            #Z has to be a matrix of outputs
            #Convert our vectors to compatible matrix counterparts
            Y = np.array([[i] for i in domain_y])

            X = np.tile(domain_x, (detail_n, 1))
            Y = np.tile(Y, (1, detail_n))

            #This one's easy, just reshape
            Z1 = test_means.reshape(detail_n, detail_n)
            #Z2 = test_variances.reshape(detail_n, detail_n)
            Z3 = (val1).reshape(detail_n, detail_n)
            Z4 = (val2).reshape(detail_n, detail_n)


            ax.plot_surface(X, Y, Z1, rstride=1, cstride=1, cmap=cm.coolwarm)
            #ax.plot_wireframe(X, Y, Z2, rstride=1, cstride=1)
            ax.plot_wireframe(X, Y, Z3, rstride=1, cstride=1)
            ax.plot_wireframe(X, Y, Z4, rstride=1, cstride=1)
            plt.savefig("%s.jpg" % fname, dpi=None, facecolor='w', edgecolor='w',
                    orientation='portrait', papertype=None, format=None,
                    transparent=False, bbox_inches='tight', pad_inches=0.1,
                    frameon=None)

            plt.gcf().clear()
            #plt.show()

--------------------------------------------------------------------------------
/acquisition_functions.py:
--------------------------------------------------------------------------------
import numpy as np

import bbho_base
from bbho_base import *

means = T.vector()
stddevs = T.vector()
variances = T.vector()
values = T.vector()
cdfs = T.vector()
dist_values = T.vector()
normal_dist_values = T.vector()
confidence_interval = T.scalar()

class acquisition_function(object):

    def __init__(self):
        #Currently nothing to init with
        pass

class probability_improvement(acquisition_function):

    def __init__(self):
        acquisition_function.__init__(self)
        self.f = theano.function([cdfs],
                outputs = T.argmax(cdfs),
                allow_input_downcast=True
            )

    def evaluate(self, means, variances, values, confidence_interval):
        #We compute the cdf here, outside the compiled theano function, so that
        # it is only evaluated once we actually have means, variances, and values,
        # rather than once at function-compilation time.
        cdfs = cdf(values, means, variances)
        return self.f(cdfs)

class expected_improvement(acquisition_function):

    def __init__(self):
        acquisition_function.__init__(self)

        #We assign this so we don't compute it twice in our function
        self.stddev = theano.function([variances], T.sqrt(variances), allow_input_downcast=True)

        self.f = theano.function([stddevs, dist_values, cdfs, normal_dist_values],
                outputs = T.argmax(stddevs * dist_values * cdfs + normal_dist_values),
                allow_input_downcast=True
            )

    def evaluate(self, means, variances, values, confidence_interval):
        #As in probability_improvement, we compute these outside the compiled
        # theano function so they are only evaluated once we have actual values
        dist_values = gaussian_distribution_v(values, means, variances)
        cdfs = cdf(values, means, variances)
        normal_dist_values = gaussian_distribution_v(values, np.zeros_like(values), np.ones_like(values))

        return self.f(self.stddev(variances), dist_values, cdfs, normal_dist_values)


class upper_confidence_bound(acquisition_function):

    def __init__(self):
        acquisition_function.__init__(self)
        self.f = theano.function([means, variances, confidence_interval],
                outputs = T.argmax(means + confidence_interval * T.sqrt(variances)),
                allow_input_downcast=True
            )

    def evaluate(self, means, variances, values, confidence_interval):
        return self.f(means, variances, confidence_interval)
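
#Worked example for upper_confidence_bound (illustrative numbers): with
# means = [0.5, 0.9], variances = [0.04, 0.01], and confidence_interval = 1.5,
# it computes argmax(means + 1.5 * sqrt(variances))
#   = argmax([0.5 + 0.3, 0.9 + 0.15]) = argmax([0.8, 1.05]) = 1,
# i.e. it selects the second candidate point.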

--------------------------------------------------------------------------------
/black_box_functions.py:
--------------------------------------------------------------------------------

"""
Used to set up our other programs for use with BBHO.

__init__ can take whatever arguments you choose,
as you can simply change how it is called in bbho.py;
evaluate(), however, must keep the same signature,
with next_input being a list of new parameter values.

-Blake Edwards / Dark Element
"""

import sys

import numpy as np
import tensorflow as tf

#Currently only on cartpole, will push to a more general directory when finished
sys.path.append("../openai/classic_control/cartpole/src")

import policy_gradient_configurer

#For LIRA/DENNIS
sys.path.append("../dennis/dennis6/src")#THIS NEEDS TO BE UPDATED

sys.path.append("../tuberculosis_project/lira/lira2/src")
import lira2_bbho_configurer
import lira2_pre_transfer_bbho_configurer

#How we obtain a scalar output given our inputs, for varying functions
class policy_gradient(object):
    #We get our policy gradient result by averaging over all our average timestep results
    def __init__(self, epochs, timestep_n, run_count):
        self.run_count = run_count
        self.configurer = policy_gradient_configurer.Configurer(epochs, timestep_n)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):
        config_output = self.configurer.run_config(bbf_evaluation_i, bbf_evaluation_n, self.run_count, next_input[0], 0.0, next_input[1], 0.0, next_input[2], next_input[3])
        config_avg_output = np.mean(config_output)
        return [config_avg_output]

"""
NEEDS TO BE UPDATED
class dennis(object):
    def __init__(self, epochs, run_count):
        self.configurer = dennis_configurer.Configurer(epochs, run_count)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):
        config_output = self.configurer.run_config(next_input[0], next_input[1], next_input[2], next_input[3])
        config_avg_output = np.mean(config_output)
        return [config_avg_output]
"""

class lira2(object):
    def __init__(self, epochs, run_count):
        self.configurer = lira2_bbho_configurer.Configurer(epochs, run_count)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):

        #So we make sure that we don't lose available memory with each run (due to a small Keras bug, I believe)
        with tf.Session().as_default():
            #Get the config output; it should be an average over training iterations
            config_output = self.configurer.run_config(next_input)

            #Then average over our epochs to get one scalar value
            config_avg_output = np.mean(config_output)
            return [config_avg_output]

class lira2_pre_transfer(object):
    def __init__(self, epochs, run_count):
        self.configurer = lira2_pre_transfer_bbho_configurer.Configurer(epochs, run_count)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):

        #So we make sure that we don't lose available memory with each run (due to a small Keras bug, I believe)
        with tf.Session().as_default():
            #Get the config output; it should be an average over training iterations
            config_output = self.configurer.run_config(next_input)

            #Then average over our epochs to get one scalar value
            config_avg_output = np.mean(config_output)
            return [config_avg_output]

--------------------------------------------------------------------------------
/covariance_functions.py:
--------------------------------------------------------------------------------
import numpy as np
import scipy.special as ss
import theano
import theano.tensor as T

x1 = T.fvector()
x2 = T.fvector()
l = T.fscalar()
v = T.scalar()

class covariance_function(object):
    #Superclass

    def __init__(self, lengthscale, v):
        self.lengthscale = lengthscale
        self.v = v

class dot_product(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2], T.dot(x1.T, x2), allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return 1 * self.f(x_i, x_j)

class brownian_motion(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2], T.minimum(x1, x2), allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return 1 * self.f(x_i, x_j)

class squared_exponential(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2, l],
                T.exp(
                    T.dot(
                        (-1.0/T.dot(2.0, l)),
                        T.sum(T.sqr(x1 - x2))
                    )
                )
            , allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j, self.lengthscale)

class ornstein_uhlenbeck(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2],
                T.exp(-1.0 * T.sqrt(T.dot((x1-x2).T, (x1-x2)))),
                allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j)

class periodic1(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2],
                T.exp(-1.0 * T.sin(5.0 * np.pi * T.sum(T.sqr(x1-x2)))),
                allow_input_downcast=True)


    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j)

class matern1(covariance_function):
    """
    NOT UPGRADING THIS YET SINCE THEANO DOESN'T HAVE THE BESSEL FUNCTION,
    AND ALSO BECAUSE I DON'T KNOW WHY MY IMPLEMENTATION IS HORRIBLY BROKEN
    FOR THIS ONE AS WELL
    """

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)

    def evaluate(self, x_i, x_j):
        dist = np.linalg.norm(x_i-x_j)
        return np.nan_to_num(((2**(1-self.v))/(ss.gamma(self.v))) * ((np.sqrt(2*self.v) * (dist/self.lengthscale))**self.v) * ss.kv(self.v, (np.sqrt(2*self.v) * (dist/self.lengthscale))))
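
#Usage sketch (mirroring how bbho.py instantiates these kernels): pick one and
# call evaluate() on two input vectors, e.g.
#   cov = matern2(1.0, [5/2.0])
#   k = cov.evaluate(np.array([1.0, 2.0]), np.array([1.5, 2.5]))
# evaluate() returns the scalar covariance between the two points.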

class matern2(covariance_function):
    """However, this one works perfectly, and is so far the best one.
    Credit to the research paper "Practical Bayesian Optimization of Machine
    Learning Algorithms" for including this covariance function."""

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        #Helper to compute the squared distance (currently unused)
        self.dist = theano.function([x1, x2], T.sum(T.sqr(x1-x2)))
        self.f = theano.function([x1, x2],
                (1 + T.sqrt(5 * T.sum(T.sqr(x1-x2))) + (5.0/3.0) * T.sum(T.sqr(x1-x2))) * T.exp(-T.sqrt(5 * T.sum(T.sqr(x1-x2)))),
                allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Description

BBHO (pronounced BeeBo, because names are much more fun) is a Black Box Optimization program that uses [Bayesian Optimization](https://arxiv.org/pdf/1206.2944.pdf) to optimize a given unknown (black box) function. It does this using Gaussian Processes for Regression combined with an acquisition function. For an overarching, fundamentals-up understanding of this method,

### [I've made a series on my blog detailing it.](https://dark-element.com/2016/10/10/bayesian-optimization-of-black-box-functions/)

### [Resources & sources where I learned what I needed to go from fundamentals to this](https://dark-element.com/2016/10/14/bayesian-optimization-of-black-box-functions-appendix-and-sources-resources/#resources-sources)


# Installation

1. Clone this repository.
2. Add your own handler for whatever you are optimizing to `black_box_functions.py`, and update how it is called at the beginning of `bbho.py`. In that file you can see several examples of models I have previously optimized with BBHO. Each handler is a Python class, with an `__init__()` method for initialization and an `evaluate()` method for feeding in new inputs. You can change how the handler is initialized to take whatever arguments you need; however, I do not recommend changing the signature of the `evaluate()` method.
3. BBHO will pass the index of the current evaluation `bbf_evaluation_i`, the total number of evaluations `bbf_evaluation_n`, and finally a list of the hyperparameters `next_input`. You can choose to do nothing with `bbf_evaluation_i` and `bbf_evaluation_n`; they are meant for printing the optimizer's progress.
4. Use the `next_input` variable as the inputs to whatever problem you are trying to optimize. In my case, I might have a Keras neural network that assigns the mini-batch size and learning rate like `mini_batch_size, learning_rate = next_input`, where in this case `next_input = [some_mini_batch_size_value, some_learning_rate_value]`.
5. After you have fed this input in, just make sure that your handler returns a scalar. In my configurer for LIRA, I get the validation accuracy and then use `np.mean(config_output)` to get one value representing how well the inputs performed. I then return it, so my handler's `evaluate()` method only needs four lines.
6. With this, you should be able to start optimizing; there are, however, a few parameters for BBHO itself left to configure. (A minimal handler sketch is shown below.)
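
As a rough illustration, a handler following this pattern might look like the sketch below (the class name, constructor arguments, and the `my_model.train_and_score()` call are hypothetical placeholders for your own code):

```python
import numpy as np

class my_black_box(object):
    def __init__(self, epochs, run_count):
        #Entirely up to you; store whatever your own model needs
        self.epochs = epochs
        self.run_count = run_count

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):
        #Optionally report progress with the indices BBHO passes in
        print "Evaluation %i of %i" % (bbf_evaluation_i + 1, bbf_evaluation_n)

        #Unpack the hyperparameters BBHO chose for this evaluation
        mini_batch_size, learning_rate = next_input

        #Run the model run_count times and reduce the results to one scalar
        scores = [my_model.train_and_score(self.epochs, mini_batch_size, learning_rate)
                  for run in range(self.run_count)]
        return [np.mean(scores)]
```

Note that, like the handlers shipped in `black_box_functions.py`, `evaluate()` returns its scalar wrapped in a one-element list.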

# Configuration

1. In my LIRA black box function, I initialize it with `epochs` and `run_count` variables. These represent the number of times to loop over the training set and the number of times to run an entire training iteration, respectively. As said in the Installation instructions, you can change the initialization to suit whatever your black box function may be, but you do have to initialize what you are optimizing at the beginning of `bbho.py`.
2. After this, the remaining parameters control the optimization itself:

`detail_n`: the number of intervals for each hyperparameter optimized. If you assign the `hps` variable later to be `[HyperParameter(0, 100)]` and set `detail_n = 100`, that hyperparameter's range will be `0, 1, 2, ..., 99`. If you were to set `detail_n = 50` instead, the step size becomes 2 and the range would be `0, 2, 4, ..., 98`, and so on.

`maximizing`: a boolean for whether we are maximizing or minimizing. I have not tested minimizing extensively; use it at your own risk, or apply a slight transformation to the output of your handler so you can keep this `True`.

`bbf_evaluation_n`: the total number of points evaluated with BBHO, including the two random points it starts with. Changing this will change the time it takes to finish optimization.

`acquisition_function`: the acquisition function we will use later on. I have it set to the upper confidence bound acquisition function with k = confidence interval = 1.5, but feel free to change this to any of the options available in `acquisition_functions.py`. Further explanation of acquisition functions can be found in my blog series, [Part Three](https://dark-element.com/2016/10/13/bayesian-optimization-of-black-box-functions-part-3/) and the [Appendix](https://dark-element.com/2016/10/14/bayesian-optimization-of-black-box-functions-appendix-and-sources-resources/). I have also added some parameters for exponential decay of the confidence interval, but this is for personal experiments of my own, and I cannot yet make any guarantees as to the efficacy of using a decay rate.

`covariance_function`: the covariance function we will use later on. It is initially set to the Matérn 5/2 covariance function with lengthscale 1, but feel free to change this to any of the options available in `covariance_functions.py`. Further explanation of covariance functions can be found in my blog series, [Part Three](https://dark-element.com/2016/10/13/bayesian-optimization-of-black-box-functions-part-3/) and the [Appendix](https://dark-element.com/2016/10/14/bayesian-optimization-of-black-box-functions-appendix-and-sources-resources/).

`hps`: mentioned earlier, this is a list of `HyperParameter` objects, found in `hyperparameter.py`. Specify each one with the syntax `HyperParameter(min, max)`, where `min` and `max` give the range of that hyperparameter. The number of entries must match the number of arguments your handler expects. If my handler expected a mini-batch size and a regularization rate, I might have `hps = [HyperParameter(0, 50), HyperParameter(0, 1)]`. A full example of the parameter block is sketched below.
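
Putting those together, the tunable block at the top of `bbho.py` looks roughly like the following (these are the values the repo currently ships with; treat them as a starting point rather than a recommendation):

```python
detail_n = 50                                   #Grid resolution per hyperparameter
maximizing = True                               #Maximize (True) or minimize (False)
bbf_evaluation_n = 40                           #Total black box evaluations (the first 2 are random)

initial_confidence_interval = 1.5               #k for the upper confidence bound
confidence_interval_decay_rate = 0              #0 disables the exponential decay
acquisition_function = upper_confidence_bound()

lengthscale = 1.0
v = [5/2.0]                                     #Only used by matern1 (not currently functional)
covariance_function = matern2(lengthscale, v)

#Mini batch size, regularization rate, dropout percentage
hps = [HyperParameter(0, 100), HyperParameter(0, 7), HyperParameter(0, 1)]
```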
10 | 11 | "Matrix inverse" 12 | m = T.matrix() 13 | invert_matrix = theano.function([m], matrix_inverse(m), allow_input_downcast=True) 14 | 15 | "Adds one axis if we are hoping to use the Prob Density function with vectors instead of matrices, as it's designed" 16 | v = T.vector() 17 | vector_to_column = theano.function([v], v.dimshuffle(0, 'x'), allow_input_downcast=True) 18 | 19 | #Multivariate covariance mean & variance 20 | test_cov = T.matrix() 21 | test_cov_T = T.matrix() 22 | test_cov_diag = T.vector() 23 | training_cov_m_inv = T.matrix() 24 | bbf_evaluations = T.matrix() 25 | 26 | #Gaussian Distribution vars 27 | inputs_m = T.matrix() 28 | means_m = T.matrix() 29 | variances_m = T.matrix() 30 | 31 | inputs_v = T.vector() 32 | means_v = T.vector() 33 | variances_v = T.vector() 34 | 35 | "Multivariate covariance mean" 36 | compute_mv_mean = theano.function([test_cov_T, training_cov_m_inv, bbf_evaluations], 37 | outputs = T.dot(T.dot(test_cov_T, training_cov_m_inv), bbf_evaluations), 38 | allow_input_downcast=True 39 | ) 40 | 41 | "Multivariate covariance variance" 42 | """ 43 | We only need the diagonal axis at completion. 44 | If we do the entire dot product, it's often with arrays of size 45 | nx2, 2xn, which results in an array of size nxn, 46 | but since n is often huge this can result in an array that would take up 47 | exobytes of storage. So, we do it like this so we only have to get a vector result. 48 | 49 | sum(A * B^T, axis=1) 50 | """ 51 | 52 | compute_mv_variance = theano.function([test_cov_diag, test_cov_T, training_cov_m_inv, test_cov], 53 | outputs = test_cov_diag - T.sum(T.dot(test_cov_T, training_cov_m_inv) * test_cov.T, axis=1), 54 | allow_input_downcast=True 55 | ) 56 | 57 | """ 58 | I had a brief moment where I thought I was using Gaussian Distributions incorrectly 59 | to get the correct result. However, when we get the cumulative distribution, 60 | or the probabilities up to a point, or the probability of a point; any time we 61 | need the probability of some input wrt a Gaussian Distribution. For this, we 62 | obviously use the probability density function, that returns the probability of 63 | a given input point. So, you can find the pretty latex version under wikipedia. 
64 | """ 65 | 66 | "Probability Density Function / Gaussian Distribution Function (for matrix input)" 67 | probability_density_function_m = theano.function([inputs_m, means_m, variances_m], 68 | outputs = (T.exp(-(T.sqr(inputs_m-means_m))/(2*variances_m)) / (T.sqrt(2*variances_m*np.pi))), 69 | allow_input_downcast=True 70 | ) 71 | 72 | "Probability Density Function / Gaussian Distribution Function (for vector input)" 73 | probability_density_function_v = theano.function([inputs_v, means_v, variances_v], 74 | outputs = (T.exp(-(T.sqr(inputs_v-means_v))/(2*variances_v)) / (T.sqrt(2*variances_v*np.pi))), 75 | allow_input_downcast=True 76 | ) 77 | 78 | def gaussian_distribution_m(inputs, means, variances): 79 | #For matrices 80 | #Gets input x, means, and variances 81 | #Returns vector or scalar from input 82 | return probability_density_function_m(inputs, means, variances) 83 | 84 | def gaussian_distribution_v(inputs, means, variances): 85 | #For vectors 86 | #Gets input x, means, and variances 87 | #Returns vector or scalar from input 88 | return probability_density_function_v(inputs, means, variances) 89 | 90 | def cdf(inputs, means, variances): 91 | #Get values to compute cdf over 92 | #print inputs.shape, means.shape, variances.shape 93 | #print np.array([np.arange(input-100, input, .1) for input in inputs]).shape 94 | 95 | #Convert to matrix counterparts so we can correctly get an estimate of the cdf 96 | inputs = np.array([np.arange(input-100, input, .1) for input in inputs]) 97 | 98 | #We just repeat these over the axis we are getting different inputs on, which 99 | # is why they are repeated according to inputs.shape[1] 100 | means = np.array([np.repeat(mean, inputs.shape[1]) for mean in means]) 101 | variances = np.array([np.repeat(mean, inputs.shape[1]) for variance in variances]) 102 | 103 | dist_values = gaussian_distribution_m(inputs, means, variances) 104 | #dist_values = gaussian_distribution(inputs, means, variances) 105 | 106 | #Equivalent to the last element of cumulative sum 107 | return np.sum(dist_values, axis=1) 108 | 109 | def get_cov_matrix(f, cov): 110 | #Given a vector f, generate the covariance matrix 111 | #f because known inputs 112 | #Numpy loops faster than theano 113 | f_n = len(f) 114 | f_m = np.zeros(shape=(f_n, f_n)) 115 | for row_i, f_i in enumerate(f): 116 | for col_i, f_j in enumerate(f): 117 | f_m[row_i, col_i] = cov.evaluate(f_i, f_j) 118 | 119 | return f_m 120 | 121 | def get_cov_vector(f, test_f, cov): 122 | #Given a vector f and scalar f* (test_f) 123 | #Generate a covariance vector for each value in f 124 | #Numpy loops faster than theano 125 | f_n = len(f) 126 | f_v = np.zeros(shape=(f_n)) 127 | for row_i, f_i in enumerate(f): 128 | f_v[row_i] = cov.evaluate(test_f, f_i) 129 | 130 | return f_v 131 | 132 | """ 133 | def cartesian_product(vectors): 134 | return [np.array(i) for i in itertools.product(*vectors)] 135 | """ 136 | 137 | def get_cartesian_product_element_by_index(cp, index): 138 | for i, e in enumerate(cp): 139 | if i == index: 140 | return e 141 | else: 142 | #Not in the cartesian product 143 | return False 144 | 145 | 146 | def convert_vector_to_column(v): 147 | return vector_to_column(v) 148 | 149 | def theano_matrix_inv(m): 150 | return invert_matrix(m) 151 | 152 | def get_test_means(test_cov_T, training_cov_m_inv, bbf_evaluations): 153 | return compute_mv_mean(test_cov_T, training_cov_m_inv, bbf_evaluations) 154 | 155 | def get_test_variances(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov): 156 | return 

def get_test_variances(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov):
    return compute_mv_variance(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov)

def exp_decay(initial, rate, iteration):
    #Do our k*e^(r*t) exponential decay
    return initial*np.exp(rate*iteration)


--------------------------------------------------------------------------------
/bbho.py:
--------------------------------------------------------------------------------
"""
Read all the details in my blog post here:
    https://dark-element.com/2016/10/10/bayesian-optimization-of-black-box-functions/
Or the github repo here:
    https://github.com/DarkElement75/bbho

Made by Blake Edwards / Dark Element
"""

import sys, time, itertools

import numpy as np

import theano
import theano.tensor as T

import hyperparameter
from hyperparameter import HyperParameter

import output_grapher

import acquisition_functions
from acquisition_functions import *

import covariance_functions
from covariance_functions import *

import bbho_base
from bbho_base import *

import black_box_functions

"""START TUNABLE PARAMETERS"""
"""
POLICY GRADIENT IMPLEMENTATION
#We use our policy gradient black box function
#Configure specifics in the black_box_functions file and so on
epochs = 400
timestep_n = 200
run_count = 5
bbf = black_box_functions.policy_gradient(epochs, timestep_n, run_count)
"""

"""
DENNIS IMPLEMENTATION
#We use our DENNIS+LIRA black box function
#Configure specifics in the black_box_functions file and so on
epochs = 1000
run_count = 3
bbf = black_box_functions.dennis(epochs, run_count)
"""

"""
LIRA IMPLEMENTATION
"""
#We use our LIRA black box function
#Configure specifics in the black_box_functions file and so on
epochs = 50
run_count = 3
bbf = black_box_functions.lira2_pre_transfer(epochs, run_count)

#For efficiency comparisons
start_time = time.time()

#Number of evaluated input points / level of detail
#Note: increasing this massively increases the computation required per evaluation.
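#(The search grid has detail_n**len(hps) points, so with the three
# hyperparameters below and detail_n = 50 that is 50**3 = 125,000 points.)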
detail_n = 50

#If we want the highest point or the lowest point
maximizing = True

#Number of bbf evaluations to perform before ending optimization
#(this total includes the two random starting points)
bbf_evaluation_n = 40

#Choice of acquisition function and acquisition function parameters
initial_confidence_interval = 1.5
#confidence_interval_decay_rate = -4.0/bbf_evaluation_n
confidence_interval_decay_rate = 0
acquisition_function = upper_confidence_bound()

#Choice of covariance function and cf parameters
lengthscale = 1.0
v = [5/2.0]#For matern1 (not currently functional)
covariance_function = matern2(lengthscale, v)

#Initialize ranges for each parameter into a resulting matrix
#Our level of detail / detail_n determines our step size for each
#Mini Batch Size, Regularization Rate, Dropout Percentage
hps = [HyperParameter(0, 100), HyperParameter(0, 7), HyperParameter(0, 1)]

#UI/graph settings for testing
plot_2d_results = False
plot_3d_results = False

"""END TUNABLE PARAMETERS"""

#Initialize independent domains of each parameter
#We will take the cartesian product of the vectors contained here to get our entire set of multidimensional inputs
independent_domains = np.array([np.arange(hp.min, hp.max, ((hp.max-hp.min)/float(detail_n))) for hp in hps])

#Make sure we don't run into the annoying problem where we get one extra element by accident
for i in range(len(independent_domains)):
    independent_domains[i] = independent_domains[i][:detail_n]

#Get past the annoying problem of it not being treated like a proper matrix
independent_domains = np.array([np.array(independent_domain) for independent_domain in independent_domains])

#Get the total number of grid points as n = detail_n^r, with r the number of hyper parameters
n = detail_n**len(hps)

"""
We make a copy so we have something that is not shuffled to take the cartesian product of from now on.
Later on, when we get the cartesian product of all the contained vectors to obtain the entire
multidimensional domain, we have to start over on each pass: iterators can only be looped through once,
and in order to restart we must initialize a new iterator.
That's why you'll see a lot of itertools.product(*domain) in here.
"""
116 | """ 117 | domain = np.copy(independent_domains) 118 | 119 | #Get our axis vectors if plotting 120 | if plot_2d_results: 121 | domain_x = np.copy(independent_domains[0]) 122 | domain_y = [] 123 | 124 | elif plot_3d_results: 125 | domain_x = np.copy(independent_domains[0]) 126 | domain_y = np.copy(independent_domains[1]) 127 | 128 | #Now that we have full domain, we can shuffle the original to get two random input vectors 129 | for independent_domain in independent_domains: 130 | np.random.shuffle(independent_domain) 131 | 132 | #Get our different values easily by transposing 133 | x1, x2 = independent_domains.transpose()[:2] 134 | 135 | #Known inputs 136 | training_inputs = T.vector() 137 | 138 | #Known evaluations 139 | training_outputs = T.vector() 140 | 141 | #Cartesian product of the ranges of each of our hyper parameter ranges 142 | test_domain = T.matrix() 143 | 144 | #Now that we have our two random input vectors, evaluate them and store them in our bbf inputs and outputs vector 145 | #Modify the bbf function when you make this more complicated with input to a bot 146 | #This needs to not be a np.array since we have to append 147 | bbf_inputs = [x1, x2] 148 | 149 | #This needs to be np array so we can do vector multiplication 150 | print "Evaluating Initial Random Inputs" 151 | bbf_evaluations = np.array([bbf.evaluate(0, bbf_evaluation_n, x1), bbf.evaluate(1, bbf_evaluation_n, x2)]) 152 | 153 | #Our main loop to go through every time we evaluate a new point, until we have exhausted our allowed 154 | # black box function evaluations. 155 | for bbf_evaluation_i in range(2, bbf_evaluation_n): 156 | #sys.stdout.write("\rDetermining Point #%i" % (bbf_evaluation_i+1)) 157 | #sys.stdout.flush() 158 | print "Determining Point #%i" % (bbf_evaluation_i+1) 159 | 160 | #Decay our confidence interval by decay rate, 161 | # and adjust our evaluation index back accordingly to account for our first two random inputs 162 | confidence_interval = exp_decay(initial_confidence_interval, confidence_interval_decay_rate, bbf_evaluation_i-2) 163 | 164 | #Since we reset this every time we generate through the domain 165 | test_means = np.zeros(shape=(n)) 166 | test_variances = np.zeros(shape=(n)) 167 | test_values = np.zeros(shape=(n)) 168 | 169 | #Generate our covariance matrices and vectors with theano backend 170 | training_cov_m = get_cov_matrix(bbf_inputs, covariance_function)#K 171 | 172 | #Clip a small amount so we don't have singular matrix 173 | training_cov_m = training_cov_m + (np.eye(training_cov_m.shape[0])*1e-7) 174 | 175 | #Invert 176 | training_cov_m_inv = theano_matrix_inv(training_cov_m)#K^-1 177 | 178 | #Get matrix by getting our vectors for each test point and combining 179 | test_cov_T = np.array([get_cov_vector(bbf_inputs, np.array(test_input), covariance_function) for test_input in itertools.product(*domain)])#K* 180 | test_cov = test_cov_T.transpose()#K*T 181 | 182 | #Get each diag for each test input 183 | test_cov_diag = np.array([covariance_function.evaluate(np.array(test_input), np.array(test_input)) for test_input in itertools.product(*domain)])#K** 184 | 185 | #Compute test mean using our Multivariate Gaussian Theorems 186 | #We flatten so we don't have shape (100, 1), but shape (100,) 187 | #test_mean = np.dot(np.dot(test_cov_T, training_cov_m_inv), bbf_evaluations) 188 | test_means = get_test_means(test_cov_T, training_cov_m_inv, bbf_evaluations).flatten() 189 | 190 | #Compute test variance using our Multivariate Gaussian Theorems 191 | #test_variance = test_cov_diag - 
    #Compute the test variances using our Multivariate Gaussian Theorems
    #test_variance = test_cov_diag - np.dot(np.dot(test_cov_T, training_cov_m_inv), test_cov)
    test_variances = get_test_variances(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov)

    #Now that we have all our means u* and variances c* for every point in the domain,
    # move on to determining the next point to evaluate using our acquisition function.
    #If we want the point with the greatest possible value, score u + c; otherwise u - c
    #Numpy adds faster
    if maximizing:
        test_values = test_means + test_variances
    else:
        test_values = test_means - test_variances

    if plot_2d_results or plot_3d_results:
        output_grapher.graph_output(plot_2d_results, plot_3d_results, bbf_evaluation_i, bbf_evaluation_n, domain_x, domain_y, detail_n, test_means, bbf_inputs, bbf_evaluations, test_means+test_variances, test_means-test_variances)

    #Get the index of the next input to evaluate in our black box function,
    # since acquisition functions return argmax values
    next_input_i = acquisition_function.evaluate(test_means, test_variances, test_values, confidence_interval)

    #Add our new input
    next_input = get_cartesian_product_element_by_index(itertools.product(*domain), next_input_i)
    #next_input = domain[next_input_i]
    #print "\tNew point: {}".format(next_input)

    bbf_inputs.append(np.array(next_input))

    #Evaluate the new input.
    #We need this as an np array for vector multiplication,
    # but we need to append as well, so we have to convert back and forth.
    #Luckily, it's our smallest np array.
    bbf_evaluations = list(bbf_evaluations)

    #Evaluate using our specified black box function
    bbf_evaluations.append(bbf.evaluate(bbf_evaluation_i, bbf_evaluation_n, next_input))

    bbf_evaluations = np.array(bbf_evaluations)

best_input = bbf_inputs[np.argmax(bbf_evaluations)]
print ""
print bbf_inputs, bbf_evaluations
print "Best input found after {} iterations: {}".format(bbf_evaluation_n, best_input)
print "Time to run: %f" % (time.time() - start_time)
--------------------------------------------------------------------------------