├── .gitignore
├── results
│   ├── 10.jpg
│   ├── 11.jpg
│   ├── 12.jpg
│   ├── 13.jpg
│   ├── 14.jpg
│   ├── 15.jpg
│   ├── 16.jpg
│   ├── 17.jpg
│   ├── 18.jpg
│   ├── 19.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   ├── 4.jpg
│   ├── 5.jpg
│   ├── 6.jpg
│   ├── 7.jpg
│   ├── 8.jpg
│   └── 9.jpg
├── hyperparameter.py
├── LICENSE
├── output_grapher.py
├── acquisition_functions.py
├── black_box_functions.py
├── covariance_functions.py
├── README.md
├── bbho_base.py
└── bbho.py

/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.swp
*.txt
test*.py

--------------------------------------------------------------------------------
/results/2.jpg ... /results/19.jpg:
--------------------------------------------------------------------------------
Binary image files (result plots), each available at
https://raw.githubusercontent.com/BlueHephaestus/bbho/HEAD/results/<n>.jpg

--------------------------------------------------------------------------------
/hyperparameter.py:
--------------------------------------------------------------------------------
class HyperParameter(object):
    def __init__(self, min, max):
        self.min = min
        self.max = max
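
# Usage sketch (values taken from the README's example; illustrative only):
# bbho.py builds its search space as a list of these, one per tunable parameter, e.g.
#   hps = [HyperParameter(0, 50), HyperParameter(0, 1)]
# where each pair is the (min, max) range that BBHO will discretize and search.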

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Blake Edwards / Dark Element

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/output_grapher.py:
--------------------------------------------------------------------------------
import numpy as np

from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import cm

def graph_output(plot_2d_results, plot_3d_results, bbf_evaluation_i, bbf_evaluation_n, domain_x, domain_y, detail_n, test_means, bbf_inputs, bbf_evaluations, val1, val2):

    #Set the filename
    fname = "results/%02d" % bbf_evaluation_i

    #Plot our updates
    if plot_2d_results:
        plt.plot(domain_x, test_means)
        #plt.plot(domain_x, test_variances, 'r')
        #plt.plot(bbf_inputs, bbf_evaluations, 'bo')
        plt.scatter(bbf_inputs, bbf_evaluations, marker='o', c='b', s=100.0, label="Function Evaluations")
        plt.plot(domain_x, val1, 'r')
        plt.plot(domain_x, val2, 'r')
        #plt.plot(domain_x, bbf(domain_x), 'y')

        #Label and style the plot *before* saving, so the labels, legend, and
        # axis limits actually appear in the saved image
        plt.xlabel("X-Axis")
        plt.ylabel("Y-Axis")
        plt.legend(bbox_to_anchor=(1, 1), loc=1, borderaxespad=0.)
        plt.axis([0, 10, 0, 2])

        plt.savefig("%s.jpg" % fname, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', papertype=None, format=None,
                transparent=False, bbox_inches='tight', pad_inches=0.1,
                frameon=None)
        #plt.show()
        plt.gcf().clear()

    elif plot_3d_results:
        #So we only render on the last one (just erase this if you want all of them)
        if bbf_evaluation_i == bbf_evaluation_n-1:
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            #X & Y have to be matrices of all vertices
            #Z has to be a matrix of outputs
            #Convert our vectors to compatible matrix counterparts
            Y = np.array([[i] for i in domain_y])

            X = np.tile(domain_x, (detail_n, 1))
            Y = np.tile(Y, (1, detail_n))

            #This one's easy, just reshape
            Z1 = test_means.reshape(detail_n, detail_n)
            #Z2 = test_variances.reshape(detail_n, detail_n)
            Z3 = (val1).reshape(detail_n, detail_n)
            Z4 = (val2).reshape(detail_n, detail_n)


            ax.plot_surface(X, Y, Z1, rstride=1, cstride=1, cmap=cm.coolwarm)
            #ax.plot_wireframe(X, Y, Z2, rstride=1, cstride=1)
            ax.plot_wireframe(X, Y, Z3, rstride=1, cstride=1)
            ax.plot_wireframe(X, Y, Z4, rstride=1, cstride=1)
            plt.savefig("%s.jpg" % fname, dpi=None, facecolor='w', edgecolor='w',
                    orientation='portrait', papertype=None, format=None,
                    transparent=False, bbox_inches='tight', pad_inches=0.1,
                    frameon=None)

            plt.gcf().clear()
            #plt.show()

--------------------------------------------------------------------------------
/acquisition_functions.py:
--------------------------------------------------------------------------------
import numpy as np

import bbho_base
from bbho_base import *

means = T.vector()
stddevs = T.vector()
variances = T.vector()
values = T.vector()
cdfs = T.vector()
dist_values = T.vector()
normal_dist_values = T.vector()
confidence_interval = T.scalar()

class acquisition_function(object):

    def __init__(self):
        #Currently nothing to init with
        pass

class probability_improvement(acquisition_function):

    def __init__(self):
        acquisition_function.__init__(self)
        self.f = theano.function([cdfs],
                outputs = T.argmax(cdfs),
                allow_input_downcast=True
            )

    def evaluate(self, means, variances, values, confidence_interval):
        #We compute the cdf here, outside the compiled theano function, so that
        # it is only evaluated once we actually have means, variances, and values,
        # rather than once at function-compilation time.
        cdfs = cdf(values, means, variances)
        return self.f(cdfs)

class expected_improvement(acquisition_function):

    def __init__(self):
        acquisition_function.__init__(self)

        #We assign this so we don't compute it twice in our function
        self.stddev = theano.function([variances], T.sqrt(variances), allow_input_downcast=True)

        self.f = theano.function([stddevs, dist_values, cdfs, normal_dist_values],
                outputs = T.argmax(stddevs * dist_values * cdfs + normal_dist_values),
                allow_input_downcast=True
            )

    def evaluate(self, means, variances, values, confidence_interval):
        #As in probability_improvement, we compute these outside the compiled
        # theano function so they are only evaluated once we have actual values
        dist_values = gaussian_distribution_v(values, means, variances)
        cdfs = cdf(values, means, variances)
        normal_dist_values = gaussian_distribution_v(values, np.zeros_like(values), np.ones_like(values))

        return self.f(self.stddev(variances), dist_values, cdfs, normal_dist_values)


class upper_confidence_bound(acquisition_function):

    def __init__(self):
        acquisition_function.__init__(self)
        self.f = theano.function([means, variances, confidence_interval],
                outputs = T.argmax(means + confidence_interval * T.sqrt(variances)),
                allow_input_downcast=True
            )

    def evaluate(self, means, variances, values, confidence_interval):
        return self.f(means, variances, confidence_interval)
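
#Worked example for upper_confidence_bound (illustrative numbers): with
# means = [0.5, 0.9], variances = [0.04, 0.01], and confidence_interval = 1.5,
# it computes argmax(means + 1.5 * sqrt(variances))
#   = argmax([0.5 + 0.3, 0.9 + 0.15]) = argmax([0.8, 1.05]) = 1,
# i.e. it selects the second candidate point.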

--------------------------------------------------------------------------------
/black_box_functions.py:
--------------------------------------------------------------------------------

"""
Used to set up our other programs for use with BBHO.

__init__ can take whatever arguments you choose,
as you can simply change how it is called in bbho.py;
evaluate(), however, must keep the same signature,
with next_input being a list of new parameter values.

-Blake Edwards / Dark Element
"""

import sys

import numpy as np
import tensorflow as tf

#Currently only on cartpole, will push to a more general directory when finished
sys.path.append("../openai/classic_control/cartpole/src")

import policy_gradient_configurer

#For LIRA/DENNIS
sys.path.append("../dennis/dennis6/src")#THIS NEEDS TO BE UPDATED

sys.path.append("../tuberculosis_project/lira/lira2/src")
import lira2_bbho_configurer
import lira2_pre_transfer_bbho_configurer

#How we obtain a scalar output given our inputs, for varying functions
class policy_gradient(object):
    #We get our policy gradient result by averaging over all our average timestep results
    def __init__(self, epochs, timestep_n, run_count):
        self.run_count = run_count
        self.configurer = policy_gradient_configurer.Configurer(epochs, timestep_n)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):
        config_output = self.configurer.run_config(bbf_evaluation_i, bbf_evaluation_n, self.run_count, next_input[0], 0.0, next_input[1], 0.0, next_input[2], next_input[3])
        config_avg_output = np.mean(config_output)
        return [config_avg_output]

"""
NEEDS TO BE UPDATED
class dennis(object):
    def __init__(self, epochs, run_count):
        self.configurer = dennis_configurer.Configurer(epochs, run_count)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):
        config_output = self.configurer.run_config(next_input[0], next_input[1], next_input[2], next_input[3])
        config_avg_output = np.mean(config_output)
        return [config_avg_output]
"""

class lira2(object):
    def __init__(self, epochs, run_count):
        self.configurer = lira2_bbho_configurer.Configurer(epochs, run_count)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):

        #So we make sure that we don't lose available memory with each run (due to a small Keras bug, I believe)
        with tf.Session().as_default():
            #Get the config output; it should be an average over training iterations
            config_output = self.configurer.run_config(next_input)

            #Then average over our epochs to get one scalar value
            config_avg_output = np.mean(config_output)
            return [config_avg_output]

class lira2_pre_transfer(object):
    def __init__(self, epochs, run_count):
        self.configurer = lira2_pre_transfer_bbho_configurer.Configurer(epochs, run_count)

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):

        #So we make sure that we don't lose available memory with each run (due to a small Keras bug, I believe)
        with tf.Session().as_default():
            #Get the config output; it should be an average over training iterations
            config_output = self.configurer.run_config(next_input)

            #Then average over our epochs to get one scalar value
            config_avg_output = np.mean(config_output)
            return [config_avg_output]

--------------------------------------------------------------------------------
/covariance_functions.py:
--------------------------------------------------------------------------------
import numpy as np
import scipy.special as ss
import theano
import theano.tensor as T

x1 = T.fvector()
x2 = T.fvector()
l = T.fscalar()
v = T.scalar()

class covariance_function(object):
    #Superclass

    def __init__(self, lengthscale, v):
        self.lengthscale = lengthscale
        self.v = v

class dot_product(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2], T.dot(x1.T, x2), allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return 1 * self.f(x_i, x_j)

class brownian_motion(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2], T.minimum(x1, x2), allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return 1 * self.f(x_i, x_j)

class squared_exponential(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2, l],
                T.exp(
                    T.dot(
                        (-1.0/T.dot(2.0, l)),
                        T.sum(T.sqr(x1 - x2))
                    )
                )
            , allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j, self.lengthscale)

class ornstein_uhlenbeck(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2],
                T.exp(-1.0 * T.sqrt(T.dot((x1-x2).T, (x1-x2)))),
                allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j)

class periodic1(covariance_function):

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        self.f = theano.function([x1, x2],
                T.exp(-1.0 * T.sin(5.0 * np.pi * T.sum(T.sqr(x1-x2)))),
                allow_input_downcast=True)


    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j)

class matern1(covariance_function):
    """
    NOT UPGRADING THIS YET SINCE THEANO DOESN'T HAVE THE BESSEL FUNCTION,
    AND ALSO BECAUSE I DON'T KNOW WHY MY IMPLEMENTATION IS HORRIBLY BROKEN
    FOR THIS ONE AS WELL
    """

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)

    def evaluate(self, x_i, x_j):
        dist = np.linalg.norm(x_i-x_j)
        return np.nan_to_num(((2**(1-self.v))/(ss.gamma(self.v))) * ((np.sqrt(2*self.v) * (dist/self.lengthscale))**self.v) * ss.kv(self.v, (np.sqrt(2*self.v) * (dist/self.lengthscale))))
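
#Usage sketch (mirroring how bbho.py instantiates these kernels): pick one and
# call evaluate() on two input vectors, e.g.
#   cov = matern2(1.0, [5/2.0])
#   k = cov.evaluate(np.array([1.0, 2.0]), np.array([1.5, 2.5]))
# evaluate() returns the scalar covariance between the two points.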

class matern2(covariance_function):
    """However, this one works perfectly, and is so far the best one.
    Credit to the research paper "Practical Bayesian Optimization of Machine
    Learning Algorithms" for including this covariance function."""

    def __init__(self, lengthscale, v):
        covariance_function.__init__(self, lengthscale, v)
        #Helper to compute the squared distance (currently unused)
        self.dist = theano.function([x1, x2], T.sum(T.sqr(x1-x2)))
        self.f = theano.function([x1, x2],
                (1 + T.sqrt(5 * T.sum(T.sqr(x1-x2))) + (5.0/3.0) * T.sum(T.sqr(x1-x2))) * T.exp(-T.sqrt(5 * T.sum(T.sqr(x1-x2)))),
                allow_input_downcast=True)

    def evaluate(self, x_i, x_j):
        return self.f(x_i, x_j)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Description

BBHO (pronounced BeeBo, because names are much more fun) is a Black Box Optimization program that uses [Bayesian Optimization](https://arxiv.org/pdf/1206.2944.pdf) to optimize a given unknown (black box) function. It does this using Gaussian Processes for Regression combined with an acquisition function. For an overarching, fundamentals-up understanding of this method,

### [I've made a series on my blog detailing it.](https://dark-element.com/2016/10/10/bayesian-optimization-of-black-box-functions/)

### [Resources & sources where I learned what I needed to go from fundamentals to this](https://dark-element.com/2016/10/14/bayesian-optimization-of-black-box-functions-appendix-and-sources-resources/#resources-sources)


# Installation

1. Clone this repository.
2. Add your own handler for whatever you are optimizing to `black_box_functions.py`, and update how it is called at the beginning of `bbho.py`. In that file you can see several examples of models I have previously optimized with BBHO. Each handler is a Python class, with an `__init__()` method for initialization and an `evaluate()` method for feeding in new inputs. You can change how the handler is initialized to take whatever arguments you need; however, I do not recommend changing the signature of the `evaluate()` method.
3. BBHO will pass the index of the current evaluation `bbf_evaluation_i`, the total number of evaluations `bbf_evaluation_n`, and finally a list of the hyperparameters `next_input`. You can choose to do nothing with `bbf_evaluation_i` and `bbf_evaluation_n`; they are meant for printing the optimizer's progress.
4. Use the `next_input` variable as the inputs to whatever problem you are trying to optimize. In my case, I might have a Keras neural network that assigns the mini-batch size and learning rate like `mini_batch_size, learning_rate = next_input`, where in this case `next_input = [some_mini_batch_size_value, some_learning_rate_value]`.
5. After you have fed this input in, just make sure that your handler returns a scalar. In my configurer for LIRA, I get the validation accuracy and then use `np.mean(config_output)` to get one value representing how well the inputs performed. I then return it, so my handler's `evaluate()` method only needs four lines.
6. With this, you should be able to start optimizing; there are, however, a few parameters for BBHO itself left to configure. (A minimal handler sketch is shown below.)
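
As a rough illustration, a handler following this pattern might look like the sketch below (the class name, constructor arguments, and the `my_model.train_and_score()` call are hypothetical placeholders for your own code):

```python
import numpy as np

class my_black_box(object):
    def __init__(self, epochs, run_count):
        #Entirely up to you; store whatever your own model needs
        self.epochs = epochs
        self.run_count = run_count

    def evaluate(self, bbf_evaluation_i, bbf_evaluation_n, next_input):
        #Optionally report progress with the indices BBHO passes in
        print "Evaluation %i of %i" % (bbf_evaluation_i + 1, bbf_evaluation_n)

        #Unpack the hyperparameters BBHO chose for this evaluation
        mini_batch_size, learning_rate = next_input

        #Run the model run_count times and reduce the results to one scalar
        scores = [my_model.train_and_score(self.epochs, mini_batch_size, learning_rate)
                  for run in range(self.run_count)]
        return [np.mean(scores)]
```

Note that, like the handlers shipped in `black_box_functions.py`, `evaluate()` returns its scalar wrapped in a one-element list.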

# Configuration

1. In my LIRA black box function, I initialize it with `epochs` and `run_count` variables. These represent the number of times to loop over the training set and the number of times to run an entire training iteration, respectively. As said in the Installation instructions, you can change the initialization to suit whatever your black box function may be, but you do have to initialize what you are optimizing at the beginning of `bbho.py`.
2. After this, the remaining parameters control the optimization itself:

`detail_n`: the number of intervals for each hyperparameter optimized. If you assign the `hps` variable later to be `[HyperParameter(0, 100)]` and set `detail_n = 100`, that hyperparameter's range will be `0, 1, 2, ..., 99`. If you were to set `detail_n = 50` instead, the step size becomes 2 and the range would be `0, 2, 4, ..., 98`, and so on.

`maximizing`: a boolean for whether we are maximizing or minimizing. I have not tested minimizing extensively; use it at your own risk, or apply a slight transformation to the output of your handler so you can keep this `True`.

`bbf_evaluation_n`: the total number of points evaluated with BBHO, including the two random points it starts with. Changing this will change the time it takes to finish optimization.

`acquisition_function`: the acquisition function we will use later on. I have it set to the upper confidence bound acquisition function with k = confidence interval = 1.5, but feel free to change this to any of the options available in `acquisition_functions.py`. Further explanation of acquisition functions can be found in my blog series, [Part Three](https://dark-element.com/2016/10/13/bayesian-optimization-of-black-box-functions-part-3/) and the [Appendix](https://dark-element.com/2016/10/14/bayesian-optimization-of-black-box-functions-appendix-and-sources-resources/). I have also added some parameters for exponential decay of the confidence interval, but this is for personal experiments of my own, and I cannot yet make any guarantees as to the efficacy of using a decay rate.

`covariance_function`: the covariance function we will use later on. It is initially set to the Matérn 5/2 covariance function with lengthscale 1, but feel free to change this to any of the options available in `covariance_functions.py`. Further explanation of covariance functions can be found in my blog series, [Part Three](https://dark-element.com/2016/10/13/bayesian-optimization-of-black-box-functions-part-3/) and the [Appendix](https://dark-element.com/2016/10/14/bayesian-optimization-of-black-box-functions-appendix-and-sources-resources/).

`hps`: mentioned earlier, this is a list of `HyperParameter` objects, found in `hyperparameter.py`. Specify each one with the syntax `HyperParameter(min, max)`, where `min` and `max` give the range of that hyperparameter. The number of entries must match the number of arguments your handler expects. If my handler expected a mini-batch size and a regularization rate, I might have `hps = [HyperParameter(0, 50), HyperParameter(0, 1)]`. A full example of the parameter block is sketched below.
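
Putting those together, the tunable block at the top of `bbho.py` looks roughly like the following (these are the values the repo currently ships with; treat them as a starting point rather than a recommendation):

```python
detail_n = 50                                   #Grid resolution per hyperparameter
maximizing = True                               #Maximize (True) or minimize (False)
bbf_evaluation_n = 40                           #Total black box evaluations (the first 2 are random)

initial_confidence_interval = 1.5               #k for the upper confidence bound
confidence_interval_decay_rate = 0              #0 disables the exponential decay
acquisition_function = upper_confidence_bound()

lengthscale = 1.0
v = [5/2.0]                                     #Only used by matern1 (not currently functional)
covariance_function = matern2(lengthscale, v)

#Mini batch size, regularization rate, dropout percentage
hps = [HyperParameter(0, 100), HyperParameter(0, 7), HyperParameter(0, 1)]
```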
10 | 11 | "Matrix inverse" 12 | m = T.matrix() 13 | invert_matrix = theano.function([m], matrix_inverse(m), allow_input_downcast=True) 14 | 15 | "Adds one axis if we are hoping to use the Prob Density function with vectors instead of matrices, as it's designed" 16 | v = T.vector() 17 | vector_to_column = theano.function([v], v.dimshuffle(0, 'x'), allow_input_downcast=True) 18 | 19 | #Multivariate covariance mean & variance 20 | test_cov = T.matrix() 21 | test_cov_T = T.matrix() 22 | test_cov_diag = T.vector() 23 | training_cov_m_inv = T.matrix() 24 | bbf_evaluations = T.matrix() 25 | 26 | #Gaussian Distribution vars 27 | inputs_m = T.matrix() 28 | means_m = T.matrix() 29 | variances_m = T.matrix() 30 | 31 | inputs_v = T.vector() 32 | means_v = T.vector() 33 | variances_v = T.vector() 34 | 35 | "Multivariate covariance mean" 36 | compute_mv_mean = theano.function([test_cov_T, training_cov_m_inv, bbf_evaluations], 37 | outputs = T.dot(T.dot(test_cov_T, training_cov_m_inv), bbf_evaluations), 38 | allow_input_downcast=True 39 | ) 40 | 41 | "Multivariate covariance variance" 42 | """ 43 | We only need the diagonal axis at completion. 44 | If we do the entire dot product, it's often with arrays of size 45 | nx2, 2xn, which results in an array of size nxn, 46 | but since n is often huge this can result in an array that would take up 47 | exobytes of storage. So, we do it like this so we only have to get a vector result. 48 | 49 | sum(A * B^T, axis=1) 50 | """ 51 | 52 | compute_mv_variance = theano.function([test_cov_diag, test_cov_T, training_cov_m_inv, test_cov], 53 | outputs = test_cov_diag - T.sum(T.dot(test_cov_T, training_cov_m_inv) * test_cov.T, axis=1), 54 | allow_input_downcast=True 55 | ) 56 | 57 | """ 58 | I had a brief moment where I thought I was using Gaussian Distributions incorrectly 59 | to get the correct result. However, when we get the cumulative distribution, 60 | or the probabilities up to a point, or the probability of a point; any time we 61 | need the probability of some input wrt a Gaussian Distribution. For this, we 62 | obviously use the probability density function, that returns the probability of 63 | a given input point. So, you can find the pretty latex version under wikipedia. 
64 | """ 65 | 66 | "Probability Density Function / Gaussian Distribution Function (for matrix input)" 67 | probability_density_function_m = theano.function([inputs_m, means_m, variances_m], 68 | outputs = (T.exp(-(T.sqr(inputs_m-means_m))/(2*variances_m)) / (T.sqrt(2*variances_m*np.pi))), 69 | allow_input_downcast=True 70 | ) 71 | 72 | "Probability Density Function / Gaussian Distribution Function (for vector input)" 73 | probability_density_function_v = theano.function([inputs_v, means_v, variances_v], 74 | outputs = (T.exp(-(T.sqr(inputs_v-means_v))/(2*variances_v)) / (T.sqrt(2*variances_v*np.pi))), 75 | allow_input_downcast=True 76 | ) 77 | 78 | def gaussian_distribution_m(inputs, means, variances): 79 | #For matrices 80 | #Gets input x, means, and variances 81 | #Returns vector or scalar from input 82 | return probability_density_function_m(inputs, means, variances) 83 | 84 | def gaussian_distribution_v(inputs, means, variances): 85 | #For vectors 86 | #Gets input x, means, and variances 87 | #Returns vector or scalar from input 88 | return probability_density_function_v(inputs, means, variances) 89 | 90 | def cdf(inputs, means, variances): 91 | #Get values to compute cdf over 92 | #print inputs.shape, means.shape, variances.shape 93 | #print np.array([np.arange(input-100, input, .1) for input in inputs]).shape 94 | 95 | #Convert to matrix counterparts so we can correctly get an estimate of the cdf 96 | inputs = np.array([np.arange(input-100, input, .1) for input in inputs]) 97 | 98 | #We just repeat these over the axis we are getting different inputs on, which 99 | # is why they are repeated according to inputs.shape[1] 100 | means = np.array([np.repeat(mean, inputs.shape[1]) for mean in means]) 101 | variances = np.array([np.repeat(mean, inputs.shape[1]) for variance in variances]) 102 | 103 | dist_values = gaussian_distribution_m(inputs, means, variances) 104 | #dist_values = gaussian_distribution(inputs, means, variances) 105 | 106 | #Equivalent to the last element of cumulative sum 107 | return np.sum(dist_values, axis=1) 108 | 109 | def get_cov_matrix(f, cov): 110 | #Given a vector f, generate the covariance matrix 111 | #f because known inputs 112 | #Numpy loops faster than theano 113 | f_n = len(f) 114 | f_m = np.zeros(shape=(f_n, f_n)) 115 | for row_i, f_i in enumerate(f): 116 | for col_i, f_j in enumerate(f): 117 | f_m[row_i, col_i] = cov.evaluate(f_i, f_j) 118 | 119 | return f_m 120 | 121 | def get_cov_vector(f, test_f, cov): 122 | #Given a vector f and scalar f* (test_f) 123 | #Generate a covariance vector for each value in f 124 | #Numpy loops faster than theano 125 | f_n = len(f) 126 | f_v = np.zeros(shape=(f_n)) 127 | for row_i, f_i in enumerate(f): 128 | f_v[row_i] = cov.evaluate(test_f, f_i) 129 | 130 | return f_v 131 | 132 | """ 133 | def cartesian_product(vectors): 134 | return [np.array(i) for i in itertools.product(*vectors)] 135 | """ 136 | 137 | def get_cartesian_product_element_by_index(cp, index): 138 | for i, e in enumerate(cp): 139 | if i == index: 140 | return e 141 | else: 142 | #Not in the cartesian product 143 | return False 144 | 145 | 146 | def convert_vector_to_column(v): 147 | return vector_to_column(v) 148 | 149 | def theano_matrix_inv(m): 150 | return invert_matrix(m) 151 | 152 | def get_test_means(test_cov_T, training_cov_m_inv, bbf_evaluations): 153 | return compute_mv_mean(test_cov_T, training_cov_m_inv, bbf_evaluations) 154 | 155 | def get_test_variances(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov): 156 | return 

def get_test_variances(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov):
    return compute_mv_variance(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov)

def exp_decay(initial, rate, iteration):
    #Do our k*e^(r*t) exponential decay
    return initial*np.exp(rate*iteration)


--------------------------------------------------------------------------------
/bbho.py:
--------------------------------------------------------------------------------
"""
Read all the details in my blog post here:
    https://dark-element.com/2016/10/10/bayesian-optimization-of-black-box-functions/
Or the github repo here:
    https://github.com/DarkElement75/bbho

Made by Blake Edwards / Dark Element
"""

import sys, time, itertools

import numpy as np

import theano
import theano.tensor as T

import hyperparameter
from hyperparameter import HyperParameter

import output_grapher

import acquisition_functions
from acquisition_functions import *

import covariance_functions
from covariance_functions import *

import bbho_base
from bbho_base import *

import black_box_functions

"""START TUNABLE PARAMETERS"""
"""
POLICY GRADIENT IMPLEMENTATION
#We use our policy gradient black box function
#Configure specifics in the black_box_functions file and so on
epochs = 400
timestep_n = 200
run_count = 5
bbf = black_box_functions.policy_gradient(epochs, timestep_n, run_count)
"""

"""
DENNIS IMPLEMENTATION
#We use our DENNIS+LIRA black box function
#Configure specifics in the black_box_functions file and so on
epochs = 1000
run_count = 3
bbf = black_box_functions.dennis(epochs, run_count)
"""

"""
LIRA IMPLEMENTATION
"""
#We use our LIRA black box function
#Configure specifics in the black_box_functions file and so on
epochs = 50
run_count = 3
bbf = black_box_functions.lira2_pre_transfer(epochs, run_count)

#For efficiency comparisons
start_time = time.time()

#Number of evaluated input points / level of detail
#Note: increasing this massively increases the computation required per evaluation.
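#(The search grid has detail_n**len(hps) points, so with the three
# hyperparameters below and detail_n = 50 that is 50**3 = 125,000 points.)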
detail_n = 50

#If we want the highest point or the lowest point
maximizing = True

#Number of bbf evaluations to perform before ending optimization
#(this total includes the two random starting points)
bbf_evaluation_n = 40

#Choice of acquisition function and acquisition function parameters
initial_confidence_interval = 1.5
#confidence_interval_decay_rate = -4.0/bbf_evaluation_n
confidence_interval_decay_rate = 0
acquisition_function = upper_confidence_bound()

#Choice of covariance function and cf parameters
lengthscale = 1.0
v = [5/2.0]#For matern1 (not currently functional)
covariance_function = matern2(lengthscale, v)

#Initialize ranges for each parameter into a resulting matrix
#Our level of detail / detail_n determines our step size for each
#Mini Batch Size, Regularization Rate, Dropout Percentage
hps = [HyperParameter(0, 100), HyperParameter(0, 7), HyperParameter(0, 1)]

#UI/graph settings for testing
plot_2d_results = False
plot_3d_results = False

"""END TUNABLE PARAMETERS"""

#Initialize independent domains of each parameter
#We will take the cartesian product of the vectors contained here to get our entire set of multidimensional inputs
independent_domains = np.array([np.arange(hp.min, hp.max, ((hp.max-hp.min)/float(detail_n))) for hp in hps])

#Make sure we don't run into the annoying problem where we get one extra element by accident
for i in range(len(independent_domains)):
    independent_domains[i] = independent_domains[i][:detail_n]

#Get past the annoying problem of it not being treated like a proper matrix
independent_domains = np.array([np.array(independent_domain) for independent_domain in independent_domains])

#Get the total number of grid points as n = detail_n^r, with r the number of hyper parameters
n = detail_n**len(hps)

"""
We make a copy so we have something that is not shuffled to take the cartesian product of from now on.
Later on, when we get the cartesian product of all the contained vectors to obtain the entire
multidimensional domain, we have to start over on each pass: iterators can only be looped through once,
and in order to restart we must initialize a new iterator.
That's why you'll see a lot of itertools.product(*domain) in here.
"""
116 | """ 117 | domain = np.copy(independent_domains) 118 | 119 | #Get our axis vectors if plotting 120 | if plot_2d_results: 121 | domain_x = np.copy(independent_domains[0]) 122 | domain_y = [] 123 | 124 | elif plot_3d_results: 125 | domain_x = np.copy(independent_domains[0]) 126 | domain_y = np.copy(independent_domains[1]) 127 | 128 | #Now that we have full domain, we can shuffle the original to get two random input vectors 129 | for independent_domain in independent_domains: 130 | np.random.shuffle(independent_domain) 131 | 132 | #Get our different values easily by transposing 133 | x1, x2 = independent_domains.transpose()[:2] 134 | 135 | #Known inputs 136 | training_inputs = T.vector() 137 | 138 | #Known evaluations 139 | training_outputs = T.vector() 140 | 141 | #Cartesian product of the ranges of each of our hyper parameter ranges 142 | test_domain = T.matrix() 143 | 144 | #Now that we have our two random input vectors, evaluate them and store them in our bbf inputs and outputs vector 145 | #Modify the bbf function when you make this more complicated with input to a bot 146 | #This needs to not be a np.array since we have to append 147 | bbf_inputs = [x1, x2] 148 | 149 | #This needs to be np array so we can do vector multiplication 150 | print "Evaluating Initial Random Inputs" 151 | bbf_evaluations = np.array([bbf.evaluate(0, bbf_evaluation_n, x1), bbf.evaluate(1, bbf_evaluation_n, x2)]) 152 | 153 | #Our main loop to go through every time we evaluate a new point, until we have exhausted our allowed 154 | # black box function evaluations. 155 | for bbf_evaluation_i in range(2, bbf_evaluation_n): 156 | #sys.stdout.write("\rDetermining Point #%i" % (bbf_evaluation_i+1)) 157 | #sys.stdout.flush() 158 | print "Determining Point #%i" % (bbf_evaluation_i+1) 159 | 160 | #Decay our confidence interval by decay rate, 161 | # and adjust our evaluation index back accordingly to account for our first two random inputs 162 | confidence_interval = exp_decay(initial_confidence_interval, confidence_interval_decay_rate, bbf_evaluation_i-2) 163 | 164 | #Since we reset this every time we generate through the domain 165 | test_means = np.zeros(shape=(n)) 166 | test_variances = np.zeros(shape=(n)) 167 | test_values = np.zeros(shape=(n)) 168 | 169 | #Generate our covariance matrices and vectors with theano backend 170 | training_cov_m = get_cov_matrix(bbf_inputs, covariance_function)#K 171 | 172 | #Clip a small amount so we don't have singular matrix 173 | training_cov_m = training_cov_m + (np.eye(training_cov_m.shape[0])*1e-7) 174 | 175 | #Invert 176 | training_cov_m_inv = theano_matrix_inv(training_cov_m)#K^-1 177 | 178 | #Get matrix by getting our vectors for each test point and combining 179 | test_cov_T = np.array([get_cov_vector(bbf_inputs, np.array(test_input), covariance_function) for test_input in itertools.product(*domain)])#K* 180 | test_cov = test_cov_T.transpose()#K*T 181 | 182 | #Get each diag for each test input 183 | test_cov_diag = np.array([covariance_function.evaluate(np.array(test_input), np.array(test_input)) for test_input in itertools.product(*domain)])#K** 184 | 185 | #Compute test mean using our Multivariate Gaussian Theorems 186 | #We flatten so we don't have shape (100, 1), but shape (100,) 187 | #test_mean = np.dot(np.dot(test_cov_T, training_cov_m_inv), bbf_evaluations) 188 | test_means = get_test_means(test_cov_T, training_cov_m_inv, bbf_evaluations).flatten() 189 | 190 | #Compute test variance using our Multivariate Gaussian Theorems 191 | #test_variance = test_cov_diag - 
    #Compute the test variances using our Multivariate Gaussian Theorems
    #test_variance = test_cov_diag - np.dot(np.dot(test_cov_T, training_cov_m_inv), test_cov)
    test_variances = get_test_variances(test_cov_diag, test_cov_T, training_cov_m_inv, test_cov)

    #Now that we have all our means u* and variances c* for every point in the domain,
    # move on to determining the next point to evaluate using our acquisition function.
    #If we want the point with the greatest possible value, score u + c; otherwise u - c
    #Numpy adds faster
    if maximizing:
        test_values = test_means + test_variances
    else:
        test_values = test_means - test_variances

    if plot_2d_results or plot_3d_results:
        output_grapher.graph_output(plot_2d_results, plot_3d_results, bbf_evaluation_i, bbf_evaluation_n, domain_x, domain_y, detail_n, test_means, bbf_inputs, bbf_evaluations, test_means+test_variances, test_means-test_variances)

    #Get the index of the next input to evaluate in our black box function,
    # since acquisition functions return argmax values
    next_input_i = acquisition_function.evaluate(test_means, test_variances, test_values, confidence_interval)

    #Add our new input
    next_input = get_cartesian_product_element_by_index(itertools.product(*domain), next_input_i)
    #next_input = domain[next_input_i]
    #print "\tNew point: {}".format(next_input)

    bbf_inputs.append(np.array(next_input))

    #Evaluate the new input.
    #We need this as an np array for vector multiplication,
    # but we need to append as well, so we have to convert back and forth.
    #Luckily, it's our smallest np array.
    bbf_evaluations = list(bbf_evaluations)

    #Evaluate using our specified black box function
    bbf_evaluations.append(bbf.evaluate(bbf_evaluation_i, bbf_evaluation_n, next_input))

    bbf_evaluations = np.array(bbf_evaluations)

best_input = bbf_inputs[np.argmax(bbf_evaluations)]
print ""
print bbf_inputs, bbf_evaluations
print "Best input found after {} iterations: {}".format(bbf_evaluation_n, best_input)
print "Time to run: %f" % (time.time() - start_time)
--------------------------------------------------------------------------------