├── Examples
│   ├── extendedRegression.py
│   └── trainRegression.py
├── LICENSE
├── README.md
├── docs
│   ├── ClassificationExample.md
│   ├── RegressionExample.md
│   ├── Setup.md
│   ├── _config.yml
│   ├── _data
│   │   └── navigation.yml
│   ├── _includes
│   │   └── navigation.html
│   ├── _layouts
│   │   └── default.html
│   ├── index.md
│   └── usage.md
└── tensorBNN
    ├── BNN_functions.py
    ├── activationFunctions.py
    ├── layer.py
    ├── likelihood.py
    ├── metrics.py
    ├── network.py
    ├── paramAdapter.py
    └── predictor.py
/Examples/extendedRegression.py:
--------------------------------------------------------------------------------
1 | """
2 | An extended version of the trainRegression.py example with pretraining and
3 | some graphs at the end to visualize the output of the BNN.
4 | """
5 |
6 | import os
7 | import math
8 | import warnings
9 | import time
10 |
11 | import numpy as np
12 | import random as rn
13 | import tensorflow as tf
14 | import pylab as plt
15 |
16 |
17 | from tensorBNN.activationFunctions import Tanh
18 | from tensorBNN.layer import GaussianDenseLayer
19 | from tensorBNN.networkFinal import network
20 | from tensorBNN.likelihood import FixedGaussianLikelihood
21 | from tensorBNN.metrics import SquaredError, PercentError
22 | from tensorBNN.predictor import predictor
23 |
24 | startTime = time.time()
25 |
26 | # This suppresses many deprecation warnings
27 | warnings.filterwarnings("ignore", category=DeprecationWarning)
28 | warnings.filterwarnings("ignore", category=UserWarning)
29 |
30 | # Set the GPU to use
31 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
32 |
33 | os.environ["PYTHONHASHSEED"] = "0"
34 | np.random.seed(42)
35 | rn.seed(12345)
36 | tf.random.set_seed(3)
37 |
38 |
39 | def main():
40 |
41 | trainIn=np.linspace(-2,2,num=11)
42 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30)
43 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi)
44 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi)
45 |
46 |
47 | data=[trainIn, trainOut, valIn, valOut]
48 |
49 | dtype=tf.float32
50 |
51 | inputDims=1
52 | outputDims=1
53 | width = 10 # perceptrons per layer
54 | hidden = 3 # number of hidden layers
55 | patience=20
56 | cycles=3
57 | epochs=100
58 | seed=1000
59 |
60 |
61 | normInfo=(0,1) # mean, sd
62 |
63 | # Perform pre-training to start the Markov chain at a better spot
64 | model = tf.keras.Sequential()
65 |
66 | model.add(tf.keras.layers.Dense(width, kernel_initializer='glorot_uniform',
67 | input_shape=(inputDims, ),
68 | activation="tanh"))
69 | model.add(tf.keras.layers.ReLU())
70 |
71 | for n in range(hidden-1):
72 | model.add(tf.keras.layers.Dense(width,
73 | kernel_initializer='glorot_uniform',
74 | activation="tanh"))
75 |
76 | model.add(tf.keras.layers.Dense(outputDims,
77 | kernel_initializer='glorot_uniform'))
78 |
79 | callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
80 | patience=patience,
81 | restore_best_weights=True)
82 |
83 | #Train with decreasing learning rate
84 | for x in range(cycles):
85 | model.compile(optimizer=tf.keras.optimizers.Adam(0.01*(10**(-x)),
86 | amsgrad=True),
87 | loss='mean_squared_error',
88 | metrics=['mean_absolute_error', 'mean_squared_error'])
89 | model.summary()
90 | model.fit(trainIn, trainOut.T, validation_data=(valIn, valOut.T),
91 | epochs=epochs, batch_size=32, callbacks=[callback])
92 |
93 | #Save the backup
94 | model.save("backup")
95 |
96 | #Extract weights and biases
97 | weights=[]
98 | biases=[]
99 | activation=[]
100 | for layer in model.layers:
101 | weightBias=layer.get_weights()
102 | if(len(weightBias)==2):
103 | weights.append(weightBias[0].T)
104 | bias=weightBias[1]
105 | bias=np.reshape(bias, (len(bias),1))
106 | biases.append(bias)
107 | if(len(weightBias)==1):
108 | activation.append(weightBias[0])
109 |
110 |
111 | likelihood=FixedGaussianLikelihood(sd=0.1)
112 | metricList=[SquaredError(mean=normInfo[0], sd=normInfo[1]),
113 | PercentError(mean=normInfo[0], sd=normInfo[1])]
114 |
115 | neuralNet = network(
116 | dtype, # network datatype
117 | inputDims, # dimension of input vector
118 | data[0], # training input data
119 | data[1].T, # training output data
120 | data[2], # validation input data
121 | data[3].T) # validation output data)
122 |
123 | layer = GaussianDenseLayer( # Dense layer object
124 | inputDims, # Size of layer input vector
125 | width, # Size of layer output vector
126 | seed=seed, # Random seed
127 | dtype=dtype,
128 | weights=weights[0], biases=biases[0])
129 | neuralNet.add(layer) # Layer datatype
130 | neuralNet.add(Tanh()) # Tanh activation function
131 | seed += 1000 # Increment random seed
132 | for n in range(hidden - 1): # Add more hidden layers
133 | neuralNet.add(GaussianDenseLayer(width,
134 | width,
135 | seed=seed,
136 | dtype=dtype,
137 | weights=weights[n+1], biases=biases[n+1]))
138 | neuralNet.add(Tanh())
139 | seed += 1000
140 |
141 | neuralNet.add(GaussianDenseLayer(width,
142 | outputDims,
143 | seed=seed,
144 | dtype=dtype,
145 | weights=weights[-1], biases=biases[-1]))
146 |
147 | neuralNet.setupMCMC(
148 | stepSizeStart=1e-3,#0.0004 # starting stepsize
149 | stepSizeMin=1e-4, #0.0002 # minimum stepsize
150 | stepSizeMax=1e-2, # maximum stepsize
151 | stepSizeOptions=100, # number of stepsize options in stepsize adapter
152 | leapfrogStart=1000, # starting number of leapfrog steps
153 | leapfogMin=100, # minimum number of leapfrog steps
154 | leapFrogMax=10000, # maximum number of leapfrog steps
155 | leapfrogIncrement=10, # stepsize between leapfrog steps in leapfrog step adapter
156 | hyperStepSize=0.001, # hyper parameter stepsize
157 | hyperLeapfrog=100, # hyper parameter number of leapfrog steps
158 | burnin=1000, # number of burnin epochs
159 | averagingSteps=10) # number of averaging steps for param adapters)
160 |
161 |
162 | neuralNet.train(
163 | 6001, # epochs to train for
164 | 10, # increment between network saves
165 | likelihood,
166 | metricList=metricList,
167 | adjustHypers=True,
168 | folderName="TrigRegression", # Name of folder for saved networks
169 | networksPerFile=50) # Number of networks saved per file
170 |
171 | print("Total time elapsed (seconds):", time.time() - startTime)
172 |
173 |
174 | #Load predictor
175 | loadedNetwork = predictor("TrigRegression/", tf.float32)
176 |
177 | # Look at the predictions in the space between the training data
178 | closeIn=np.linspace(-2,2,num=1000)
179 | closeOut = np.sin(closeIn*math.pi*2)*closeIn-np.cos(closeIn*math.pi)
180 |
181 | closePredictions = np.squeeze(np.array(loadedNetwork.predict(
182 | np.array([closeIn]).T, n=1)))
183 | closePredictionsMean = np.mean(closePredictions, axis=0)
184 | closePredictionsStd = np.std(closePredictions, axis=0)
185 | plt.figure()
186 |
187 | plt.fill_between(closeIn, closePredictionsMean-2*closePredictionsStd,
188 | closePredictionsMean-1*closePredictionsStd, color=(1,1,0),
189 | label="2 sd")
190 | plt.fill_between(closeIn, closePredictionsMean-1*closePredictionsStd,
191 | closePredictionsMean+1*closePredictionsStd, color=(0,1,0),
192 | label="1 sd")
193 | plt.fill_between(closeIn, closePredictionsMean+1*closePredictionsStd,
194 | closePredictionsMean+2*closePredictionsStd, color=(1,1,0))
195 | plt.plot(closeIn,closePredictionsMean, color="k", label="predicted mean")
196 | plt.plot(closeIn, closeOut, color="r", label="true")
197 | plt.scatter(trainIn, trainOut, color="b", label="training data")
198 | plt.legend()
199 | plt.show()
200 |
201 | #Look at the predictions away from the training data
202 | farIn=np.linspace(-4,4,num=2000)
203 | farOut = np.sin(farIn*math.pi*2)*farIn-np.cos(farIn*math.pi)
204 |
205 | farPredictions = np.squeeze(np.array(loadedNetwork.predict(
206 | np.array([farIn]).T, n=1)))
207 | farPredictionsMean = np.mean(farPredictions, axis=0)
208 | farPredictionsStd = np.std(farPredictions, axis=0)
209 |
210 | plt.figure()
211 | plt.fill_between(farIn, farPredictionsMean-2*farPredictionsStd,
212 | farPredictionsMean-1*farPredictionsStd, color=(1,1,0),
213 | label="2 sd")
214 | plt.fill_between(farIn, farPredictionsMean-1*farPredictionsStd,
215 | farPredictionsMean+1*farPredictionsStd, color=(0,1,0),
216 | label="1 sd")
217 | plt.fill_between(farIn, farPredictionsMean+1*farPredictionsStd,
218 | farPredictionsMean+2*farPredictionsStd, color=(1,1,0))
219 | plt.plot(farIn,farPredictionsMean, color="k", label="predicted mean")
220 | plt.plot(farIn, farOut, color="r", label="true")
221 | plt.scatter(trainIn, trainOut, color="b", label="training data")
222 | plt.legend()
223 | plt.show()
224 |
225 |
226 | if(__name__ == "__main__"):
227 | main()
228 |
--------------------------------------------------------------------------------
/Examples/trainRegression.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import warnings
4 | import time
5 |
6 | import numpy as np
7 | import random as rn
8 | import tensorflow as tf
9 |
10 | from tensorBNN.activationFunctions import Tanh
11 | from tensorBNN.layer import GaussianDenseLayer
12 | from tensorBNN.networkFinal import network
13 | from tensorBNN.likelihood import FixedGaussianLikelihood
14 | from tensorBNN.metrics import SquaredError, PercentError
15 |
16 | startTime = time.time()
17 |
18 | # This suppresses many deprecation warnings
19 | warnings.filterwarnings("ignore", category=DeprecationWarning)
20 | warnings.filterwarnings("ignore", category=UserWarning)
21 |
22 | # Set the GPU to use
23 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
24 |
25 | os.environ["PYTHONHASHSEED"] = "0"
26 | np.random.seed(42)
27 | rn.seed(12345)
28 | tf.random.set_seed(3)
29 |
30 |
31 | def main():
32 |
33 | trainIn=np.linspace(-2,2,num=11)
34 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30)
35 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi)
36 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi)
37 |
38 |
39 | data=[trainIn, trainOut, valIn, valOut]
40 |
41 | dtype=tf.float32
42 |
43 | inputDims=1
44 | outputDims=1
45 | width = 10 # perceptrons per layer
46 | hidden = 3 # number of hidden layers
47 | seed=1000
48 |
49 |
50 |
51 |
52 | normInfo=(0,1) # mean, sd
53 |
54 | likelihood=FixedGaussianLikelihood(sd=0.1)
55 | metricList=[SquaredError(mean=normInfo[0], sd=normInfo[1]),
56 | PercentError(mean=normInfo[0], sd=normInfo[1])]
57 |
58 | neuralNet = network(
59 | dtype, # network datatype
60 | inputDims, # dimension of input vector
61 | data[0], # training input data
62 | data[1].T, # training output data
63 | data[2], # validation input data
64 | data[3].T) # validation output data)
65 |
66 | layer = GaussianDenseLayer( # Dense layer object
67 | inputDims, # Size of layer input vector
68 | width, # Size of layer output vector
69 | seed=seed, # Random seed
70 | dtype=dtype)
71 | neuralNet.add(layer) # Layer datatype
72 | neuralNet.add(Tanh()) # Tanh activation function
73 | seed += 1000 # Increment random seed
74 | for n in range(hidden - 1): # Add more hidden layers
75 | neuralNet.add(GaussianDenseLayer(width,
76 | width,
77 | seed=seed,
78 | dtype=dtype))
79 | neuralNet.add(Tanh())
80 | seed += 1000
81 |
82 | neuralNet.add(GaussianDenseLayer(width,
83 | outputDims,
84 | seed=seed,
85 | dtype=dtype))
86 |
87 | neuralNet.setupMCMC(
88 | stepSizeStart=1e-3,#0.0004 # starting stepsize
89 | stepSizeMin=1e-4, #0.0002 # minimum stepsize
90 | stepSizeMax=1e-2, # maximum stepsize
91 | stepSizeOptions=100, # number of stepsize options in stepsize adapter
92 | leapfrogStart=1000, # starting number of leapfrog steps
93 | leapfogMin=100, # minimum number of leapfrog steps
94 | leapFrogMax=10000, # maximum number of leapfrog steps
95 | leapfrogIncrement=10, # stepsize between leapfrog steps in leapfrog step adapter
96 | hyperStepSize=0.001, # hyper parameter stepsize
97 | hyperLeapfrog=100, # hyper parameter number of leapfrog steps
98 | burnin=1000, # number of burnin epochs
99 | averagingSteps=10) # number of averaging steps for param adapters)
100 |
101 |
102 | neuralNet.train(
103 | 6001, # epochs to train for
104 | 10, # increment between network saves
105 | likelihood,
106 | metricList=metricList,
107 | adjustHypers=True,
108 | folderName="TrigRegression", # Name of folder for saved networks
109 | networksPerFile=50) # Number of networks saved per file
110 |
111 | print("Total time elapsed (seconds):", time.time() - startTime)
112 |
113 |
114 |
115 | if(__name__ == "__main__"):
116 | main()
117 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Braden Kronheim
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TensorBNN
2 | This package contains code which can be used to train Bayesian Neural Networks using Hamiltonian Monte Carlo sampling as proposed by Radford Neal in his thesis "Bayesian Learning for Neural Networks" along with added features. The package is written in python3 and uses the packages `Tensorflow` and `Tensorflow-Probability` as the framework for the implementation.
3 |
4 | For detailed information about this implementation, please see our paper on the arXiv: [TensorBNN: Bayesian Inference for Neural Networks using Tensorflow](https://arxiv.org/abs/2009.14393). Cite as:
5 |
6 | B. Kronheim, M. Kuchera, H. Prosper, TensorBNN: Bayesian inference for neural network training using TensorFlow. arXiv:https://arxiv.org/abs/2009.14393.
7 |
8 |
9 | ## Dependencies
10 | All code in this package is written in Python 3. It depends on the packages `numpy`, `emcee`, `tensorflow`, `tensorflow-probability`, and `scipy`.
11 |
12 | The package, along with `numpy`, `emcee`, and `scipy`, can be installed via
13 |
14 | ```
15 | pip install tensorBNN
16 | ```
17 |
18 | Alternatively, you can clone the repository and install `numpy`, `scipy`, and `emcee` separately with the command:
19 |
20 | ```
21 | pip install numpy scipy emcee
22 | ```
23 |
24 | If you cloned the repository from GitHub, add the tensorBNN folder to your Python path; the package will then behave the same as if it had been downloaded via pip.
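
One minimal sketch of doing this (the path below is a placeholder for wherever you cloned the repository):

```
import sys

# Placeholder path: point this at the root of your local TensorBNN clone.
sys.path.append("/path/to/TensorBNN")

from tensorBNN.network import network  # should now import as if installed via pip
```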
25 |
26 | TensorFlow and TensorFlow-probability must be installed separately. The TensorFlow version should be the most recent (2.5 at the moment). Using a 1.x version will not work, and older versions of 2 might not either. It is also highly recommended that this code be run on a GPU due to its high computational complexity. TensorFlow for the GPU can be installed with the command:
27 |
28 | ```
29 | pip install tensorflow-gpu
30 | ```
31 |
32 | In order to be compatible with this version of tensorflow, the most recent version of tensorflow-probability (0.12.2) must be installed. This is done with the following command:
33 |
34 | ```
35 | pip install tensorflow-probability
36 | ```
37 |
38 |
39 | ## Usage
40 |
41 | In order to use this code you must import `network`, `DenseLayer`, an activation such as `Relu`, and a likelihood such as `GaussianLikelihood`. This can be done as follows:
42 |
43 | ```
44 | from tensorBNN.layer import DenseLayer
45 | from tensorBNN.network import network
46 | from tensorBNN.activationFunctions import Relu
47 | from tensorBNN.likelihood import GaussianLikelihood
48 | ```
49 |
50 | Next, it is convenient to turn off the deprecation warnings. These come from tensorflow, tensorflow-probability, and numpy interacting with tensorflow, so they are not easily fixed and there are a lot of them. They can be silenced with:
51 |
52 | ```
53 | import warnings
54 | warnings.filterwarnings("ignore", category=DeprecationWarning)
55 | warnings.filterwarnings("ignore", category=UserWarning)
56 | ```
57 |
58 | The other important setup task is determining whether or not to seed the random number generator before training. Please note that if you are using a GPU then there will always be some randomness which cannot be removed. To seed all CPU random number generators, use these lines of code:
59 |
60 | ```
61 | import os
62 |
63 | import numpy as np
64 | import random as rn
65 | import tensorflow as tf
66 |
67 | os.environ["PYTHONHASHSEED"] = "0"
68 | np.random.seed(42)
69 | rn.seed(12345)
70 | tf.random.set_seed(3)
71 | ```
72 |
73 | Moving on to the actual use of this code, start with the declaration of a network object:
74 |
75 | ```
76 | neuralNet = network(dtype, inputDims, trainX, trainY, validationX, validationY, mean, sd)
77 | ```
78 |
79 | The parameters are described as follows:
80 | * dtype: data type for Tensors
81 | * inputDims: dimension of input vector
82 | * trainX: the training data input, shape is n by inputDims
83 | * trainY: the training data output
84 | * validationX: the validation data input, shape is n by inputDims
85 | * validationY: the validation data output
86 | * mean: the mean used to scale trainY and validateY
87 | * sd: standard deviation used to scale trainY and validateY
88 |
89 | Next, add all of the desired layers and activation functions as follows:
90 |
91 | ```
92 | neuralNet.add(DenseLayer(inputDims, outputDims, seed=seed, dtype=tf.float32))
93 | neuralNet.add(Relu())
94 | ```
95 |
96 | For added control, especially when using pre-trained networks, it is possible to supply pretrained weights, biases, and values for the activation functions. This can be done as follows:
97 |
98 | ```
99 | neuralNet.add(DenseLayer(inputDims,outputDims, weights=weights, biases=biases, seed=seed, dtype=dtype))
100 | neuralNet.add(SquarePrelu(width, alpha=alpha**(0.5), activation=activation, dtype=dtype))
101 | ```
102 |
103 | The parameter inputDims is the output shape of the previous layer, and width is the output shape of the layer itself. The seed is used for seeding the random number generator. Currently, only ReLU is supported for easy predictions from saved networks. The other activation functions can be used, but they will require more custom code to predict from saved networks.
104 |
105 | Next, the Markov Chain Monte Carlo algorithm must be initialized. This can be done as follows:
106 |
107 | ```
108 | neuralNet.setupMCMC(stepSize, stepMin, stepMax, stepNum, leapfrog, leapMin,
109 |                     leapMax, leapStep, hyperStepSize, hyperLeapfrog, burnin,
110 |                     cores, averagingSteps=2, a=4, delta=0.1)
111 | ```
112 |
113 | The parameters are described as follows:
114 | * stepSize: the starting step size for the weights and biases
115 | * stepMin: the minimum step size
116 | * stepMax: the maximum step size
117 | * stepNum: the number of step sizes in grid
118 | * leapfrog: number of leapfrog steps for weights and biases
119 | * leapMin: the minimum number of leapfrog steps
120 | * leapMax: the maximum number of leapfrog steps
121 | * leapStep: the step in number of leapfrog for search grid
122 | * hyperStepSize: the starting step size for the hyper parameters
123 | * hyperLeapfrog: leapfrog steps for hyper parameters
124 | * cores: number of cores to use
125 | * averagingSteps: number of averaging steps
126 | * a: constant, 4 in paper
127 | * delta: constant, 0.1 in paper
128 |
129 | This code uses the adaptive Hamiltonian Monte Carlo described in "Adaptive Hamiltonian and Riemann Manifold Monte Carlo Samplers" by Wang, Mohamed, and de Freitas. In accordance with this paper there are a few more parameters that can be adjusted, though it is recommended that their default values are kept. A concrete call is sketched below.
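
As an illustration only, here is a sketch of a call using the settings from the regression example in the docs folder; these values are not recommendations for every problem:

```
neuralNet.setupMCMC(
    0.005, # starting step size for the weights and biases
    0.0025, # minimum step size
    0.01, # maximum step size
    40, # number of step sizes in the search grid
    2, # starting number of leapfrog steps
    2, # minimum number of leapfrog steps
    50, # maximum number of leapfrog steps
    1, # increment in leapfrog steps for the search grid
    0.01, # step size for the hyper parameters
    5, # leapfrog steps for the hyper parameters
    20, # number of burnin epochs
    20, # number of cores to use
    2) # number of averaging steps for the adapters
```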
130 |
131 | After initializing the HMC, we must declare the likelihood that we want to use as well as any metrics. This can be accomplished through the following code:
132 |
133 | ```
134 | # Declare Gaussian Likelihood with sd of 0.1
135 | likelihood = GaussianLikelihood(sd = 0.1)
136 | metricList = [ #Declare metrics
137 | SquaredError(mean = 0, sd = 1, scaleExp = False),
138 | PercentError(mean = 10, sd = 2, scaleExp = True)]
139 | ```
140 |
141 |
142 | The last thing to do is to tell the model to start learning. This is done with the following command:
143 |
144 | ```
145 | neuralNet.train(
146 | epochs, # epochs to train for
147 | samplingStep, # increment between network saves
148 | likelihood,
149 | metricList = metricList,
150 | folderName = "Regression",
151 | # Name of folder for saved networks
152 | networksPerFile=50)
153 | # Number of networks saved per file
154 | ```
155 |
156 | The arguments have the following meanings:
157 |
158 | * Epochs: Number of training cycles
159 | * samplingStep: Epochs between sampled networks
160 | * likelihood: The likelihood function used to evaluate the prediction
161 | we defined previously
162 | * startSigma: Starting standard deviation for likelihood function
163 | for regression models
164 | * folderName: name of folder for saved networks
165 | * networksPerFile: number of networks saved in a given file
166 |
167 | Once the network has trained, which may take a while, the saved networks can be loaded and then used to make predictions using the following code:
168 |
169 | ```
170 | from tensorBNN.predictor import predictor
171 |
172 | network = predictor(filePath,
173 | dtype = dtype,
174 | # data type used by network
175 | customLayerDict={"dense2": Dense2},
176 | # A dense layer with a different
177 | # hyperprior
178 | likelihood = Likelihood)
179 | # The likelihood function is required to
180 | # calculate the probabilities for
181 | # re-weighting
182 |
183 | initialResults = network.predict(inputData, skip, dtype)
184 | ```
185 |
186 | The variable filePath is the directory from which the networks are being loaded, inputData is the normalized data for which predictions should be made, and dtype is the data type to be used for predictions. The customLayerDict is a dictionary holding the names and objects for any user defined layers. Likelihood is the likelihood function used to train the model.
187 |
188 | The variable initialResults will be a list of numpy arrays, each numpy array corresponding to the predictions from a single network in the BNN. The skip variable instructs the predictor to only use every n networks, where n=skip. A short sketch of summarizing these per-network predictions is shown below.
189 |
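For example, a minimal sketch (assuming only numpy and the initialResults list returned above) of summarizing the per-network predictions into a mean and standard deviation:

```
import numpy as np

# One prediction array per saved network; stack them to summarize the
# posterior predictive distribution (exact shapes depend on your input data).
predictions = np.array(initialResults)
posteriorMean = predictions.mean(axis=0)
posteriorStd = predictions.std(axis=0)
```
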
190 | Additionally, the predictor function allows for the calculation of the autocorrelation between different networks, as well as the autocorrelation length through:
191 |
192 | ```
193 | autocorrelations = network.autocorrelation(testData, nMax)
194 | autocorrelations = network.autoCorrelationLength(testData, nMax)
195 | ```
196 | Here, the autocorrelation is calculated based on the predictions of the different BNNs, and the results are averaged over the test data. nMax provides the largest lag value for the autocorrelation. These calculations are done with emcee.
197 |
198 |
199 | Finally, the predictor object can calculate new weights for the different networks if they were given new priors. These priors take the form of new Layer objects which must be referenced in an architecture file. The reweighting function call looks like this:
200 |
201 | ```
202 | weights = network.reweight(
203 | trainX, # training input
204 | trainY, # training output
205 | skip = 10, # Use every 10 saved networks
206 | architecture = "architecture2.txt")
207 | # New architecture file
208 | ```
209 |
210 |
--------------------------------------------------------------------------------
/docs/ClassificationExample.md:
--------------------------------------------------------------------------------
1 | # MNIST Classification Example
2 | This page is a tutorial on training a classification BNN with the tools available in `TensorBNN`, using the `MNIST` dataset. This dataset consists of 28x28 grayscale images of handwritten digits. The tutorial shows how to select two digits and train a BNN to distinguish between them.
3 |
4 | ## Data setup
5 | First, it is necessary to import all the packages that will be needed. The required ones are
6 | ```
7 | import os
8 |
9 | import numpy as np
10 | import random as rn
11 | import tensorflow as tf
12 |
13 | from sklearn.model_selection import train_test_split
14 |
15 | from Networks.activationFunctions import SquarePrelu, Sigmoid
16 | from Networks.BNN_functions import trainBasicClassification
17 | from Networks.layer import DenseLayer
18 | from Networks.network import network
19 | ```
20 | The `os`, `numpy`, `random`, and `tensorflow` imports are all required to set the random seeds properly so that results are reproducible. The other imports are either for splitting the training and validation data or for constructing the actual network. Note, however, that if a GPU is used for training, which is highly recommended, it is impossible to obtain completely reproducible results simply because of how a GPU works. The code required to set these random seeds is:
21 |
22 | ```
23 | os.environ["PYTHONHASHSEED"] = "0"
24 | np.random.seed(42)
25 | rn.seed(12345)
26 | tf.random.set_seed(3)
27 | ```
28 | After setting up the random seeds, we need to get our dataset. This is accomplished through the code:
29 | ```
30 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path='mnist.npz')
31 | ```
32 | As the `MNIST` data consists of images, we need to reshape each image into a vector and scale the pixel values to lie between 0 and 1. This is accomplished here:
33 | ```
34 | x_train_shape = x_train.shape
35 |
36 | inputDims=x_train_shape[1]**2
37 | outputDims=1
38 |
39 | x_train = np.reshape(x_train, (x_train_shape[0],x_train_shape[1]**2))
40 | x_train = np.float32(x_train)/256
41 | ```
42 | We also collected our input and output dimensions, which will be important later.
43 | Next, we must collect the two numbers that we are interested in. For this tutorial we will use 3 and 8, but you are free to use whatever two numbers you desire. In the following block of code we create our new datasets.
44 | ```
45 | new_x_train = []
46 | new_y_train = []
47 |
48 | for y in range(len(y_train)):
49 | if(y_train[y]==3):
50 | new_y_train.append(0)
51 | new_x_train.append(x_train[y])
52 | if(y_train[y]==8):
53 | new_y_train.append(1)
54 | new_x_train.append(x_train[y])
55 | x_train = np.array(new_x_train)
56 | y_train = np.array(new_y_train)
57 | ```
58 | Finally, we perform an 80-20 train-validation split and store all of the datasets in a list.
59 | ```
60 | trainIn, valIn, trainOut, valOut = train_test_split(
61 | x_train, y_train, test_size=0.20, random_state=21)
62 | data=[trainIn, trainOut, valIn, valOut]
63 | ```
64 |
65 | ## Pretraining
66 | Next, we will use the pretraining feature built into `TensorBNN`. This feature uses normal neural network optimization algorithms to give the BNN a superior starting point, since HMC is a much slower algorithm; pretraining therefore allows for faster convergence of the BNN. To do the pretraining, we simply call `trainBasicClassification` from the `BNN_functions` module. A sample call is shown below with all of the arguments labeled.
67 | ```
68 | weights, biases, activation = trainBasicClassification(
69 | 2, # Number of hidden layers
70 | inputDims, # Input dimensions
71 | outputDims, # Output dimensions
72 | 20, # Number of perceptrons per layer
73 | nncycles, # Number of training cycles. The learning rate is decreased by a factor of 10 each cycle.
74 | 10, # Number of epochs per training cycle
75 | 0.1, # Slope value for `leaky-relu` activation
76 | data[0], # Training input data
77 | data[1], # Training output data
78 | data[2], # Validation input data
79 | data[3], # Validation output data
80 | "MNIST_pretrain", # Save the pretrain network under this name
81 | callbacks=True, # Use callbacks to restore best weights obtained while training
82 | callbackMetric="val_loss", # metric used to determine best weights
83 | patience=10) # number of epochs to wait after failing to improve callback metric
84 | ```
85 | Running this function will train a network in Keras and save it under the name "MNIST_pretrain". It will extract the weights, biases, and activation function tensors from the final model and return them.
86 |
87 | ## BNN setup
88 | We are now finally ready to actually set up the BNN.
89 | First, we create a network object. This is accomplished through the following code:
90 | ```
91 | dtype = tf.float32 # This is the best trade off between speed and precision.
92 |
93 | neuralNet = network(
94 | dtype, # network datatype
95 | inputDims, # dimension of input vector
96 | data[0], # Training input data
97 | data[1], # Training output data
98 | data[2], # Validation input data
99 | data[3], # Validation output data
100 | tf.cast(0.0, dtype), # Mean of output data for unnormalization
101 | tf.cast(1.0, dtype)) # Standard deviation of output data
102 | ```
103 | Next, we need to add our layers. We will use two hidden layers of 20 perceptrons each with SquarePrelu activation functions, and a Sigmoid activation for the output layer. SquarePrelu activations are similar to normal prelu activations, which are essentially leaky-relu activations with a trainable slope parameter. The difference is that the trained parameter is the plus or minus square root of the slope, so the effective slope is always positive and the activation function is guaranteed to be a bijection. A sketch of the idea is shown below.
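
As a rough sketch of this idea (an illustration, not the library's actual implementation), the forward pass behaves like a leaky ReLU whose slope is the square of the trained parameter:

```
import numpy as np

def square_prelu(x, root_alpha):
    # root_alpha is the trained parameter; squaring it keeps the slope
    # positive, so the activation is always a bijection.
    alpha = root_alpha ** 2
    return np.where(x > 0, x, alpha * x)
```
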
104 | The code to add the layers is below:
105 | ```
106 | seed = 0 # seed for layer generation, irrelevant with pretraining
107 | width = 20 # number of perceptrons per layer
108 | alpha = 0.1 # starting slope value for SquarePrelu
109 | hidden = 2 # Number of hidden layers
110 | neuralNet.add( # add a layer
111 | DenseLayer( # dense layer object
112 | inputDims, # input dimension
113 | width, # number of perceptrons per layer
114 | weights=weights[0], # pretrained weights
115 | biases=biases[0], # pretrained biases
116 | seed=seed, # layer seed
117 | dtype=dtype)) # layer datatype
118 | neuralNet.add(SquarePrelu(width,
119 | alpha=alpha**(0.5), # starting slope parameter
120 | activation=None, # no activation pretrained
121 | dtype=dtype)) # activation datatype
122 | seed += 1000
123 | for n in range(hidden - 1): # Add the hidden layers
124 | neuralNet.add(DenseLayer(width,
125 | width,
126 | weights=weights[n + 1],
127 | biases=biases[n + 1],
128 | seed=seed,
129 | dtype=dtype))
130 | neuralNet.add(
131 | SquarePrelu(
132 | width,
133 | alpha=alpha**(0.5),
134 | activation=None,
135 | dtype=dtype))
136 | seed += 1000
137 |
138 | #Add the output layer
139 | neuralNet.add(DenseLayer(width,
140 | outputDims,
141 | weights=weights[-1],
142 | biases=biases[-1],
143 | seed=seed,
144 | dtype=dtype))
145 | neuralNet.add(Sigmoid()) # Sigmoid activation
146 | ```
147 | Next, we must set up the Markov Chain Monte Carlo algorithm. This is done by calling setupMCMC and providing it with the settings below.
148 | ```
149 | neuralNet.setupMCMC(
150 | 0.001, # Starting stepsize for Hamiltonian Monte Carlo (HMC)
151 | 0.0005, # Minimum possible stepsize for HMC
152 | 0.002, # Maximum possible stepsize for HMC
153 | 100, # Number of points to use in stepsize search grid
154 | 500, # Starting number of leapfrog steps for HMC
155 | 100, # Minimum number of leapfrog steps for HMC
156 | 2000, # Maximum number of leapfrog steps for HMC
157 | 1, # increment in leapfrog steps in leapfrog search grid
158 | 0.00001, # stepsize for hyper parameter HMC
159 | 30, # leapfrog steps for hyper parameter HMC
160 | 50, # Number of burnin steps to do
161 | 2, # Number of cores to use on computer
162 | 2) # Number of steps to average over in adaptive HMC algorithm
163 | ```
164 | Finally, we can actually train the network. We must give it a few last pieces of information and then it will be on its merry way.
165 | ```
166 | neuralNet.train(
167 | 2500, #Train for 2500 epochs
168 | 10, # Save every 10 networks
169 | folderName="MNIST_BNN", # Save inside the folder MNIST_BNN
170 | networksPerFile=25, # Start new files every 25 networks
171 | returnPredictions=False, # Don't return predictions
172 | regression=False) # Don't use regression algorithm, so use classification algorithm
173 | ```
174 | A final word of caution: this algorithm is not fast. For large datasets and large networks it is only feasible to run this on GPUs, and even then it may need several days to run. This example is small enough that it should run on normal computers, but it will still take several hours.
175 |
--------------------------------------------------------------------------------
/docs/RegressionExample.md:
--------------------------------------------------------------------------------
1 | # Regression Example
2 | Here, I will present an example of using `tensorBNN` on a very basic regression problem. It will also highlight how well the BNN represents model uncertainty.
3 | ## Program Setup
4 | First, we need to import the necessary packages. This is done through the commands
5 | ```
6 | import os
7 | import math
8 |
9 | import numpy as np
10 | import random as rn
11 | import tensorflow as tf
12 |
13 | from tensorBNN.activationFunctions import Tanh
14 | from tensorBNN.layer import DenseLayer
15 | from tensorBNN.network import network
16 | from tensorBNN.likelihood import GaussianLikelihood
from tensorBNN.metrics import SquaredError
17 | ```
18 | In order to obtain reproducible results we need to set random seeds. To be sure that absolutely everything is seeded, we use the following four lines of code:
19 | ```
20 | os.environ["PYTHONHASHSEED"] = "0"
21 | np.random.seed(42)
22 | rn.seed(12345)
23 | tf.random.set_seed(3)
24 | ```
25 | ## Data preparation
26 | Next, we need to generate our dataset. We are simply going to use the function `f(x) = x*sin(2*pi*x) - cos(pi*x)`.
27 | We will generate a training dataset with 31 points and a validation dataset with 30 points. This is done as follows.
28 | ```
29 | trainIn=np.linspace(-2,2,num=31)
30 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30)
31 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi)
32 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi)
33 | ```
34 | After this we need to group our data together and declare the datatype we will be using.
35 | ```
36 | data=[trainIn, trainOut, valIn, valOut]
37 |
38 | dtype=tf.float32
39 | ```
40 | ## Network setup
41 | To get the network set up we need to first declare the number of input and output dimensions and the normalization we used on our output data. As we didn't normalize, we just say we have a mean of 0 and a standard deviation of 1 so `TensorBNN` doesn't try to unnormalize the data.
42 | ```
43 | inputDims=1
44 | outputDims=1
45 |
46 | normInfo=(0,1) # mean, sd
47 | ```
48 | Now we actually need to create the network object. This is done like so.
49 | ```
50 | neuralNet = network(
51 | dtype, # network datatype
52 | inputDims, # dimension of input vector
53 | data[0], # training input data
54 | data[1], # training output data
55 | data[2], # validation input data
56 | data[3]) # validation output data
57 | ```
58 | Next, we add the layers. We will be using two hidden layers with 10 perceptrons each and the hyperbolic tangent activation function.
59 | ```
60 | width = 10 # perceptrons per layer
61 | hidden = 2 # number of hidden layers
62 | seed = 0 # random seed
63 | neuralNet.add(
64 | DenseLayer( # Dense layer object
65 | inputDims, # Size of layer input vector
66 | width, # Size of layer output vector
67 | seed=seed, # Random seed
68 | dtype=dtype)) # Layer datatype
69 | neuralNet.add(Tanh()) # Tanh activation function
70 | seed += 1000 # Increment random seed
71 | for n in range(hidden - 1): # Add more hidden layers
72 | neuralNet.add(DenseLayer(width,
73 | width,
74 | seed=seed,
75 | dtype=dtype))
76 | neuralNet.add(Tanh())
77 | seed += 1000
78 |
79 | neuralNet.add(DenseLayer(width,
80 | outputDims,
81 | seed=seed,
82 | dtype=dtype))
83 | ```
84 | Now we need to initialize the Markov Chain Monte Carlo algorithm. We do this with the following code.
85 | ```
86 | neuralNet.setupMCMC(
87 | 0.005, # starting stepsize
88 | 0.0025, # minimum stepsize
89 | 0.01, # maximum stepsize
90 | 40, # number of stepsize options in stepsize adapter
91 | 2, # starting number of leapfrog steps
92 | 2, # minimum number of leapfrog steps
93 | 50, # maximum number of leapfrog steps
94 | 1, # stepsize between leapfrog steps in leapfrog step adapter
95 | 0.01, # hyper parameter stepsize
96 | 5, # hyper parameter number of leapfrog steps
97 | 20, # number of burnin epochs
98 | 20, # number of cores
99 | 2) # number of averaging steps for param adapters)
100 | ```
101 | Next we initialize the Likelihood object we use to evaluate predictions. We use a Gaussian likelihood with a starting standard deviation of 0.1.
102 | ```
103 | likelihood = GaussianLikelihood(sd = 0.1)
104 | ```
105 | We would also like to measure the performance of the network using a metric such as mean squared error, so we initialize a metric object and add it to a metric list.
106 | ```
107 | metricList = [SquaredError()]
108 | ```
109 |
110 | Finally, we get to actually train the network. This is done with the following code.
111 | ```
112 | neuralNet.train(
113 | 1000, # epochs to train for
114 | 2, # increment between network saves
115 | likelihood, metricList=metricList, # likelihood and list of evaluation metrics
116 | folderName="TrigRegression") # Name of folder for saved networks
117 | ```
118 | After this, just run the program. If you would like to inspect the saved networks afterwards, a sketch of how to do so is shown below.
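
As a closing sketch (mirroring the extendedRegression.py example in this repository and assuming the predictor API used there), the saved networks can be loaded and used for predictions like this:

```
import numpy as np
import tensorflow as tf

from tensorBNN.predictor import predictor

# Load the networks saved in the folder used above.
loadedNetwork = predictor("TrigRegression/", tf.float32)

# Predict on a fine grid and summarize the per-network predictions.
gridIn = np.linspace(-2, 2, num=1000)
predictions = np.squeeze(np.array(loadedNetwork.predict(np.array([gridIn]).T, n=1)))
predictionMean = np.mean(predictions, axis=0)
predictionStd = np.std(predictions, axis=0)
```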
119 |
--------------------------------------------------------------------------------
/docs/Setup.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: Setup
4 | ---
5 |
6 | # Setup
7 | All Python code written here is intended to be used with Python 3. The code depends on the packages numpy, emcee, tensorflow, tensorflow-probability, and scipy.
8 |
9 | Numpy, scipy, and emcee can be installed through the command:
10 |
11 | ```
12 | pip3 install numpy scipy emcee
13 | ```
14 |
15 | TensorFlow and TensorFlow-probability must be installed separately. The TensorFlow version should be the most recent (2.3 at the moment). Using a 1.x version will not work, and neither will older versions of 2. It is also highly recommended that this code be run on a GPU due to its high computational complexity. TensorFlow for the GPU can be installed with the command:
16 |
17 | ```
18 | pip3 install tensorflow-gpu
19 | ```
20 |
21 | In order to be compatible with this version of tensorflow, the most recent version of tensorflow-probability (0.11) must be installed. This is done with the following command:
22 |
23 | ```
24 | pip3 install tensorflow-probability
25 | ```
26 |
27 | In order to use this code you can either download it using pip, or clone this repository and make the tensorBNN folder visible to your project (for example by copying it into your project's main folder).
28 | ```
29 | pip install tensorBNN
30 | git clone https://github.com/alpha-davidson/TensorBNN.git
31 | ```
32 |
33 | After this, you can use the following command to import the general network object, and similar commands for the other objects.
34 | ```
35 | from tensorBNN.network import network
36 | ```
37 |
38 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-dinky
2 | defaults:
3 | # _docs
4 | - scope:
5 | path: ""
6 | type: docs
7 | values:
8 | sidebar:
9 | nav: "docs"
10 |
--------------------------------------------------------------------------------
/docs/_data/navigation.yml:
--------------------------------------------------------------------------------
1 | docs:
2 | - title: Home
3 | url: /docs/index.md
4 |
5 | - title: Setup
6 | url: /docs/Setup.md
7 |
8 | - title: Usage
9 | url: /docs/usage.md
10 |
11 |
--------------------------------------------------------------------------------
/docs/_includes/navigation.html:
--------------------------------------------------------------------------------
1 |
10 |
--------------------------------------------------------------------------------
/docs/_layouts/default.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |