├── Examples ├── extendedRegression.py └── trainRegression.py ├── LICENSE ├── README.md ├── docs ├── ClassificationExample.md ├── RegressionExample.md ├── Setup.md ├── _config.yml ├── _data │ └── navigation.yml ├── _includes │ └── navigation.html ├── _layouts │ └── default.html ├── index.md └── usage.md └── tensorBNN ├── BNN_functions.py ├── activationFunctions.py ├── layer.py ├── likelihood.py ├── metrics.py ├── network.py ├── paramAdapter.py └── predictor.py /Examples/extendedRegression.py: -------------------------------------------------------------------------------- 1 | """ 2 | An extended version of the trainRegression.py example with pretraining and 3 | some graphs at the end to visualize the output of the BNN. 4 | """ 5 | 6 | import os 7 | import math 8 | import warnings 9 | import time 10 | 11 | import numpy as np 12 | import random as rn 13 | import tensorflow as tf 14 | import pylab as plt 15 | 16 | 17 | from tensorBNN.activationFunctions import Tanh 18 | from tensorBNN.layer import GaussianDenseLayer 19 | from tensorBNN.networkFinal import network 20 | from tensorBNN.likelihood import FixedGaussianLikelihood 21 | from tensorBNN.metrics import SquaredError, PercentError 22 | from tensorBNN.predictor import predictor 23 | 24 | startTime = time.time() 25 | 26 | # This supresses many deprecation warnings 27 | warnings.filterwarnings("ignore", category=DeprecationWarning) 28 | warnings.filterwarnings("ignore", category=UserWarning) 29 | 30 | # Set the GPU to use 31 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 32 | 33 | os.environ["PYTHONHASHSEED"] = "0" 34 | np.random.seed(42) 35 | rn.seed(12345) 36 | tf.random.set_seed(3) 37 | 38 | 39 | def main(): 40 | 41 | trainIn=np.linspace(-2,2,num=11) 42 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30) 43 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi) 44 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi) 45 | 46 | 47 | data=[trainIn, trainOut, valIn, valOut] 48 | 49 | dtype=tf.float32 50 | 51 | inputDims=1 52 | outputDims=1 53 | width = 10 # perceptrons per layer 54 | hidden = 3 # number of hidden layers 55 | patience=20 56 | cycles=3 57 | epochs=100 58 | seed=1000 59 | 60 | 61 | normInfo=(0,1) # mean, sd 62 | 63 | #Peform pre-training to start the Markov Chain at a better spot 64 | model = tf.keras.Sequential() 65 | 66 | model.add(tf.keras.layers.Dense(width, kernel_initializer='glorot_uniform', 67 | input_shape=(inputDims, ), 68 | activation="tanh")) 69 | model.add(tf.keras.layers.ReLU()) 70 | 71 | for n in range(hidden-1): 72 | model.add(tf.keras.layers.Dense(width, 73 | kernel_initializer='glorot_uniform', 74 | activation="tanh")) 75 | 76 | model.add(tf.keras.layers.Dense(outputDims, 77 | kernel_initializer='glorot_uniform')) 78 | 79 | callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 80 | patience=patience, 81 | restore_best_weights=True) 82 | 83 | #Train with decreasing learning rate 84 | for x in range(cycles): 85 | model.compile(optimizer=tf.keras.optimizers.Adam(0.01*(10**(-x)), 86 | amsgrad=True), 87 | loss='mean_squared_error', 88 | metrics=['mean_absolute_error', 'mean_squared_error']) 89 | model.summary() 90 | model.fit(trainIn, trainOut.T, validation_data=(valIn, valOut.T), 91 | epochs=epochs, batch_size=32, callbacks=[callback]) 92 | 93 | #Save the backup 94 | model.save("backup") 95 | 96 | #Extract weights and biases 97 | weights=[] 98 | biases=[] 99 | activation=[] 100 | for layer in model.layers: 101 | weightBias=layer.get_weights() 102 | if(len(weightBias)==2): 103 | 
weights.append(weightBias[0].T) 104 | bias=weightBias[1] 105 | bias=np.reshape(bias, (len(bias),1)) 106 | biases.append(bias) 107 | if(len(weightBias)==1): 108 | activation.append(weightBias[0]) 109 | 110 | 111 | likelihood=FixedGaussianLikelihood(sd=0.1) 112 | metricList=[SquaredError(mean=normInfo[0], sd=normInfo[1]), 113 | PercentError(mean=normInfo[0], sd=normInfo[1])] 114 | 115 | neuralNet = network( 116 | dtype, # network datatype 117 | inputDims, # dimension of input vector 118 | data[0], # training input data 119 | data[1].T, # training output data 120 | data[2], # validation input data 121 | data[3].T) # validation output data) 122 | 123 | layer = GaussianDenseLayer( # Dense layer object 124 | inputDims, # Size of layer input vector 125 | width, # Size of layer output vector 126 | seed=seed, # Random seed 127 | dtype=dtype, 128 | weights=weights[0], biases=biases[0]) 129 | neuralNet.add(layer) # Layer datatype 130 | neuralNet.add(Tanh()) # Tanh activation function 131 | seed += 1000 # Increment random seed 132 | for n in range(hidden - 1): # Add more hidden layers 133 | neuralNet.add(GaussianDenseLayer(width, 134 | width, 135 | seed=seed, 136 | dtype=dtype, 137 | weights=weights[n+1], biases=biases[n+1])) 138 | neuralNet.add(Tanh()) 139 | seed += 1000 140 | 141 | neuralNet.add(GaussianDenseLayer(width, 142 | outputDims, 143 | seed=seed, 144 | dtype=dtype, 145 | weights=weights[-1], biases=biases[-1])) 146 | 147 | neuralNet.setupMCMC( 148 | stepSizeStart=1e-3,#0.0004 # starting stepsize 149 | stepSizeMin=1e-4, #0.0002 # minimum stepsize 150 | stepSizeMax=1e-2, # maximum stepsize 151 | stepSizeOptions=100, # number of stepsize options in stepsize adapter 152 | leapfrogStart=1000, # starting number of leapfrog steps 153 | leapfogMin=100, # minimum number of leapfrog steps 154 | leapFrogMax=10000, # maximum number of leapfrog steps 155 | leapfrogIncrement=10, # stepsize between leapfrog steps in leapfrog step adapter 156 | hyperStepSize=0.001, # hyper parameter stepsize 157 | hyperLeapfrog=100, # hyper parameter number of leapfrog steps 158 | burnin=1000, # number of burnin epochs 159 | averagingSteps=10) # number of averaging steps for param adapters) 160 | 161 | 162 | neuralNet.train( 163 | 6001, # epochs to train for 164 | 10, # increment between network saves 165 | likelihood, 166 | metricList=metricList, 167 | adjustHypers=True, 168 | folderName="TrigRegression", # Name of folder for saved networks 169 | networksPerFile=50) # Number of networks saved per file 170 | 171 | print("Total time elapsed (seconds):", time.time() - startTime) 172 | 173 | 174 | #Load predictor 175 | loadedNetwork = predictor("TrigRegression/", tf.float32) 176 | 177 | #Look at the predictions ins the space between the training data 178 | closeIn=np.linspace(-2,2,num=1000) 179 | closeOut = np.sin(closeIn*math.pi*2)*closeIn-np.cos(closeIn*math.pi) 180 | 181 | closePredictions = np.squeeze(np.array(loadedNetwork.predict( 182 | np.array([closeIn]).T, n=1))) 183 | closePredictionsMean = np.mean(closePredictions, axis=0) 184 | closePredictionsStd = np.std(closePredictions, axis=0) 185 | plt.figure() 186 | 187 | plt.fill_between(closeIn, closePredictionsMean-2*closePredictionsStd, 188 | closePredictionsMean-1*closePredictionsStd, color=(1,1,0), 189 | label="2 sd") 190 | plt.fill_between(closeIn, closePredictionsMean-1*closePredictionsStd, 191 | closePredictionsMean+1*closePredictionsStd, color=(0,1,0), 192 | label="1 sd") 193 | plt.fill_between(closeIn, closePredictionsMean+1*closePredictionsStd, 194 | 
closePredictionsMean+2*closePredictionsStd, color=(1,1,0)) 195 | plt.plot(closeIn,closePredictionsMean, color="k", label="predicted mean") 196 | plt.plot(closeIn, closeOut, color="r", label="true") 197 | plt.scatter(trainIn, trainOut, color="b", label="training data") 198 | plt.legend() 199 | plt.show() 200 | 201 | #Look at the predictions away from the training data 202 | farIn=np.linspace(-4,4,num=2000) 203 | farOut = np.sin(farIn*math.pi*2)*farIn-np.cos(farIn*math.pi) 204 | 205 | farPredictions = np.squeeze(np.array(loadedNetwork.predict( 206 | np.array([farIn]).T, n=1))) 207 | farPredictionsMean = np.mean(farPredictions, axis=0) 208 | farPredictionsStd = np.std(farPredictions, axis=0) 209 | 210 | plt.figure() 211 | plt.fill_between(farIn, farPredictionsMean-2*farPredictionsStd, 212 | farPredictionsMean-1*farPredictionsStd, color=(1,1,0), 213 | label="2 sd") 214 | plt.fill_between(farIn, farPredictionsMean-1*farPredictionsStd, 215 | farPredictionsMean+1*farPredictionsStd, color=(0,1,0), 216 | label="1 sd") 217 | plt.fill_between(farIn, farPredictionsMean+1*farPredictionsStd, 218 | farPredictionsMean+2*farPredictionsStd, color=(1,1,0)) 219 | plt.plot(farIn,farPredictionsMean, color="k", label="predicted mean") 220 | plt.plot(farIn, farOut, color="r", label="true") 221 | plt.scatter(trainIn, trainOut, color="b", label="training data") 222 | plt.legend() 223 | plt.show() 224 | 225 | 226 | if(__name__ == "__main__"): 227 | main() 228 | -------------------------------------------------------------------------------- /Examples/trainRegression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import warnings 4 | import time 5 | 6 | import numpy as np 7 | import random as rn 8 | import tensorflow as tf 9 | 10 | from tensorBNN.activationFunctions import Tanh 11 | from tensorBNN.layer import GaussianDenseLayer 12 | from tensorBNN.networkFinal import network 13 | from tensorBNN.likelihood import FixedGaussianLikelihood 14 | from tensorBNN.metrics import SquaredError, PercentError 15 | 16 | startTime = time.time() 17 | 18 | # This supresses many deprecation warnings 19 | warnings.filterwarnings("ignore", category=DeprecationWarning) 20 | warnings.filterwarnings("ignore", category=UserWarning) 21 | 22 | # Set the GPU to use 23 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 24 | 25 | os.environ["PYTHONHASHSEED"] = "0" 26 | np.random.seed(42) 27 | rn.seed(12345) 28 | tf.random.set_seed(3) 29 | 30 | 31 | def main(): 32 | 33 | trainIn=np.linspace(-2,2,num=11) 34 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30) 35 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi) 36 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi) 37 | 38 | 39 | data=[trainIn, trainOut, valIn, valOut] 40 | 41 | dtype=tf.float32 42 | 43 | inputDims=1 44 | outputDims=1 45 | width = 10 # perceptrons per layer 46 | hidden = 3 # number of hidden layers 47 | seed=1000 48 | 49 | inputDims=1 50 | outputDims=1 51 | 52 | normInfo=(0,1) # mean, sd 53 | 54 | likelihood=FixedGaussianLikelihood(sd=0.1) 55 | metricList=[SquaredError(mean=normInfo[0], sd=normInfo[1]), 56 | PercentError(mean=normInfo[0], sd=normInfo[1])] 57 | 58 | neuralNet = network( 59 | dtype, # network datatype 60 | inputDims, # dimension of input vector 61 | data[0], # training input data 62 | data[1].T, # training output data 63 | data[2], # validation input data 64 | data[3].T) # validation output data) 65 | 66 | layer = GaussianDenseLayer( # Dense layer object 67 | inputDims, # Size of 
layer input vector 68 | width, # Size of layer output vector 69 | seed=seed, # Random seed 70 | dtype=dtype) 71 | neuralNet.add(layer) # Layer datatype 72 | neuralNet.add(Tanh()) # Tanh activation function 73 | seed += 1000 # Increment random seed 74 | for n in range(hidden - 1): # Add more hidden layers 75 | neuralNet.add(GaussianDenseLayer(width, 76 | width, 77 | seed=seed, 78 | dtype=dtype)) 79 | neuralNet.add(Tanh()) 80 | seed += 1000 81 | 82 | neuralNet.add(GaussianDenseLayer(width, 83 | outputDims, 84 | seed=seed, 85 | dtype=dtype)) 86 | 87 | neuralNet.setupMCMC( 88 | stepSizeStart=1e-3,#0.0004 # starting stepsize 89 | stepSizeMin=1e-4, #0.0002 # minimum stepsize 90 | stepSizeMax=1e-2, # maximum stepsize 91 | stepSizeOptions=100, # number of stepsize options in stepsize adapter 92 | leapfrogStart=1000, # starting number of leapfrog steps 93 | leapfogMin=100, # minimum number of leapfrog steps 94 | leapFrogMax=10000, # maximum number of leapfrog steps 95 | leapfrogIncrement=10, # stepsize between leapfrog steps in leapfrog step adapter 96 | hyperStepSize=0.001, # hyper parameter stepsize 97 | hyperLeapfrog=100, # hyper parameter number of leapfrog steps 98 | burnin=1000, # number of burnin epochs 99 | averagingSteps=10) # number of averaging steps for param adapters) 100 | 101 | 102 | neuralNet.train( 103 | 6001, # epochs to train for 104 | 10, # increment between network saves 105 | likelihood, 106 | metricList=metricList, 107 | adjustHypers=True, 108 | folderName="TrigRegression", # Name of folder for saved networks 109 | networksPerFile=50) # Number of networks saved per file 110 | 111 | print("Total time elapsed (seconds):", time.time() - startTime) 112 | 113 | 114 | 115 | if(__name__ == "__main__"): 116 | main() 117 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Braden Kronheim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorBNN 2 | This package contains code which can be used to train Bayesian Neural Networks using Hamiltonian Monte Carlo sampling as proposed by Radford Neal in his thesis "Bayesian Learning for Neural Networks" along with added features. 
The package is written in python3 and uses the packages `Tensorflow` and `Tensorflow-Probability` as the framework for the implementation. 3 | 4 | For detailed information about this implementation, please see our paper on the arXiv: [TensorBNN: Bayesian Inference for Neural Networks using Tensorflow](https://arxiv.org/abs/2009.14393). Cite as: 5 | 6 | B. Kronheim, M. Kuchera, H. Prosper, TensorBNN: Bayesian inference for neural network training using TensorFlow. arXiv:https://arxiv.org/abs/2009.14393. 7 | 8 | 9 | ## Dependencies 10 | All python code written here is in python3. The code is dependent upon the packages `numpy`, `emcee`, `tensorflow`, `tensorflow-probability`, and `scipy`. 11 | 12 | The package, along with `numpy`, `emcee`, and `scipy`, can be installed via 13 | 14 | ``` 15 | pip install tensorBNN 16 | ``` 17 | 18 | Alternatively, you can clone the repository and download `numpy`, `scipy`, and `emcee` from source through the command: 19 | 20 | ``` 21 | pip install numpy scipy emcee 22 | ``` 23 | 24 | If you cloned the repository from GitHub instead, add the tensorBNN folder to your Python path, and it will work the same as having downloaded it via pip. 25 | 26 | TensorFlow and TensorFlow-Probability must be installed separately. The TensorFlow version should be the most recent (2.5 at the moment). Using a 1.x version will not work, and older versions of 2 might not either. It is also highly recommended that this code be run on a GPU due to its high computational complexity. TensorFlow for the GPU can be installed with the command: 27 | 28 | ``` 29 | pip install tensorflow-gpu 30 | ``` 31 | 32 | In order to be compatible with this version of tensorflow, the most recent version of tensorflow-probability (0.12.2) must be installed. This is done with the following command: 33 | 34 | ``` 35 | pip install tensorflow-probability 36 | ``` 37 | 38 | 39 | ## Usage 40 | 41 | In order to use this code you must import network, DenseLayer, an activation such as Relu, and a likelihood such as the Gaussian likelihood. This can be done as follows: 42 | 43 | ``` 44 | from tensorBNN.layer import DenseLayer 45 | from tensorBNN.network import network 46 | from tensorBNN.activationFunctions import Relu 47 | from tensorBNN.likelihood import GaussianLikelihood 48 | ``` 49 | 50 | Next, it is highly convenient to turn off the deprecation warnings. These are all from tensorflow, tensorflow-probability, and numpy interacting with tensorflow, so it isn't something easily fixed and there are a lot of warnings. These are turned off with: 51 | 52 | ``` 53 | import warnings 54 | warnings.filterwarnings("ignore", category=DeprecationWarning) 55 | warnings.filterwarnings("ignore", category=UserWarning) 56 | ``` 57 | 58 | The other important setup task is determining whether or not to seed the random number generator before training. Please note that if you are using a GPU then there will always be some randomness which cannot be removed.
To set all CPU random numbers use these lines of code: 59 | 60 | ``` 61 | import os 62 | 63 | import numpy as np 64 | import random as rn 65 | import tensorflow as tf 66 | 67 | os.environ["PYTHONHASHSEED"] = "0" 68 | np.random.seed(42) 69 | rn.seed(12345) 70 | tf.random.set_seed(3) 71 | ``` 72 | 73 | Moving on to the actual use of this code, start with the declaration of a network object: 74 | 75 | ``` 76 | neuralNet = network.network(dtype, inputDims, trainX, trainY, validationX, validationY, mean, sd) 77 | ``` 78 | 79 | The parameters are described as follows: 80 | * dtype: data type for Tensors 81 | * inputDims: dimension of input vector 82 | * trainX: the training data input, shape is n by inputDims 83 | * trainY: the training data output 84 | * validateX: the validation data input, shape is n by inputDims 85 | * validateY: the validation data output 86 | * mean: the mean used to scale trainY and validateY 87 | * sd: standard deviation used to scale trainY and validateY 88 | 89 | Next, add all of the desired layers and activation functions as follows: 90 | 91 | ``` 92 | neuralNet.add(DenseLayer(inputDims, outputDims, seed=seed, dtype=tf.float32)) 93 | neuralNet.add(Relu()) 94 | ``` 95 | 96 | For added control, especially when using pre-trained networks, it is possible to feed in pretrained weights, biases, and values for the activation functions. This can be done as follows: 97 | 98 | ``` 99 | neuralNet.add(DenseLayer(inputDims, outputDims, weights=weights, biases=biases, seed=seed, dtype=dtype)) 100 | neuralNet.add(SquarePrelu(width, alpha=alpha**(0.5), activation=activation, dtype=dtype)) 101 | ``` 102 | 103 | The parameter inputDims is the output shape of the previous layer, and width is the output shape of the layer itself. The seed is used for seeding the random number generator. Currently, only ReLU is supported for easy predictions off of saved networks. The other activation functions can be used, but they will require more custom code to predict from saved networks. 104 | 105 | Next, the Markov Chain Monte Carlo algorithm must be initialized. This can be done as follows: 106 | 107 | ``` 108 | neuralNet.setupMCMC(stepSize, stepMin, stepMax, stepNum, leapfrog, leapMin, 109 | leapMax, leapStep, hyperStepSize, hyperLeapfrog, burnin, 110 | cores, averagingSteps=2, a=4, delta=0.1) 111 | ``` 112 | 113 | The parameters are described as follows: 114 | * stepSize: the starting step size for the weights and biases 115 | * stepMin: the minimum step size 116 | * stepMax: the maximum step size 117 | * stepNum: the number of step sizes in grid 118 | * leapfrog: number of leapfrog steps for weights and biases 119 | * leapMin: the minimum number of leapfrog steps 120 | * leapMax: the maximum number of leapfrog steps 121 | * leapStep: the step in number of leapfrog steps for the search grid 122 | * hyperStepSize: the starting step size for the hyper parameters 123 | * hyperLeapfrog: leapfrog steps for hyper parameters 124 | * cores: number of cores to use 125 | * averagingSteps: number of averaging steps 126 | * a: constant, 4 in paper 127 | * delta: constant, 0.1 in paper 128 | 129 | This code uses the adaptive Hamiltonian Monte Carlo described in "Adaptive Hamiltonian and Riemann Manifold Monte Carlo Samplers" by Wang, Mohamed, and de Freitas. In accordance with this paper there are a few more parameters that can be adjusted, though it is recommended that their default values are kept. 130 | 131 | After initializing the HMC, we must declare the likelihood that we want to use as well as any metrics. The likelihood scores the network's predictions against the training targets during sampling; a short sketch of the quantity the Gaussian likelihood evaluates is given below, followed by the actual declarations.
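The following is only an illustration of the idea, not part of the tensorBNN API: for a regression model, a Gaussian likelihood with a fixed standard deviation evaluates how probable the training targets are under a normal distribution centred on the network's predictions. The function name and signature below are made up for the example.

```
import math
import numpy as np

def gaussian_log_likelihood(predictions, targets, sd=0.1):
    # Log-probability of the targets under N(prediction, sd^2),
    # summed over every training point.
    residuals = (targets - predictions) / sd
    return np.sum(-0.5 * residuals**2 - math.log(sd * math.sqrt(2.0 * math.pi)))
```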
In tensorBNN, these declarations look like this: 132 | 133 | ``` 134 | # Declare Gaussian Likelihood with sd of 0.1 135 | likelihood = GaussianLikelihood(sd = 0.1) 136 | metricList = [ # Declare metrics 137 | SquaredError(mean = 0, sd = 1, scaleExp = False), 138 | PercentError(mean = 10, sd = 2, scaleExp = True)] 139 | ``` 140 | 141 | 142 | The last thing to do is to tell the model to start learning. This is done with the following command: 143 | 144 | ``` 145 | network.train( 146 | epochs, # epochs to train for 147 | samplingStep, # increment between network saves 148 | likelihood, 149 | metricList = metricList, 150 | folderName = "Regression", 151 | # Name of folder for saved networks 152 | networksPerFile=50) 153 | # Number of networks saved per file 154 | ``` 155 | 156 | The arguments have the following meanings: 157 | 158 | * Epochs: Number of training cycles 159 | * samplingStep: Epochs between sampled networks 160 | * likelihood: The likelihood function used to evaluate the predictions, 161 | defined previously 162 | * startSigma: Starting standard deviation for likelihood function 163 | for regression models 164 | * folderName: name of folder for saved networks 165 | * networksPerFile: number of networks saved in a given file 166 | 167 | Once the network has trained, which may take a while, the saved networks can be loaded and then used to make predictions using the following code: 168 | 169 | ``` 170 | from tensorBNN.predictor import predictor 171 | 172 | network = predictor(filePath, 173 | dtype = dtype, 174 | # data type used by network 175 | customLayerDict={"dense2": Dense2}, 176 | # A dense layer with a different 177 | # hyperprior 178 | likelihood = Likelihood) 179 | # The likelihood function is required to 180 | # calculate the probabilities for 181 | # re-weighting 182 | 183 | initialResults = network.predict(inputData, skip, dtype) 184 | ``` 185 | 186 | The variable filePath is the directory from which the networks are being loaded, inputData is the normalized data for which predictions should be made, and dtype is the data type to be used for predictions. The customLayerDict is a dictionary holding the names and objects for any user defined layers. Likelihood is the likelihood function used to train the model. 187 | 188 | The variable initialResults will be a list of numpy arrays, each numpy array corresponding to the predictions from a single network in the BNN. The skip variable instructs the predictor to only use every n networks, where n=skip. 189 | 190 | Additionally, the predictor object allows for the calculation of the autocorrelation between different networks, as well as the autocorrelation length, through: 191 | 192 | ``` 193 | autocorrelations = network.autocorrelation(testData, nMax) 194 | autocorrelations = network.autoCorrelationLength(testData, nMax) 195 | ``` 196 | Here, the autocorrelation is calculated based on the predictions of the different BNNs, and the results are averaged over the test data. nMax provides the largest lag value for the autocorrelation. These calculations are done with emcee. 197 | 198 | 199 | Finally, the predictor object can calculate new weights for the different networks if they were given new priors. These priors take the form of new Layer objects which must be referenced in an architecture file. Conceptually this is an importance-reweighting step, sketched below; the actual call follows the sketch.
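The reweighting is essentially importance sampling over the saved networks: each sampled network receives a weight proportional to the ratio of its (unnormalized) posterior density under the new prior to its density under the prior it was trained with. The helper below is only a sketch of that idea under this assumption, not the code tensorBNN actually runs.

```
import numpy as np

def importance_weights(log_post_new, log_post_old):
    # log_post_new / log_post_old: per-network log posterior densities under
    # the new and the original prior, evaluated on the training data.
    logw = np.array(log_post_new) - np.array(log_post_old)
    logw -= logw.max()          # subtract the max for numerical stability
    w = np.exp(logw)
    return w / w.sum()          # normalized weights for the saved networks
```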
The reweighting function call looks like this: 200 | 201 | ``` 202 | weights = network.reweight( 203 | trainX, # training input 204 | trainY, # training output 205 | skip = 10, # Use every 10 saved networks 206 | architecture = "architecture2.txt") 207 | # New architecture file 208 | ``` 209 | 210 | -------------------------------------------------------------------------------- /docs/ClassificationExample.md: -------------------------------------------------------------------------------- 1 | # MNIST Classification Example 2 | On this page is a tutorial on training a classification BNN on the `MNIST` dataset using the tools available inside of `TensorBNN`. This dataset consists of a collection of 28x28 grayscale images of handwritten digits. This tutorial will show how to select two of the digits and train a BNN to distinguish between them. 3 | 4 | ## Data setup 5 | First, it is necessary to import all the packages that will be needed. The required ones are 6 | ``` 7 | import os 8 | 9 | import numpy as np 10 | import random as rn 11 | import tensorflow as tf 12 | 13 | from sklearn.model_selection import train_test_split 14 | 15 | from Networks.activationFunctions import SquarePrelu, Sigmoid 16 | from Networks.BNN_functions import trainBasicClassification 17 | from Networks.layer import DenseLayer 18 | from Networks.network import network 19 | ``` 20 | The `os`, `numpy`, `random`, and `tensorflow` imports are all required to set the random seeds properly so that results are reproducible. The other imports are either for splitting the training and validation data or for constructing the actual network. It is important to note, however, that if a GPU is used for training, which is highly recommended, it is impossible to obtain completely reproducible results simply because of how a GPU works. The code required to set these random seeds is: 21 | 22 | ``` 23 | os.environ["PYTHONHASHSEED"] = "0" 24 | np.random.seed(42) 25 | rn.seed(12345) 26 | tf.random.set_seed(3) 27 | ``` 28 | After setting up the random seeds, we need to get our dataset. This is accomplished through the code: 29 | ``` 30 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path='mnist.npz') 31 | ``` 32 | As the `MNIST` data consists of images, we need to reshape each picture into a vector and scale the pixel values to between 0 and 1. This is accomplished here: 33 | ``` 34 | x_train_shape = x_train.shape 35 | 36 | inputDims=x_train_shape[1]**2 37 | outputDims=1 38 | 39 | x_train = np.reshape(x_train, (x_train_shape[0],x_train_shape[1]**2)) 40 | x_train = np.float32(x_train)/256 41 | ``` 42 | We also collected our input and output dimensions, which will be important later. 43 | Next, we must collect the two numbers that we are interested in. For this tutorial we will use 3 and 8, but you are free to use whatever two numbers you desire. In the following block of code we create our new datasets. 44 | ``` 45 | new_x_train = [] 46 | new_y_train = [] 47 | 48 | for y in range(len(y_train)): 49 | if(y_train[y]==3): 50 | new_y_train.append(0) 51 | new_x_train.append(x_train[y]) 52 | if(y_train[y]==8): 53 | new_y_train.append(1) 54 | new_x_train.append(x_train[y]) 55 | x_train = np.array(new_x_train) 56 | y_train = np.array(new_y_train) 57 | ``` 58 | Finally, we perform an 80-20 train-validation split and store all of the datasets in a list.
59 | ``` 60 | trainIn, valIn, trainOut, valOut = train_test_split( 61 | x_train, y_train, test_size=0.20, random_state=21) 62 | data=[trainIn, trainOut, valIn, valOut] 63 | ``` 64 | 65 | ## Pretraining 66 | Next, we will use the pretraining feature built into `TensorBNN`. This feature uses normal neural network optimization algorithms to give us a superior starting point for the BNN, which is a much slower algorithm. Pretraining the networks then allows for faster convergence of the BNN. To do the pretraining, we simply call `trainBasicClassification` from the `BNN_functions` module. A sample call is shown below with all of the arguments labeled. 67 | ``` 68 | weights, biases, activation = trainBasicClassification( 69 | 2, # Number of hidden layers 70 | inputDims, # Input dimensions 71 | outputDims, # Output dimensions 72 | 20, # Number of perceptrons per layer 73 | nncycles, # Number of training cycles. The learning rate is decreased by a factor of 10 each cycle. 74 | 10, # Number of epochs per training cycle 75 | 0.1, # Slope value for `leaky-relu` activation 76 | data[0], # Training input data 77 | data[1], # Training output data 78 | data[2], # Validation input data 79 | data[3], # Validation output data 80 | "MNIST_pretrain", # Save the pretrained network under this name 81 | callbacks=True, # Use callbacks to restore best weights obtained while training 82 | callbackMetric="val_loss", # metric used to determine best weights 83 | patience=10) # number of epochs to wait after failing to improve callback metric 84 | ``` 85 | Running this function will train a network in Keras and save it under the name "MNIST_pretrain". It will extract the weight, bias, and activation function tensors from the final model and return them. 86 | 87 | ## BNN setup 88 | We are now finally ready to actually set up the BNN. 89 | First, we create a network object. This is accomplished through the following code: 90 | ``` 91 | dtype = tf.float32 # This is the best trade-off between speed and precision. 92 | 93 | neuralNet = network( 94 | dtype, # network datatype 95 | inputDims, # dimension of input vector 96 | data[0], # Training input data 97 | data[1], # Training output data 98 | data[2], # Validation input data 99 | data[3], # Validation output data 100 | tf.cast(0.0, dtype), # Mean of output data for unnormalization 101 | tf.cast(1.0, dtype)) # Standard deviation of output data 102 | ``` 103 | Next, we need to add our layers. We will use two hidden layers of 20 perceptrons each, with SquarePrelu activation functions after the hidden layers and a Sigmoid activation on the output layer. SquarePrelu activations are similar to normal prelu activations, which are essentially leaky-relu activations with a trainable slope parameter. The difference, though, is that the trained parameter is the plus or minus square root of the slope, so the effective slope is always positive. This way, we cannot end up with an activation function that is not a bijection. A minimal sketch of the idea is shown below.
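This sketch is only meant to show what a SquarePrelu activation computes; the class name, constructor, and method below are illustrative rather than the actual `tensorBNN.activationFunctions.SquarePrelu` implementation.

```
import tensorflow as tf

class SquarePreluSketch(object):
    """Leaky-ReLU with a trainable slope, stored as its square root."""

    def __init__(self, width, alphaRoot, dtype=tf.float32):
        # One trainable slope root per perceptron; the effective slope
        # is alphaRoot**2, which can never be negative.
        self.alphaRoot = tf.Variable(
            tf.fill([width], tf.cast(alphaRoot, dtype)))

    def predict(self, inputTensor):
        slope = tf.square(self.alphaRoot)  # squaring keeps the slope positive
        return tf.where(inputTensor > 0, inputTensor, slope * inputTensor)
```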
104 | The code to add the layers is below: 105 | ``` 106 | seed = 0 # seed for layer generation, irrelevant with pretraining 107 | width = 20 # number of perceptrons per layer 108 | alpha = 0.1 # starting slope value for SquarePrelu 109 | hidden = 2 # Number of hidden layers 110 | neuralNet.add( # add a layer 111 | DenseLayer( # dense layer object 112 | inputDims, # input dimension 113 | width, # number of perceptrons per layer 114 | weights=weights[0], # pretrained weights 115 | biases=biases[0], # pretrained biases 116 | seed=seed, # layer seed 117 | dtype=dtype)) # layer datatype 118 | neuralNet.add(SquarePrelu(width, 119 | alpha=alpha**(0.5), # starting slope parameter 120 | activation=None, # no activation pretrained 121 | dtype=dtype)) # activation datatype 122 | seed += 1000 123 | for n in range(hidden - 1): # Add the hidden layers 124 | neuralNet.add(DenseLayer(width, 125 | width, 126 | weights=weights[n + 1], 127 | biases=biases[n + 1], 128 | seed=seed, 129 | dtype=dtype)) 130 | neuralNet.add( 131 | SquarePrelu( 132 | width, 133 | alpha=alpha**(0.5), 134 | activation=None, 135 | dtype=dtype)) 136 | seed += 1000 137 | 138 | # Add the output layer 139 | neuralNet.add(DenseLayer(width, 140 | outputDims, 141 | weights=weights[-1], 142 | biases=biases[-1], 143 | seed=seed, 144 | dtype=dtype)) 145 | neuralNet.add(Sigmoid()) # Sigmoid activation 146 | ``` 147 | Next, we must set up the Markov Chain Monte Carlo algorithm. This is done by simply calling setupMCMC and providing it a lot of information. 148 | ``` 149 | neuralNet.setupMCMC( 150 | 0.001, # Starting stepsize for Hamiltonian Monte Carlo (HMC) 151 | 0.0005, # Minimum possible stepsize for HMC 152 | 0.002, # Maximum possible stepsize for HMC 153 | 100, # Number of points to use in stepsize search grid 154 | 500, # Starting number of leapfrog steps for HMC 155 | 100, # Minimum number of leapfrog steps for HMC 156 | 2000, # Maximum number of leapfrog steps for HMC 157 | 1, # increment in leapfrog steps in leapfrog search grid 158 | 0.00001, # stepsize for hyper parameter HMC 159 | 30, # leapfrog steps for hyper parameter HMC 160 | 50, # Number of burnin steps to do 161 | 2, # Number of cores to use on computer 162 | 2) # Number of steps to average over in adaptive HMC algorithm 163 | ``` 164 | Finally, we can actually train the network. We must give it a few last pieces of information and then it will be on its merry way. 165 | ``` 166 | neuralNet.train( 167 | 2500, # Train for 2500 epochs 168 | 10, # Save a network every 10 epochs 169 | folderName="MNIST_BNN", # Save inside the folder MNIST_BNN 170 | networksPerFile=25, # Start new files every 25 networks 171 | returnPredictions=False, # Don't return predictions 172 | regression=False) # Don't use the regression algorithm, so use the classification algorithm 173 | ``` 174 | A final word of caution: this algorithm is not fast. For large datasets and large networks it is only feasible to run this on GPUs, and even then it may need several days to run. This example is small enough that it should run on normal computers, but it will still take several hours. 175 | -------------------------------------------------------------------------------- /docs/RegressionExample.md: -------------------------------------------------------------------------------- 1 | # Regression Example 2 | Here, I will present an example of using `tensorBNN` on a very basic regression problem. It will also highlight how well the BNN represents model uncertainty. 3 | First, we need to import the necessary packages.
This is done through the commands below. 4 | ## Program Setup 5 | ``` 6 | import os 7 | import math 8 | 9 | import numpy as np 10 | import random as rn 11 | import tensorflow as tf 12 | 13 | from tensorBNN.activationFunctions import Tanh 14 | from tensorBNN.layer import DenseLayer 15 | from tensorBNN.network import network 16 | from tensorBNN.likelihood import GaussianLikelihood 17 | ``` 18 | In order to obtain reproducible results we need to set random seeds. In order to be sure that absolutely everything is seeded, we use the following four lines of code. 19 | ``` 20 | os.environ["PYTHONHASHSEED"] = "0" 21 | np.random.seed(42) 22 | rn.seed(12345) 23 | tf.random.set_seed(3) 24 | ``` 25 | ## Data preparation 26 | Next, we need to generate our dataset. We are simply going to use the function ```f(x)=x*sin(2pi*x)-cos(pi*x)```. 27 | We will generate a training dataset with 31 points and a validation dataset with 30 points. This is done as follows. 28 | ``` 29 | trainIn=np.linspace(-2,2,num=31) 30 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30) 31 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi) 32 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi) 33 | ``` 34 | After this we need to group our data together and declare the datatype we will be using. 35 | ``` 36 | data=[trainIn, trainOut, valIn, valOut] 37 | 38 | dtype=tf.float32 39 | ``` 40 | ## Network setup 41 | To get the network set up we first need to declare the number of input and output dimensions and the normalization we used on our output data. As we didn't normalize, we just say we have a mean of 0 and a standard deviation of 1 so `TensorBNN` doesn't try to unnormalize the data. 42 | ``` 43 | inputDims=1 44 | outputDims=1 45 | 46 | normInfo=(0,1) # mean, sd 47 | ``` 48 | Now we actually need to create the network object. This is done like so. 49 | ``` 50 | neuralNet = network( 51 | dtype, # network datatype 52 | inputDims, # dimension of input vector 53 | data[0], # training input data 54 | data[1], # training output data 55 | data[2], # validation input data 56 | data[3]) # validation output data 57 | ``` 58 | Next, we add the layers. We will be using two hidden layers with 10 perceptrons each and the hyperbolic tangent activation function. 59 | ``` 60 | width = 10 # perceptrons per layer 61 | hidden = 2 # number of hidden layers 62 | seed = 0 # random seed 63 | neuralNet.add( 64 | DenseLayer( # Dense layer object 65 | inputDims, # Size of layer input vector 66 | width, # Size of layer output vector 67 | seed=seed, # Random seed 68 | dtype=dtype)) # Layer datatype 69 | neuralNet.add(Tanh()) # Tanh activation function 70 | seed += 1000 # Increment random seed 71 | for n in range(hidden - 1): # Add more hidden layers 72 | neuralNet.add(DenseLayer(width, 73 | width, 74 | seed=seed, 75 | dtype=dtype)) 76 | neuralNet.add(Tanh()) 77 | seed += 1000 78 | 79 | neuralNet.add(DenseLayer(width, 80 | outputDims, 81 | seed=seed, 82 | dtype=dtype)) 83 | ``` 84 | Now we need to initialize the Markov Chain Monte Carlo algorithm. The sketch below illustrates what the step size and leapfrog settings control; the actual setupMCMC call follows it.
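Hamiltonian Monte Carlo proposes new network parameters by simulating Hamiltonian dynamics with a leapfrog integrator: the step size sets how far each leapfrog step moves, and the number of leapfrog steps sets how long a single proposal trajectory is. The function below is a generic illustration of one such trajectory, not tensorBNN code; `grad_log_prob` stands in for the gradient of the log posterior.

```
def leapfrog_trajectory(theta, momentum, grad_log_prob, step_size, n_steps):
    # One HMC proposal: n_steps leapfrog steps of size step_size.
    momentum = momentum + 0.5 * step_size * grad_log_prob(theta)  # half momentum step
    for _ in range(n_steps - 1):
        theta = theta + step_size * momentum                      # full position step
        momentum = momentum + step_size * grad_log_prob(theta)    # full momentum step
    theta = theta + step_size * momentum
    momentum = momentum + 0.5 * step_size * grad_log_prob(theta)  # final half step
    return theta, momentum
```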
85 | ``` 86 | neuralNet.setupMCMC( 87 | 0.005, # starting stepsize 88 | 0.0025, # minimum stepsize 89 | 0.01, # maximum stepsize 90 | 40, # number of stepsize options in stepsize adapter 91 | 2, # starting number of leapfrog steps 92 | 2, # minimum number of leapfrog steps 93 | 50, # maximum number of leapfrog steps 94 | 1, # stepsize between leapfrog steps in leapfrog step adapter 95 | 0.01, # hyper parameter stepsize 96 | 5, # hyper parameter number of leapfrog steps 97 | 20, # number of burnin epochs 98 | 20, # number of cores 99 | 2) # number of averaging steps for param adapters 100 | ``` 101 | Next, we initialize the Likelihood object we use to evaluate predictions. We use a Gaussian likelihood with a starting standard deviation of 0.1. 102 | ``` 103 | likelihood = GaussianLikelihood(sd = 0.1) 104 | ``` 105 | We would also like to measure the performance of the network using a metric such as mean squared error, so we initialize a metric object (`SquaredError`, imported from `tensorBNN.metrics`) and add it to a metric list. 106 | ``` 107 | metricList = [SquaredError()] 108 | ``` 109 | 110 | Finally, we get to actually train the network, passing it the likelihood we defined above along with the metric list. This is done with the following code. 111 | ``` 112 | neuralNet.train( 113 | 1000, # epochs to train for 114 | 2, # increment between network saves 115 | likelihood, metricList = metricList, # likelihood and list of evaluation metrics 116 | folderName="TrigRegression") # Name of folder for saved networks 117 | ``` 118 | After this, just run the program. 119 | -------------------------------------------------------------------------------- /docs/Setup.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Setup 4 | --- 5 | 6 | # Setup 7 | All python code written here is intended to be used in Python3. The code is dependent upon the packages numpy, emcee, tensorflow, tensorflow-probability, and scipy. 8 | 9 | Numpy, scipy, and emcee can be installed through the command: 10 | 11 | ``` 12 | pip3 install numpy scipy emcee 13 | ``` 14 | 15 | TensorFlow and TensorFlow-Probability must be installed separately. The TensorFlow version should be the most recent (2.3 at the moment). Using a 1.x version will not work, and neither will older versions of 2. It is also highly recommended that this code be run on a GPU due to its high computational complexity. TensorFlow for the GPU can be installed with the command: 16 | 17 | ``` 18 | pip3 install tensorflow-gpu 19 | ``` 20 | 21 | In order to be compatible with this version of tensorflow, the most recent version of tensorflow-probability (0.11) must be installed. This is done with the following command: 22 | 23 | ``` 24 | pip3 install tensorflow-probability 25 | ``` 26 | 27 | In order to use this code you can either clone this repository and copy the Networks folder into a folder named tensorBNN in the main folder of your project, or download it using pip. 28 | ``` 29 | pip install tensorBNN 30 | git clone https://github.com/alpha-davidson/TensorBNN.git 31 | ``` 32 | 33 | After this, you can use the following command to import the general network object, and similar commands for the other objects.
34 | ``` 35 | from tensorBNN.network import network 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-dinky 2 | defaults: 3 | # _docs 4 | - scope: 5 | path: "" 6 | type: docs 7 | values: 8 | sidebar: 9 | nav: "docs" 10 | -------------------------------------------------------------------------------- /docs/_data/navigation.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | - title: Home 3 | url: /docs/index.md 4 | 5 | - title: Setup 6 | url: /docs/Setup.md 7 | 8 | - title: Usage 9 | url: /docs/usage.md 10 | 11 | -------------------------------------------------------------------------------- /docs/_includes/navigation.html: -------------------------------------------------------------------------------- 1 | 10 | -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {% seo %} 8 | 9 | 10 | 11 | 14 | 15 | 16 |
17-48 | [The HTML markup of this Jekyll layout was lost when the repository was rendered to text. The surviving template expressions are: a header showing {{ site.title | default: site.github.repository_name }} and {{ site.description | default: site.github.project_tagline }}, the {% include navigation.html %} directive, a "This project is maintained by {{ site.github.owner_name }}" note inside {% if site.github.is_project_page %} ... {% endif %}, an {% if site.github.is_user_page %} block, and a main section rendering {{ content }}.]
49 | 50 | {% if site.google_analytics %} 51 | 59 | {% endif %} 60 | 61 | 62 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Home 4 | --- 5 | 6 | 7 | # Tensor BNN 8 | This package contains code which can be used to create full Bayesian Neural Networks using Hamiltonian Monte Carlo sampling as proposed by Radford Neal in his thesis "Bayesian Learning for Neural Networks" along with some added features. The package is written in python and uses the packages `Tensorflow` and `Tensorflow-Probability` as the framework for the implementation. 9 | 10 | For instructions on how to setup this package, [click here](Setup.md). 11 | 12 | If you would like an explanation of how to use the code, [click here](usage.md). 13 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Usage 4 | --- 5 | 6 | # Usage 7 | 8 | Through the use of this package it is possible to easily make Bayesian Neural Networks for regression and binary classification learning problems. The folder `Examples` contains an excellent example of a regression problem, and a currently outdated binary classification problem. 9 | 10 | More generally, in order to use this code you must import network, Dense Layer, an activation such as Relu, and a likelihood function such a a Gaussian likelihood. This can be done as follows: 11 | 12 | ``` 13 | from tensorBNN.layer import DenseLayer 14 | from tensorBNN.network import network 15 | from tensorBNN.activationFunctions import Relu 16 | from tensorBNN.likelihood import GaussianLikelihood 17 | ``` 18 | 19 | Next, it is highly convenient to turn off the deprecation warnings. These are all from tensorflow, tensorflow-probability, and numpy intereacting with tensorflow, so it isn't something easily fixed and there are a lot of warnings. These are turned off with: 20 | 21 | ``` 22 | import warnings 23 | warnings.filterwarnings("ignore", category=DeprecationWarning) 24 | warnings.filterwarnings("ignore", category=UserWarning) 25 | ``` 26 | 27 | The other important setup task is determining whether or not to seed the random number generator before training. Please note that if you are using a gpu then there will always be some randomness which cannot be removed. 
To set all cpu random numbers use these lines of code: 28 | 29 | ``` 30 | import os 31 | 32 | import numpy as np 33 | import random as rn 34 | import tensorflow as tf 35 | 36 | os.environ["PYTHONHASHSEED"] = "0" 37 | np.random.seed(42) 38 | rn.seed(12345) 39 | tf.random.set_seed(3) 40 | ``` 41 | 42 | Moving on to the actual use of this code, start with the declaration of a network object: 43 | 44 | ``` 45 | neuralNet = network.network(dtype, inputDims, trainX, trainY, validationX, validationY, mean, sd) 46 | ``` 47 | 48 | The paramaters are described as follows: 49 | * dtype: data type for Tensors 50 | * inputDims: dimension of input vector 51 | * trainX: the training data input, shape is n by inputDims 52 | * trainY: the training data output 53 | * validateX: the validation data input, shape is n by inputDims 54 | * validateY: the validation data output 55 | * mean: the mean used to scale trainY and validateY 56 | * sd: standard deviation used to scale trainY and validateY 57 | 58 | Next, add all of the desired layers and activation functions as follows: 59 | 60 | ``` 61 | neuralNet.add(DenseLayer(inputDims, outputDims, seed=seed, dtype=tf.float32)) 62 | neuralNet.add(Relu()) 63 | ``` 64 | 65 | For added control, especially when using pre-trained networks it is possible to feed pretrained weights, biases, and values for the activation functions. This can be done as follows: 66 | 67 | ``` 68 | neuralNet.add(DenseLayer(inputDims,outputDims, weights=weights, biases=biases, seed=seed, dtype=dtype)) 69 | neuralNet.add(SquarePrelu(width, alpha=alpha**(0.5), activation=activation, dtype=dtype)) 70 | ``` 71 | 72 | The paramater inputDims is the output shape of the layer before, and the width is the ouput shape of the layers itself. The seed is used for seeding the random number generator. Currently, only ReLU is supported for easy predictions off of saved networks. The other activation functions can be used, but they will require more custom code to predict from saved networks. 73 | 74 | Next, the Markov Chain Monte Carlo algorithm must be initialized. This can be done as follows: 75 | 76 | ``` 77 | neuralNet.setupMCMC(self, stepSize, stepMin, stepMax, stepNum, leapfrog, leapMin, 78 | leapMax, leapStep, hyperStepSize, hyperLeapfrog, burnin, 79 | cores, averagingSteps=2, a=4, delta=0.1): 80 | ``` 81 | 82 | The paramaters are described as follows: 83 | * stepSize: the starting step size for the weights and biases 84 | * stepMin: the minimum step size 85 | * stepMax: the maximum step size 86 | * stepNum: the number of step sizes in grid 87 | * leapfrog: number of leapfrog steps for weights and biases 88 | * leapMin: the minimum number of leapfrog steps 89 | * leapMax: the maximum number of leapfrog steps 90 | * leapStep: the step in number of leapfrog for search grid 91 | * hyperStepSize: the starting step size for the hyper parameters 92 | * hyperLeapfrog: leapfrog steps for hyper parameters 93 | * cores: number of cores to use 94 | * averaginSteps: number of averaging steps 95 | * a: constant, 4 in paper 96 | * delta: constant, 0.1 in paper 97 | 98 | This code uses the adaptive Hamlitonain Monte Carlo described in "Adaptive Hamiltonian and Riemann Manifold Monte Carlo Samplers" by Wang, Mohamed, and de Freitas. In accordance with this paper there are a few more paramaters that can be adjusted, though it is recomended that their default values are kept. 99 | 100 | After initializaing the HMC, we must declare the likelihood that we want to use as well as any metrics. 
This can be accomplished through the following code: 101 | 102 | ``` 103 | # Declare Gaussian Likelihood with sd of 0.1 104 | likelihood = GaussianLikelihood(sd = 0.1) 105 | metricList = [ #Declare metrics 106 | SquaredError(mean = 0, sd = 1, scaleExp = False), 107 | PercentError(mean = 10, sd = 2, scaleExp = True)] 108 | ``` 109 | 110 | 111 | The last thing to do is actually tell the model to start learning this is done with the following command: 112 | 113 | ``` 114 | network.train( 115 | epochs, # epochs to train for 116 | samplingStep, # increment between network saves 117 | likelihood, 118 | metricList = metricList, 119 | folderName = "Regression", 120 | # Name of folder for saved networks 121 | networksPerFile=50) 122 | # Number of networks saved per file 123 | ``` 124 | 125 | The arguments have the following meanings: 126 | 127 | * Epochs: Number of training cycles 128 | * samplingStep: Epochs between sampled networks 129 | * likelihood: The likelihood function used to evaluate the prediction 130 | we defined previously 131 | * startSigma: Starting standard deviation for likelihood function 132 | for regression models 133 | * folderName: name of folder for saved networks 134 | * networksPerFile: number of networks saved in a given file 135 | 136 | Once the network has trained, which may take a while, the saved networks can be loaded and then used to make predictions using the following code: 137 | 138 | ``` 139 | from TensorBNN.predictor import predictor 140 | 141 | network = predictor(filePath, 142 | dtype = dtype, 143 | # data type used by network 144 | customLayerDict={"dense2": Dense2}, 145 | # A dense layer with a different 146 | # hyperprior 147 | likelihood = Likelihood) 148 | # The likelihood function is required to 149 | # calculate the probabilities for 150 | # re-weighting 151 | 152 | initialResults = network.predict(inputData, skip, dtype) 153 | ``` 154 | 155 | The variable filePath is the directory from which the networks are being loaded, inputData is the normalized data for which predictions should be made, and dtype is the data type to be used for predictions. The customLayerDict is a dictionary holding the names and objects for any user defined layers. Likelihood is the likelihood function used to train the model. 156 | 157 | The variable initialResults will be a list of numpy arrays, each numpy array corresponding to the predcitions from a single network in the BNN. The skip variable instructs the predictor to only use every n networks, where n=skip 158 | 159 | Additionally, the predictor function allows for the calculation of the autocorrelation between different networks, as well as the autocorrelation length through: 160 | 161 | ``` 162 | autocorrelations = network.autocorrelation(testData, nMax) 163 | autocorrelations = network.autoCorrelationLength(testData, nMax) 164 | ``` 165 | Here, the autocorrelation is calculated based on the predictions of the different BNNs, and the results are averaged over the test data. nMax provides the largest lag value for the autocorrelation. These calculations are done with emcee. 166 | 167 | 168 | Finally, the predictor object can calculate new weights for the different networks if they were given new priors. These priors take the form of new Layer objects which must be referenced in an architecture file. 
The reweighting function call looks like this: 169 | 170 | ``` 171 | weights = network.reweight( 172 | trainX, # training input 173 | trainY, # training output 174 | skip = 10, # Use every 10 saved networks 175 | architecture = "architecture2.txt") 176 | # New architecture file 177 | ``` 178 | -------------------------------------------------------------------------------- /tensorBNN/BNN_functions.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | @tf.function 8 | def multivariateLogProb(sigmaIn, mu, x, dtype=tf.float32): 9 | """ Calculates the log probability of x given mu and sigma defining 10 | a multivariate normal distribution. 11 | 12 | Arguments: 13 | * sigmaIn: an n-dimensional vector with the standard deviations of 14 | * the distribution 15 | * mu: an n-dimensional vector with the means of the distribution 16 | * x: m n-dimensional vectors to have their probabilities calculated 17 | * dtype: data type of calculation 18 | Returns: 19 | * prob: an m-dimensional vector with the log-probabilities of x 20 | """ 21 | sigma = sigmaIn 22 | 23 | sigma = tf.maximum(sigma, tf.cast(10**(-8), dtype)) 24 | sigma = tf.minimum(sigma, tf.cast(10**(8), dtype)) 25 | logDet = 2 * tf.reduce_sum(input_tensor=tf.math.log(sigma)) 26 | k = tf.size(input=sigma, out_type=dtype) 27 | inv = tf.divide(1, sigma) 28 | difSigma = tf.math.multiply(inv, tf.subtract(x, mu)) 29 | difSigmaSquared = tf.reduce_sum(tf.math.multiply(difSigma, difSigma)) 30 | twoPi = tf.cast(2 * math.pi, dtype) 31 | 32 | logLikelihood = -0.5 * (logDet + difSigmaSquared + k * tf.math.log(twoPi)) 33 | 34 | return(logLikelihood) 35 | 36 | 37 | @tf.function 38 | def cauchyLogProb(gamma, x0, x, dtype=tf.float32): 39 | """ Calculates the log probability of x given x0 and gamma defining 40 | a cauchy distribution. 41 | 42 | Arguments: 43 | * gamma: the gamma value for the distribution 44 | * x0: the mean value for the distribution 45 | * x: an n-dimensional vectors to have their probabilities calculated 46 | * dtype: data type of calculation 47 | Returns: 48 | * prob: an n-dimensional vector with the log-probabilities of x 49 | """ 50 | 51 | a = tf.math.log(1 + ((x - x0) / gamma)**2) 52 | b = tf.math.log(tf.cast(math.pi * gamma, dtype)) 53 | c = tf.ones_like(x) 54 | d = -tf.math.scalar_mul(b, c) 55 | prob = a + d 56 | prob = tf.cast(prob, dtype) 57 | return(prob) 58 | 59 | 60 | def trainBasicRegression( 61 | hidden, 62 | inputDims, 63 | outputDims, 64 | width, 65 | cycles, 66 | epochs, 67 | alpha, 68 | trainIn, 69 | trainOut, 70 | valIn, 71 | valOut, 72 | name, 73 | callbacks=True, 74 | callbackMetric="val_loss", 75 | patience=10): 76 | """Trains a basic regression neural network and returns its weights. Uses 77 | the amsgrad optimizer and a learning rate of 0.01 which decays by a factor 78 | of 10 each cycle. The activation function is leaky relu with the specified 79 | alpha value. Saves the network as name in case something goes wrong with 80 | the BNN code so the network does not need to be retrained. 
81 | 82 | Arguments: 83 | * hidden: number of hidden layers 84 | * inputDims: input dimension 85 | * outputDims: output dimension 86 | * width: width of hidden layers 87 | * cycles: number of training cycles with decaying learning rates 88 | * epochs: number of epochs per cycle 89 | * alpha: slope value for leaky ReLU 90 | * trainIn: training input data 91 | * trainOut: training output data 92 | * valIn: validation input data 93 | * valOut: validation output data 94 | * name: name of network 95 | * callbacks: whether to use callbacks 96 | * callbackMetric: metric to use for early stopping 97 | * patience: early stopping patience 98 | 99 | Returns: 100 | * weights: list containing all weight matrices 101 | * biases: list containing all bias vectors 102 | * activation: list containing all activation vectors 103 | """ 104 | 105 | # Set seed 106 | tf.random.set_seed(1000) 107 | 108 | # Create model 109 | model = tf.keras.Sequential() 110 | 111 | model.add( 112 | tf.keras.layers.Dense( 113 | width, 114 | kernel_initializer="glorot_uniform", 115 | input_shape=( 116 | inputDims, 117 | ))) 118 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 119 | 120 | for n in range(hidden - 1): 121 | model.add( 122 | tf.keras.layers.Dense( 123 | width, 124 | kernel_initializer="glorot_uniform")) 125 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 126 | 127 | model.add( 128 | tf.keras.layers.Dense( 129 | outputDims, 130 | kernel_initializer="glorot_uniform")) 131 | 132 | callback = tf.keras.callbacks.EarlyStopping( 133 | monitor=callbackMetric, patience=patience, restore_best_weights=True) 134 | 135 | # Train with decreasing learning rate 136 | for x in range(cycles): 137 | model.compile(optimizer=tf.keras.optimizers.Adam(0.01 * (10**(-x)), 138 | amsgrad=True), 139 | loss="mean_squared_error", 140 | metrics=["mean_absolute_error", "mean_squared_error"]) 141 | model.summary() 142 | 143 | if(callbacks): 144 | model.fit( 145 | trainIn, 146 | trainOut, 147 | validation_data=( 148 | valIn, 149 | valOut), 150 | epochs=epochs, 151 | batch_size=32, 152 | callbacks=[callback]) 153 | else: 154 | model.fit( 155 | trainIn, 156 | trainOut, 157 | validation_data=( 158 | valIn, 159 | valOut), 160 | epochs=epochs, 161 | batch_size=32) 162 | 163 | # Save the backup 164 | model.save(name) 165 | 166 | # Extract weights and biases 167 | weights = [] 168 | biases = [] 169 | activation = [] 170 | for layer in model.layers: 171 | weightBias = layer.get_weights() 172 | if(len(weightBias) == 2): 173 | weights.append(weightBias[0].T) 174 | bias = weightBias[1] 175 | bias = np.reshape(bias, (len(bias), 1)) 176 | biases.append(bias) 177 | if(len(weightBias) == 1): 178 | activation.append(weightBias[0]) 179 | 180 | return(weights, biases, activation) 181 | 182 | 183 | def trainBasicClassification( 184 | hidden, 185 | inputDims, 186 | outputDims, 187 | width, 188 | cycles, 189 | epochs, 190 | alpha, 191 | trainIn, 192 | trainOut, 193 | valIn, 194 | valOut, 195 | name, 196 | callbacks=True, 197 | callbackMetric="val_loss", 198 | patience=10): 199 | """ Trains a basic binary classification neural network and returns its 200 | weights. Uses the amsgrad optimizer and a learning rate of 0.01 which 201 | decays by a factor of 10 each cycle. The activation function is leaky_relu 202 | with the specified alpha value. Saves the network as name in case something 203 | goes wrong with the BNN code so the network does not need to be retrained. 
204 | 205 | Arguments: 206 | * hidden: number of hidden layers 207 | * inputDims: input dimension 208 | * outputDims: output dimension 209 | * width: width of hidden layers 210 | * cycles: number of training cycles with decaying learning rates 211 | * epochs: number of epochs per cycle 212 | * alpha: slope value for leaky ReLU 213 | * trainIn: training input data 214 | * trainOut: training output data 215 | * valIn: validation input data 216 | * valOut: validation output data 217 | * callbacks: whether to use callbacks 218 | * callbackMetric: metric to use for early stopping 219 | * patience: early stopping patience 220 | Returns: 221 | * weights: list containing all weight matrices 222 | * biases: list containing all bias vectors 223 | * activation: list containing all activation vectors 224 | """ 225 | 226 | tf.random.set_seed(1000) 227 | 228 | model = tf.keras.Sequential() 229 | 230 | model.add( 231 | tf.keras.layers.Dense( 232 | width, 233 | kernel_initializer="glorot_uniform", 234 | input_shape=( 235 | inputDims, 236 | ))) 237 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 238 | 239 | for n in range(hidden - 1): 240 | model.add( 241 | tf.keras.layers.Dense( 242 | width, 243 | kernel_initializer="glorot_uniform")) 244 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 245 | 246 | model.add( 247 | tf.keras.layers.Dense( 248 | outputDims, 249 | kernel_initializer="glorot_uniform", 250 | activation="sigmoid")) 251 | 252 | callback = tf.keras.callbacks.EarlyStopping( 253 | monitor=callbackMetric, patience=patience, restore_best_weights=True) 254 | 255 | for x in range(cycles): 256 | model.compile(optimizer=tf.keras.optimizers.Adam(0.001 * (10**(-x)), 257 | amsgrad=True), 258 | loss=tf.keras.losses.BinaryCrossentropy(), 259 | metrics=["accuracy", "mse"]) 260 | model.summary() 261 | if(callbacks): 262 | model.fit( 263 | trainIn, 264 | trainOut, 265 | validation_data=( 266 | valIn, 267 | valOut), 268 | epochs=epochs, 269 | batch_size=32, 270 | callbacks=[callback]) 271 | else: 272 | model.fit( 273 | trainIn, 274 | trainOut, 275 | validation_data=( 276 | valIn, 277 | valOut), 278 | epochs=epochs, 279 | batch_size=32) 280 | 281 | # Save the backup 282 | model.save(name) 283 | 284 | # Extract weights and biases 285 | weights = [] 286 | biases = [] 287 | activation = [] 288 | for layer in model.layers: 289 | weightBias = layer.get_weights() 290 | if(len(weightBias) == 2): 291 | weights.append(weightBias[0].T) 292 | bias = weightBias[1] 293 | bias = np.reshape(bias, (len(bias), 1)) 294 | biases.append(bias) 295 | if(len(weightBias) == 1): 296 | activation.append(weightBias[0]) 297 | 298 | return(weights, biases, activation) 299 | -------------------------------------------------------------------------------- /tensorBNN/activationFunctions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import tensorflow as tf 4 | import tensorflow_probability as tfp 5 | 6 | from tensorflow.python.ops import gen_nn_ops 7 | 8 | from tensorBNN.BNN_functions import multivariateLogProb 9 | from tensorBNN.layer import Layer 10 | 11 | tfd = tfp.distributions 12 | 13 | 14 | class Exp(Layer): 15 | """Exponential activation function""" 16 | 17 | def __init__(self, inputDims=None, outputDims=None): 18 | self.numTensors = 0 19 | self.numHyperTensors = 0 20 | self.name = "Exp" 21 | 22 | def predict(self, inputTensor, _): 23 | result = tf.math.exp(inputTensor) 24 | return(result) 25 | 26 | 27 | class Relu(Layer): 28 | """Relu activation function""" 29 | 
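These activation objects hold no sampled parameters (numTensors = 0); they simply transform the tensor produced by the previous layer, and the second argument of predict is kept only for interface compatibility. A small standalone check with a made-up input tensor:

```
import tensorflow as tf

from tensorBNN.activationFunctions import Exp, Relu

x = tf.constant([[-1.0, 0.0, 2.0]])

# The second argument would hold layer tensors; it is unused here
print(Exp().predict(x, None).numpy())   # elementwise exponential
print(Relu().predict(x, None).numpy())  # negative entries become 0
```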
30 | def __init__(self, inputDims=None, outputDims=None): 31 | self.numTensors = 0 32 | self.numHyperTensors = 0 33 | self.name = "relu" 34 | 35 | def predict(self, inputTensor, _): 36 | result = gen_nn_ops.relu(inputTensor) 37 | return(result) 38 | 39 | 40 | class Sigmoid(Layer): 41 | """Sigmoid activation function""" 42 | 43 | def __init__(self, inputDims=None, outputDims=None): 44 | self.numTensors = 0 45 | self.numHyperTensors = 0 46 | self.name = "sigmoid" 47 | 48 | def predict(self, inputTensor, _): 49 | result = tf.math.sigmoid(inputTensor) 50 | return(result) 51 | 52 | 53 | class Tanh(Layer): 54 | """Tanh activation function""" 55 | 56 | def __init__(self, inputDims=None, outputDims=None): 57 | self.numTensors = 0 58 | self.numHyperTensors = 0 59 | self.name = "tanh" 60 | 61 | def predict(self, inputTensor, _): 62 | result = tf.math.tanh(inputTensor) 63 | return(result) 64 | 65 | 66 | class Elu(Layer): 67 | """Elu activation function""" 68 | 69 | def __init__(self, inputDims=None, outputDims=None): 70 | self.numTensors = 0 71 | self.numHyperTensors = 0 72 | self.name = "elu" 73 | 74 | def predict(self, inputTensor, _): 75 | result = gen_nn_ops.elu(inputTensor) 76 | return(result) 77 | 78 | 79 | class Softmax(Layer): 80 | """Softmax activation function""" 81 | 82 | def __init__(self, inputDims=None, outputDims=None): 83 | self.numTensors = 0 84 | self.numHyperTensors = 0 85 | self.name = "softmax" 86 | 87 | def predict(self, inputTensor, _): 88 | result = gen_nn_ops.softmax(inputTensor) 89 | return(result) 90 | 91 | 92 | class Leaky_relu(Layer): 93 | """Leaky relu activation function""" 94 | 95 | def __init__(self, alpha=0.3, inputDims=None, outputDims=None, 96 | activation=None): 97 | self.numTensors = 1 98 | self.numHyperTensors = 0 99 | self.name = "leakyrelu" 100 | if activation is not None: 101 | alpha = activation 102 | self.parameters = [alpha] 103 | 104 | def predict(self, inputTensor, _): 105 | result = tf.nn.leaky_relu(inputTensor, self.parameters[0]) 106 | return(result) 107 | 108 | def calculateProbs(self, *args): 109 | """Present for compatability.""" 110 | return(0.0) 111 | 112 | def updateParameters(self, *args): 113 | """Present for compatability.""" 114 | self.parameters = self.parameters 115 | 116 | 117 | class Prelu(Layer): 118 | """Prelu activation function""" 119 | 120 | def __init__( 121 | self, 122 | inputDims, 123 | outputDims=None, 124 | dtype=np.float32, 125 | alpha=0.2, 126 | activation=None, 127 | seed=1): 128 | """ 129 | Arguments: 130 | * inputDims: number of input dimensions 131 | * dtype: data type of input and output values 132 | * alpha: Single custom starting slope value 133 | * activation: optional custom values for starting slope values 134 | * seed: seed used for random numbers 135 | """ 136 | self.numTensors = 1 # Number of tensors used for predictions 137 | self.numHyperTensors = 1 # Number of tensor for hyper paramaters 138 | self.inputDims = inputDims 139 | self.dtype = dtype 140 | self.seed = seed 141 | self.name = "prelu" 142 | 143 | # Starting rate value and hyperRate 144 | rate = tf.cast(0.3, dtype) 145 | self.hyperRate = tf.cast(0.3, self.dtype) 146 | 147 | # Starting weight mean, weight SD, bias mean, and bias SD 148 | 149 | self.hypers = [tf.cast(rate, self.dtype)] 150 | 151 | # Starting weights and biases 152 | if(activation is None): 153 | self.parameters = [ 154 | alpha * 155 | tf.ones( 156 | shape=(inputDims), 157 | dtype=self.dtype)] 158 | else: 159 | self.parameters = [activation] 160 | 161 | @tf.function 162 | def 
exponentialLogProb(self, rate, x): 163 | """Calcualtes the log probability of an exponential distribution. 164 | 165 | Arguments: 166 | * rate: rate parameter for the distribution 167 | * x: input value 168 | Returns: 169 | * logProb: log probability of x 170 | """ 171 | 172 | rate = tf.math.abs(rate) 173 | logProb = -rate * x + tf.math.log(rate) 174 | 175 | return(logProb) 176 | 177 | @tf.function 178 | def calculateProbs(self, slopes): 179 | """Calculates the log probability of the slopes given 180 | their distributions in this layer. 181 | 182 | Arguments: 183 | * weightsBias: list with new possible weight and bias tensors 184 | 185 | Returns: 186 | * prob: log prob of weights and biases given their distributions 187 | """ 188 | 189 | val = self.exponentialLogProb(self.hypers[0], slopes) 190 | prob = tf.reduce_sum(input_tensor=val) 191 | 192 | return(prob) 193 | 194 | @tf.function 195 | def calculateHyperProbs(self, hypers, slopes): 196 | """Calculates the log probability of a set of weights and biases given 197 | new distribtuions as well as the probability of the new distribution 198 | means and SDs given their distribtuions. 199 | 200 | Arguments: 201 | * hypers: a list containg 4 new possible hyper parameters 202 | * weightBias: a list with the current weight and bias matrices 203 | 204 | Returns: 205 | * prob: log probability of weights and biases given the new hypers 206 | and the probability of the new hyper parameters given their priors 207 | """ 208 | 209 | slopes = tf.math.abs(slopes[0]) 210 | prob = 0 211 | 212 | # Calculate probability of new hypers 213 | val = self.exponentialLogProb(self.hyperRate, hypers[0]) 214 | prob += tf.reduce_sum(input_tensor=val) 215 | 216 | # Calculate probability of weights and biases given new hypers 217 | val = self.exponentialLogProb(hypers[0], slopes) 218 | prob += tf.reduce_sum(input_tensor=val) 219 | 220 | return(prob) 221 | 222 | @tf.function 223 | def expand(self, current): 224 | """Expands tensors to that they are of rank 2 225 | 226 | Arguments: 227 | * current: tensor to expand 228 | Returns: 229 | * expanded: expanded tensor 230 | 231 | """ 232 | currentShape = tf.pad( 233 | tensor=tf.shape(input=current), 234 | paddings=[[tf.where(tf.rank(current) > 1, 0, 1), 0]], 235 | constant_values=1) 236 | expanded = tf.reshape(current, currentShape) 237 | return(expanded) 238 | 239 | def predict(self, inputTensor, slopes): 240 | """Calculates the output of the layer based on the given input tensor 241 | and weight and bias values 242 | 243 | Arguments: 244 | * inputTensor: the input tensor the layer acts on 245 | * weightBias: a list with the current weight and bias tensors 246 | Returns: 247 | * result: the output of the layer 248 | 249 | """ 250 | slopes = slopes[0] 251 | slopes = tf.squeeze(slopes) 252 | slopes = tf.reshape(slopes, (len(slopes), 1)) 253 | activated = tf.multiply(slopes, inputTensor) 254 | result = tf.where(tf.math.less(inputTensor, 0), activated, inputTensor) 255 | return(self.expand(result)) 256 | 257 | def updateParameters(self, slopes): 258 | """ Updates the network parameters 259 | 260 | Arguments: 261 | * slopes: new slope parameter 262 | """ 263 | self.parameters = [slopes[0]] 264 | 265 | def updateHypers(self, hypers): 266 | """ Updates the network parameters 267 | 268 | Arguments: 269 | * slopes: new slope parameter 270 | """ 271 | self.hypers = [tf.maximum(tf.cast(0.01, self.dtype), hypers[0])] 272 | 273 | 274 | class SquarePrelu(Layer): 275 | """Prelu activation function""" 276 | 277 | def __init__( 278 | self, 279 
| inputDims, 280 | outputDims=None, 281 | dtype=np.float32, 282 | alpha=0.2, 283 | activation=None, 284 | seed=1): 285 | """ 286 | Arguments: 287 | * inputDims: number of input dimensions 288 | * dtype: data type of input and output values 289 | * alpha: Single custom starting slope value 290 | * activation: optional custom values for starting slope values 291 | * seed: seed used for random numbers 292 | """ 293 | self.numTensors = 1 # Number of tensors used for predictions 294 | self.numHyperTensors = 2 # Number of tensor for hyper paramaters 295 | self.inputDims = inputDims 296 | self.dtype = dtype 297 | self.seed = seed 298 | self.name = "squareprelu" 299 | 300 | # Starting rate value and hyperRate 301 | mean = tf.cast(0.0, dtype) 302 | sd = tf.cast(0.3, dtype) 303 | 304 | meanMean = tf.cast(0.0, dtype) 305 | meanSD = tf.cast(0.3, dtype) 306 | sdMean = tf.cast(0.3, dtype) 307 | sdSD = tf.cast(0.1, dtype) 308 | 309 | self.meanHyper = tfd.MultivariateNormalDiag(loc=[meanMean], 310 | scale_diag=[meanSD]) 311 | 312 | self.sdHyper = tfd.MultivariateNormalDiag(loc=[sdMean], 313 | scale_diag=[sdSD]) 314 | 315 | # Starting weight mean, weight SD, bias mean, and bias SD 316 | 317 | self.hypers = [mean, sd] 318 | 319 | # Starting weights and biases 320 | if(activation is None): 321 | self.parameters = [ 322 | alpha * 323 | tf.ones( 324 | shape=(inputDims), 325 | dtype=self.dtype)] 326 | else: 327 | self.parameters = [activation] 328 | 329 | @tf.function 330 | def calculateProbs(self, slopes): 331 | """Calculates the log probability of the slopes given 332 | their distributions in this layer. 333 | 334 | Arguments: 335 | * weightsBias: list with new possible weight and bias tensors 336 | 337 | Returns: 338 | * prob: log prob of weights and biases given their distributions 339 | """ 340 | 341 | prob = tf.reduce_sum( 342 | multivariateLogProb( 343 | self.hypers[1], 344 | self.hypers[0], 345 | slopes, 346 | dtype=self.dtype)) 347 | 348 | return(prob) 349 | 350 | @tf.function 351 | def calculateHyperProbs(self, hypers, slopes): 352 | """Calculates the log probability of a set of weights and biases given 353 | new distribtuions as well as the probability of the new distribution 354 | means and SDs given their distribtuions. 
355 | 356 | Arguments: 357 | * hypers: a list containg 4 new possible hyper parameters 358 | * weightBias: a list with the current weight and bias matrices 359 | 360 | Returns: 361 | * prob: log probability of weights and biases given the new hypers 362 | and the probability of the new hyper parameters given their priors 363 | """ 364 | 365 | mean = hypers[0] 366 | sd = hypers[1] 367 | 368 | slopes = tf.square(slopes[0]) 369 | 370 | prob = tf.reduce_sum( 371 | multivariateLogProb( 372 | sd, mean, slopes, dtype=self.dtype)) 373 | 374 | # Calculate probability of new hypers 375 | val = self.meanHyper.log_prob([mean]) 376 | prob += tf.reduce_sum(input_tensor=val) 377 | 378 | # Calculate probability of weights and biases given new hypers 379 | val = self.sdHyper.log_prob([sd]) 380 | prob += tf.reduce_sum(input_tensor=val) 381 | 382 | return(prob) 383 | 384 | @tf.function 385 | def expand(self, current): 386 | """Expands tensors to that they are of rank 2 387 | 388 | Arguments: 389 | * current: tensor to expand 390 | Returns: 391 | * expanded: expanded tensor 392 | 393 | """ 394 | currentShape = tf.pad( 395 | tensor=tf.shape(input=current), 396 | paddings=[[tf.where(tf.rank(current) > 1, 0, 1), 0]], 397 | constant_values=1) 398 | expanded = tf.reshape(current, currentShape) 399 | return(expanded) 400 | 401 | def predict(self, inputTensor, slopes): 402 | """Calculates the output of the layer based on the given input tensor 403 | and weight and bias values 404 | 405 | Arguments: 406 | * inputTensor: the input tensor the layer acts on 407 | * weightBias: a list with the current weight and bias tensors 408 | Returns: 409 | * result: the output of the layer 410 | 411 | """ 412 | slopes = slopes[0]**2 413 | slopes = tf.squeeze(slopes) 414 | slopes = tf.reshape(slopes, (len(slopes), 1)) 415 | activated = tf.multiply(slopes, inputTensor) 416 | result = tf.where(tf.math.less(inputTensor, 0), activated, inputTensor) 417 | return(self.expand(result)) 418 | 419 | def updateParameters(self, slopes): 420 | """ Updates the network parameters 421 | 422 | Arguments: 423 | * slopes: new slope parameter 424 | """ 425 | self.parameters = [slopes[0]] 426 | 427 | def updateHypers(self, hypers): 428 | """ Updates the network parameters 429 | 430 | Arguments: 431 | * slopes: new slope parameter 432 | """ 433 | self.hypers = [hypers[0], hypers[1]] 434 | -------------------------------------------------------------------------------- /tensorBNN/layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tensorflow_probability as tfp 4 | 5 | from tensorBNN.BNN_functions import cauchyLogProb, multivariateLogProb 6 | 7 | tfd = tfp.distributions 8 | 9 | 10 | class Layer(object): 11 | """ A basic layer object. This must have input and output dimensions, but 12 | the remaining variables can be used depending on the specific use. This 13 | object can be implemented as a basic layer or as an activation function. 
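To illustrate this interface, a custom activation can subclass Layer, declare zero parameter and hyperparameter tensors, and override predict. The Softplus class below is a hypothetical example, not part of the library, written in the same style as the built-in activation functions:

```
import tensorflow as tf

from tensorBNN.layer import Layer


class Softplus(Layer):
    """Softplus activation function (illustrative custom layer)."""

    def __init__(self, inputDims=None, outputDims=None):
        self.numTensors = 0       # no sampled parameter tensors
        self.numHyperTensors = 0  # no hyperparameter tensors
        self.name = "softplus"

    def predict(self, inputTensor, _):
        return(tf.math.softplus(inputTensor))
```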
14 | """ 15 | 16 | def __init__( 17 | self, 18 | inputDims, 19 | outputDims, 20 | weights=None, 21 | biases=None, 22 | activation=None, 23 | dtype=np.float32, 24 | alpha=0, 25 | seed=1): 26 | """ 27 | Arguments: 28 | * inputDims: number of input dimensions 29 | * outputDims: number of output dimensions 30 | * weights: list of starting weight matrices 31 | * biases: list of starting bias vectors 32 | * activation: list of starting activation function values 33 | * dtype: data type of input and output values 34 | * alpha: constant used for activation functions 35 | * seed: seed used for random numbers 36 | """ 37 | self.numTensors = 0 # Number of tensors used for predictions 38 | self.numHyperTensors = 0 # Number of tensor for hyper paramaters 39 | self.inputDims = inputDims 40 | self.outputDims = outputDims 41 | self.dtype = dtype 42 | self.seed = seed 43 | self.name = "name" 44 | 45 | def calculateProbs(self, tensors): 46 | """Calculates the log probability of a set of tensors given 47 | their distributions in this layer. 48 | 49 | Arguments: 50 | * tensors: list with new possible tensors for layer 51 | 52 | Returns: 53 | * prob: log prob of new tensors given their distributions 54 | """ 55 | return(tf.Constant(0.0, shape=(), dtype=tf.float32)) 56 | 57 | def calculateHyperProbs(self, hypers, tensors): 58 | """Calculates the log probability of a set of tensors given 59 | new distribtuions as well as the probability of the new distribution 60 | means and SDs given their distribtuions. 61 | 62 | Arguments: 63 | * hypers: a list containg new possible hyper parameters 64 | * tensors: a list with the current tensors 65 | 66 | Returns: 67 | * prob: log probability of tensors given the new hypers 68 | and the probability of the new hyper parameters given their priors 69 | """ 70 | return(tf.constant(0.0)) 71 | 72 | def expand(self, current): 73 | """Expands tensors to that they are of rank 2 74 | 75 | Arguments: 76 | * current: tensor to expand 77 | Returns: 78 | * expanded: expanded tensor 79 | 80 | """ 81 | currentShape = tf.pad( 82 | tensor=tf.shape(input=current), 83 | paddings=[[tf.where(tf.rank(current) > 1, 0, 1), 0]], 84 | constant_values=1) 85 | expanded = tf.reshape(current, currentShape) 86 | return(expanded) 87 | 88 | def predict(self, inputTensor, tensors): 89 | """Calculates the output of the layer based on the given input tensor 90 | and weight and bias values 91 | 92 | Arguments: 93 | * inputTensor: the input tensor the layer acts on 94 | * tensors: a list with the current layer tensors 95 | Returns: 96 | * result: the output of the layer 97 | """ 98 | pass 99 | 100 | 101 | class CauchyDenseLayer(Layer): 102 | """Creates a 1 Dimensional Dense Bayesian Layer with Cauchy priors. 103 | 104 | Currently, the starting weight and bias mean values are 0.0 with a standard 105 | deviation of 1.0/sqrt(outputDims). The distribution that these values are 106 | subject to have these values as their means, and a standard deviation of 107 | 2.0/sqrt(outputDims). 
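As a concrete illustration (the dimensions here are made up), a layer of this type can be constructed directly; when no starting weights and biases are supplied it draws them from its own priors via sample():

```
import tensorflow as tf

from tensorBNN.layer import CauchyDenseLayer

# 4 inputs -> 10 outputs, initialized from the layer's priors
layer = CauchyDenseLayer(4, 10, dtype=tf.float32, seed=1)

x = tf.ones((4, 3), dtype=tf.float32)     # 3 input vectors stored as columns
out = layer.predict(x, layer.parameters)  # output has shape (10, 3)
```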
108 | """ 109 | 110 | def __init__( 111 | self, 112 | inputDims, 113 | outputDims, 114 | weights=None, 115 | biases=None, 116 | dtype=np.float32, 117 | seed=1): 118 | """ 119 | Arguments: 120 | * inputDims: number of input dimensions 121 | * outputDims: number of output dimensions 122 | * weights: list of starting weight matrices 123 | * biases: list of starting bias vectors 124 | * dtype: data type of input and output values 125 | * seed: seed used for random numbers 126 | """ 127 | self.numTensors = 2 # Number of tensors used for predictions 128 | self.numHyperTensors = 4 # Number of tensor for hyper paramaters 129 | self.inputDims = inputDims 130 | self.outputDims = outputDims 131 | self.dtype = dtype 132 | self.seed = seed 133 | self.name = "dense" 134 | 135 | # Weight mean value and mean distribution 136 | weightsx0 = 0.0 137 | self.weightsx0Hyper = tfd.MultivariateNormalDiag(loc=[weightsx0], 138 | scale_diag=[.2]) 139 | 140 | # weight gamma value and gamma distribution 141 | weightsGamma = 0.5**0.5 142 | self.weightsGammaHyper = tfd.MultivariateNormalDiag(loc=[weightsGamma], 143 | scale_diag=[0.5]) 144 | 145 | # bias mean value and mean distribution 146 | biasesx0 = 0.0 147 | self.biasesx0Hyper = tfd.MultivariateNormalDiag(loc=[biasesx0], 148 | scale_diag=[.2]) 149 | 150 | # bias gamma value and gamma distribution 151 | biasesGamma = 0.5**0.5 152 | self.biasesGammaHyper = tfd.MultivariateNormalDiag(loc=[biasesGamma], 153 | scale_diag=[0.5]) 154 | 155 | # Starting weight mean, weight gamma, bias mean, and bias gamma 156 | self.hypers = tf.cast( 157 | [[weightsx0], [weightsGamma], [biasesx0], [biasesGamma]], 158 | self.dtype) 159 | 160 | # Starting weights and biases 161 | if(weights is None): 162 | self.parameters = self.sample() 163 | else: 164 | self.parameters = [weights, biases] 165 | 166 | def calculateProbs(self, hypers, tensors): 167 | """Calculates the log probability of a set of weights and biases given 168 | their distributions in this layer. 169 | 170 | Arguments: 171 | * weightsBias: list with new possible weight and bias tensors 172 | 173 | Returns: 174 | * prob: log prob of weights and biases given their distributions 175 | """ 176 | # Create the tensors used to calculate probability 177 | weightsx0 = hypers[0] 178 | weightsGamma = hypers[1]**2 179 | biasesx0 = hypers[2] 180 | biasesGamma = hypers[3]**2 181 | weights = tensors[0] 182 | biases = tensors[1] 183 | 184 | prob = tf.cast(0, self.dtype) 185 | 186 | # Calculate probability of weights and biases given new hypers 187 | val = cauchyLogProb(weightsGamma[0], weightsx0[0], weights, 188 | dtype=self.dtype) 189 | prob += tf.reduce_sum(input_tensor=val) 190 | val = cauchyLogProb( 191 | biasesGamma[0], 192 | biasesx0[0], 193 | biases, 194 | dtype=self.dtype) 195 | prob += tf.reduce_sum(input_tensor=val) 196 | 197 | return(prob) 198 | 199 | def calculateHyperProbs(self, hypers, tensors): 200 | """Calculates the log probability of a set of weights and biases given 201 | new distribtuions as well as the probability of the new distribution 202 | means and SDs given their distribtuions. 
203 | 204 | Arguments: 205 | * hypers: a list containg 4 new possible hyper parameters 206 | * tensors: a list with the current weight and bias matrices 207 | 208 | Returns: 209 | * prob: log probability of weights and biases given the new hypers 210 | and the probability of the new hyper parameters given their priors 211 | """ 212 | weightsx0 = hypers[0] 213 | weightsGamma = hypers[1]**2 214 | biasesx0 = hypers[2] 215 | biasesGamma = hypers[3]**2 216 | weights = tensors[0] 217 | biases = tensors[1] 218 | 219 | prob = tf.cast(0, self.dtype) 220 | 221 | val = self.weightsx0Hyper.log_prob([[weightsx0]]) 222 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 223 | val = self.weightsGammaHyper.log_prob([[weightsGamma]]) 224 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 225 | 226 | val = self.biasesx0Hyper.log_prob([[biasesx0]]) 227 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 228 | val = self.biasesGammaHyper.log_prob([[biasesGamma]]) 229 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 230 | 231 | # Calculate probability of weights and biases given new hypers 232 | val = cauchyLogProb(weightsGamma[0], weightsx0[0], weights, 233 | dtype=self.dtype) 234 | prob += tf.reduce_sum(input_tensor=val) 235 | val = cauchyLogProb( 236 | biasesGamma[0], 237 | biasesx0[0], 238 | biases, 239 | dtype=self.dtype) 240 | prob += tf.reduce_sum(input_tensor=val) 241 | 242 | return(prob) 243 | 244 | def sample(self): 245 | """Creates randomized weight and bias tensors based off 246 | of their distributions 247 | 248 | Returns: 249 | * tempWeights: randomized weight tensor in first list position 250 | * tempBiases: randomized bias tensor in second list position 251 | """ 252 | 253 | tempWeights = tf.random.normal((self.outputDims, self.inputDims), 254 | mean=self.hypers[0], 255 | stddev=(2 / self.outputDims)**(0.5), 256 | seed=self.seed, 257 | dtype=self.dtype) 258 | tempBiases = tf.random.normal((self.outputDims, 1), 259 | mean=self.hypers[2], 260 | stddev=(2 / self.outputDims)**(0.5), 261 | seed=self.seed + 1, 262 | dtype=self.dtype) 263 | 264 | return([tempWeights, tempBiases]) 265 | 266 | def predict(self, inputTensor, tensors): 267 | """Calculates the output of the layer based on the given input tensor 268 | and weight and bias values 269 | 270 | Arguments: 271 | * inputTensor: the input tensor the layer acts on 272 | * tensors: a list with the current weight and bias tensors 273 | Returns: 274 | * result: the output of the layer 275 | """ 276 | weightTensor = self.expand(tensors[0]) 277 | biasTensor = self.expand(tensors[1]) 278 | result = tf.add(tf.matmul(weightTensor, inputTensor), biasTensor) 279 | return(result) 280 | 281 | 282 | class GaussianDenseLayer(Layer): 283 | """Creates a 1 Dimensional Dense Bayesian Layer with Gaussian priors. 284 | 285 | Currently, the starting weight and bias mean values are 0.0 with a standard 286 | deviation of 1.0/sqrt(outputDims). The distribution that these values are 287 | subject to have these values as their means, and a standard deviation of 288 | 2.0/sqrt(outputDims). 
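For a quick, self-contained look at the prior this layer defines (the sizes below are made up), sample() draws a weight/bias pair and calculateProbs scores that pair under the current hyperparameters:

```
import tensorflow as tf

from tensorBNN.layer import GaussianDenseLayer

layer = GaussianDenseLayer(3, 5, dtype=tf.float32, seed=7)

weights, biases = layer.sample()  # draw a weight matrix and bias vector
logPrior = layer.calculateProbs(layer.hypers, [weights, biases])
print(logPrior.numpy())
```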
289 | """ 290 | 291 | def __init__( 292 | self, 293 | inputDims, 294 | outputDims, 295 | weights=None, 296 | biases=None, 297 | dtype=np.float32, 298 | seed=1): 299 | """ 300 | Arguments: 301 | * inputDims: number of input dimensions 302 | * outputDims: number of output dimensions 303 | * weights: list of starting weight matrices 304 | * biases: list of starting bias vectors 305 | * dtype: data type of input and output values 306 | * seed: seed used for random numbers 307 | """ 308 | self.numTensors = 2 # Number of tensors used for predictions 309 | self.numHyperTensors = 4 # Number of tensor for hyper paramaters 310 | self.inputDims = inputDims 311 | self.outputDims = outputDims 312 | self.dtype = dtype 313 | self.seed = seed 314 | self.name = "denseGaussian" 315 | 316 | # Weight mean value and mean distribution 317 | weightsMean = 0.0 318 | self.weightsMeanHyper = tfd.MultivariateNormalDiag(loc=[weightsMean], 319 | scale_diag=[.1]) 320 | 321 | # weight SD value and SD distribution 322 | weightsSD = 1.0 323 | self.weightsSDHyper = tfd.MultivariateNormalDiag(loc=[weightsSD], 324 | scale_diag=[0.1]) 325 | 326 | # bias mean value and mean distribution 327 | biasesMean = 0.0 328 | self.biasesMeanHyper = tfd.MultivariateNormalDiag(loc=[biasesMean], 329 | scale_diag=[.1]) 330 | 331 | # bias SD value and SD distribution 332 | biasesSD = 1.0 333 | self.biasesSDHyper = tfd.MultivariateNormalDiag(loc=[biasesSD], 334 | scale_diag=[0.1]) 335 | 336 | # Starting weight mean, weight SD, bias mean, and bias SD 337 | self.hypers = tf.cast( 338 | [[weightsMean], [weightsSD], [biasesMean], [biasesSD]], self.dtype) 339 | 340 | # Starting weights and biases 341 | if(weights is None): 342 | self.parameters = self.sample() 343 | else: 344 | self.parameters = [weights, biases] 345 | 346 | def calculateProbs(self, hypers, tensors): 347 | """Calculates the log probability of a set of weights and biases given 348 | their distributions in this layer. 349 | 350 | Arguments: 351 | * weightsBias: list with new possible weight and bias tensors 352 | 353 | Returns: 354 | * prob: log prob of weights and biases given their distributions 355 | """ 356 | # Create the tensors used to calculate probability 357 | weightsMean = hypers[0] 358 | weightsSD = hypers[1]**2 # Ensure positive sd 359 | biasesMean = hypers[2] 360 | biasesSD = hypers[3]**2 # Ensure positive sd 361 | weights = tensors[0] 362 | biases = tensors[1] 363 | 364 | prob = tf.cast(0, self.dtype) 365 | 366 | # Calculate probability of weights and biases given new hypers 367 | val = multivariateLogProb(weightsSD[0], weightsMean[0], weights, 368 | dtype=self.dtype) 369 | prob += tf.reduce_sum(input_tensor=val) 370 | val = multivariateLogProb( 371 | biasesSD[0], 372 | biasesMean[0], 373 | biases, 374 | dtype=self.dtype) 375 | prob += tf.reduce_sum(input_tensor=val) 376 | 377 | return(prob) 378 | 379 | def calculateHyperProbs(self, hypers, tensors): 380 | """Calculates the log probability of a set of weights and biases given 381 | new distribtuions as well as the probability of the new distribution 382 | means and SDs given their distribtuions. 
383 | 384 | Arguments: 385 | * hypers: a list containg 4 new possible hyper parameters 386 | * tensors: a list with the current weight and bias matrices 387 | 388 | Returns: 389 | * prob: log probability of weights and biases given the new hypers 390 | and the probability of the new hyper parameters given their priors 391 | """ 392 | weightsMean = hypers[0] 393 | weightsSD = hypers[1]**2 394 | biasesMean = hypers[2] 395 | biasesSD = hypers[3]**2 396 | weights = tensors[0] 397 | biases = tensors[1] 398 | 399 | prob = tf.cast(0, self.dtype) 400 | 401 | val = self.weightsMeanHyper.log_prob([[weightsMean]]) 402 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 403 | val = self.weightsSDHyper.log_prob([[weightsSD]]) 404 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 405 | 406 | val = self.biasesMeanHyper.log_prob([[biasesMean]]) 407 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 408 | val = self.biasesSDHyper.log_prob([[biasesSD]]) 409 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 410 | 411 | # Calculate probability of weights and biases given new hypers 412 | val = multivariateLogProb(weightsSD[0], weightsMean[0], weights, 413 | dtype=self.dtype) 414 | prob += tf.reduce_sum(input_tensor=val) 415 | val = multivariateLogProb( 416 | biasesSD[0], 417 | biasesMean[0], 418 | biases, 419 | dtype=self.dtype) 420 | prob += tf.reduce_sum(input_tensor=val) 421 | 422 | return(prob) 423 | 424 | def sample(self): 425 | """Creates randomized weight and bias tensors based off 426 | of their distributions 427 | 428 | Returns: 429 | * tempWeights: randomized weight tensor in first list position 430 | * tempBiases: randomized bias tensor in second list position 431 | """ 432 | 433 | tempWeights = tf.random.normal((self.outputDims, self.inputDims), 434 | mean=self.hypers[0], 435 | stddev=(2 / self.outputDims)**(0.5), 436 | seed=self.seed, 437 | dtype=self.dtype) 438 | tempBiases = tf.random.normal((self.outputDims, 1), 439 | mean=self.hypers[2], 440 | stddev=(2 / self.outputDims)**(0.5), 441 | seed=self.seed + 1, 442 | dtype=self.dtype) 443 | 444 | return([tempWeights, tempBiases]) 445 | 446 | def predict(self, inputTensor, tensors): 447 | """Calculates the output of the layer based on the given input tensor 448 | and weight and bias values 449 | 450 | Arguments: 451 | * inputTensor: the input tensor the layer acts on 452 | * tensors: a list with the current weight and bias tensors 453 | Returns: 454 | * result: the output of the layer 455 | """ 456 | weightTensor = self.expand(tensors[0]) 457 | biasTensor = self.expand(tensors[1]) 458 | result = tf.add(tf.matmul(weightTensor, inputTensor), biasTensor) 459 | return(result) 460 | 461 | DenseLayer = CauchyDenseLayer # For backwards compatibiltiy 462 | -------------------------------------------------------------------------------- /tensorBNN/likelihood.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_probability as tfp 3 | 4 | from tensorBNN.BNN_functions import multivariateLogProb 5 | 6 | tfd = tfp.distributions 7 | 8 | 9 | class Likelihood(object): 10 | def __init__(self, *argv, **kwargs): 11 | """ 12 | When declared, this constructor will be given keywords corresponding 13 | to any possible hyper parameters, as well as whether it should be 14 | calculated when the hyperparameters are being adjusted. This will 15 | likely be the case if it has hyper paramters. 
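Concretely, each likelihood defined below is constructed with its hyperparameters as keyword arguments and records, through mainProbsInHypers, whether the data term must be recomputed while hyperparameters are updated. A short sketch; the sd value is arbitrary:

```
from tensorBNN.likelihood import GaussianLikelihood, FixedGaussianLikelihood

adjustable = GaussianLikelihood(sd=0.2)  # sd becomes a sampled hyperparameter
fixed = FixedGaussianLikelihood(sd=0.2)  # sd stays fixed during training

print(adjustable.hypers, adjustable.mainProbsInHypers)  # [[~0.447]], True
print(fixed.hypers, fixed.mainProbsInHypers)            # [], False
```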
16 | """ 17 | self.hypers = [] 18 | self.mainProbsInHypers = False 19 | 20 | def makeResponseLikelihood(self, *argv, **kwargs): 21 | """ This method will make a prediction and predict its probability 22 | given the likelihood funtion implemented. It will need at least the 23 | following inputs and must have the following outputs: 24 | Arguments: 25 | * argv: an undetermined number of tensors containg the weights 26 | and biases 27 | * realVals: the actual values for the predicted quantities 28 | * predict: the function used to make a prediction from the 29 | current neural net 30 | * dtype: the datatype of the network 31 | Returns: 32 | * result: the log probabilities of the real vals given the 33 | predicted values 34 | """ 35 | self.hypers = [] 36 | 37 | def calculateLogProb(self, *argv, **kwargs): 38 | """ This is a version of makeResponseLikelihood designed to deal with 39 | multiple sets of hyper paramters. It is used for reweighting in the 40 | predictor object, not during training as makeResponseLikelihood is. 41 | It also requires at least the following inputs and outputs: 42 | Arguments: 43 | * argv: an undetermined number of tensors containg the weights 44 | and biases 45 | * realVals: the actual values for the predicted quantities 46 | * hypers: A list containing all the hyper paramters 47 | * predict: the function used to make a prediction from the 48 | current neural net 49 | * dtype: the datatype of the network 50 | Returns: 51 | * result: the log probabilities of the real vals given the 52 | predicted values 53 | """ 54 | pass 55 | 56 | def display(self, hypers): 57 | """An optional method which can be used to display relavent information 58 | during the evaluation phase of a network. 59 | """ 60 | pass 61 | 62 | 63 | class GaussianLikelihood(Likelihood): 64 | 65 | def __init__(self, *argv, **kwargs): 66 | self.hypers = [[kwargs["sd"]**0.5]] 67 | self.mainProbsInHypers = True 68 | 69 | def makeResponseLikelihood(self, *argv, **kwargs): 70 | """Make a prediction and predict its probability from a multivariate 71 | normal distribution 72 | 73 | Arguments: 74 | * argv: an undetermined number of tensors containg the weights 75 | and biases 76 | * realVals: the actual values for the predicted quantities 77 | * sd: standard deviation for output distribution, uses 78 | current hyper parameter value if nothing is given 79 | * hyperStates: A list containing all the hyper paramters 80 | * predict: the function used to make a prediction from the 81 | current neural net 82 | * dtype: the datatype of the network 83 | Returns: 84 | * result: the log probabilities of the real vals given the 85 | predicted values 86 | """ 87 | 88 | sd = kwargs["hyperStates"][-1]**2 89 | 90 | current = kwargs["predict"](True, argv[0]) 91 | current = tf.transpose(current) 92 | sigma = tf.ones_like(current) * sd 93 | realVals = tf.reshape(kwargs["realVals"], current.shape) 94 | result = multivariateLogProb(sigma, current, realVals, kwargs["dtype"]) 95 | 96 | return(result) 97 | 98 | def calcultateLogProb(self, *argv, **kwargs): 99 | """Make a prediction and predict its probability from a multivariate 100 | normal distribution 101 | 102 | rguments: 103 | * argv: an undetermined number of tensors containg the weights 104 | and biases 105 | * realVals: the actual values for the predicted quantities 106 | * hypers: A list containing all the hyper paramters 107 | * predict: the function used to make a prediction from the 108 | current neural net 109 | * dtype: the datatype of the network 110 | * n: Use every n 
networks 111 | Returns: 112 | * result: the log probabilities of the real vals given the 113 | predicted values 114 | """ 115 | if(kwargs["sd"] is None): 116 | sd = kwargs["hyperStates"][-1] 117 | else: 118 | sd = kwargs["sd"] 119 | current = kwargs["predict"](argv[0], n=kwargs["n"]) 120 | for x in range(len(current)): 121 | current[x] = tf.transpose(current[x]) 122 | realVals = tf.reshape(kwargs["realVals"], current[0].shape) 123 | result = [] 124 | for x in range(len(current)): 125 | 126 | result.append(multivariateLogProb(tf.ones_like(current[0]) * sd, 127 | current[x], realVals, 128 | kwargs["dtype"])) 129 | return(result) 130 | 131 | def display(self, hypers): 132 | print("Loss Standard Deviation: ", hypers[-1].numpy()[0]**2) 133 | pass 134 | 135 | 136 | class FixedGaussianLikelihood(Likelihood): 137 | 138 | def __init__(self, *argv, **kwargs): 139 | self.hypers = [] 140 | self.sd = kwargs["sd"] 141 | self.mainProbsInHypers = False 142 | 143 | def makeResponseLikelihood(self, *argv, **kwargs): 144 | """Make a prediction and predict its probability from a multivariate 145 | normal distribution 146 | 147 | Arguments: 148 | * argv: an undetermined number of tensors containg the weights 149 | and biases 150 | * realVals: the actual values for the predicted quantities 151 | * sd: standard deviation for output distribution, uses 152 | current hyper parameter value if nothing is given 153 | * hyperStates: A list containing all the hyper paramters 154 | * predict: the function used to make a prediction from the 155 | current neural net 156 | * dtype: the datatype of the network 157 | Returns: 158 | * result: the log probabilities of the real vals given the 159 | predicted values 160 | """ 161 | 162 | sd = tf.cast(self.sd, kwargs["dtype"]) 163 | current = kwargs["predict"](True, argv[0]) 164 | current = tf.transpose(current) 165 | sigma = tf.ones_like(current) * sd 166 | realVals = tf.reshape(kwargs["realVals"], current.shape) 167 | result = multivariateLogProb(sigma, current, realVals, kwargs["dtype"]) 168 | 169 | return(result) 170 | 171 | def calcultateLogProb(self, *argv, **kwargs): 172 | """Make a prediction and predict its probability from a multivariate 173 | normal distribution 174 | 175 | rguments: 176 | * argv: an undetermined number of tensors containg the weights 177 | and biases 178 | * realVals: the actual values for the predicted quantities 179 | * hypers: A list containing all the hyper paramters 180 | * predict: the function used to make a prediction from the 181 | current neural net 182 | * dtype: the datatype of the network 183 | * n: Use every n networks 184 | Returns: 185 | * result: the log probabilities of the real vals given the 186 | predicted values 187 | """ 188 | current = kwargs["predict"](argv[0], n=kwargs["n"]) 189 | for x in range(len(current)): 190 | current[x] = tf.transpose(current[x]) 191 | realVals = tf.reshape(kwargs["realVals"], current[0].shape) 192 | result = [] 193 | for x in range(len(current)): 194 | temp = multivariateLogProb(tf.ones_like(current[0]) * self.sd, 195 | current[x], realVals, 196 | kwargs["dtype"]) 197 | result.append(temp) 198 | 199 | return(result) 200 | 201 | def display(self, hypers): 202 | pass 203 | 204 | 205 | class BernoulliLikelihood(Likelihood): 206 | def __init__(self, *argv, **kwargs): 207 | self.hypers = [] 208 | self.mainProbsInHypers = False 209 | 210 | def makeResponseLikelihood(self, *argv, **kwargs): 211 | """Make a prediction and predict its probability from a Bernoulli 212 | normal distribution 213 | 214 | Arguments: 
215 | * argv: an undetermined number of tensors containg the weights 216 | and biases 217 | * realVals: the actual values for the predicted quantities 218 | * predict: the function used to make a prediction from the 219 | current neural net 220 | * dtype: the datatype of the network 221 | Returns: 222 | * result: the log probabilities of the real vals given the 223 | predicted values 224 | """ 225 | current = kwargs["predict"](True, argv[0]) 226 | current = tf.cast( 227 | tf.clip_by_value( 228 | current, 229 | 1e-8, 230 | 1 - 1e-7), 231 | kwargs["dtype"]) 232 | 233 | # Prediction distribution 234 | dist = tfd.Bernoulli( 235 | probs=current) 236 | result = dist.log_prob(tf.transpose(kwargs["realVals"])) 237 | return(result) 238 | 239 | def calcultateLogProb(self, *argv, **kwargs): 240 | result = [] 241 | for x in range(len(kwargs["hypers"])): 242 | result.append(tf.cast(0, kwargs["dtype"])) 243 | return(result) 244 | -------------------------------------------------------------------------------- /tensorBNN/metrics.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Metric(object): 5 | """ A basic metric object. This can be implemented into any desired 6 | metric within the BNN training loop. 7 | """ 8 | def __init__(self, scaleExp = False, mean=0, sd=1, *argv, **kwargs): 9 | self.scaleExp = scaleExp 10 | self.mean = mean 11 | self.sd = sd 12 | 13 | def calculate(self, predictionsTrain, predictionValidate, realTrain, 14 | realValidate, *argv, **kwargs): 15 | """ Calculates the metric 16 | 17 | Arguments: 18 | * predictionsTrain: training predictions 19 | * predictionsValidate: validation predictions 20 | * realTrain: real training values 21 | * realValidate: real validation values 22 | """ 23 | pass 24 | 25 | def display(self): 26 | """Displays the metric""" 27 | pass 28 | 29 | 30 | class SquaredError(Metric): 31 | """ Calculates the mean squared error of a prediction. 
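Because the interface above only asks for calculate and display, new monitoring quantities are easy to add. The AbsoluteError class below is a hypothetical example, not part of the library; it applies the same mean/sd un-scaling as the built-in metrics (the optional exp scaling is omitted for brevity) and could be passed in a metricList alongside them:

```
import tensorflow as tf

from tensorBNN.metrics import Metric


class AbsoluteError(Metric):
    """Mean absolute error of training and validation predictions."""

    def calculate(self, predictionsTrain, predictionsValidate, realTrain,
                  realValidate):
        # Undo the output scaling, as the built-in metrics do
        predictionsTrain = tf.transpose(predictionsTrain) * self.sd + self.mean
        predictionsValidate = (tf.transpose(predictionsValidate) * self.sd
                               + self.mean)
        realTrain = tf.reshape(realTrain * self.sd + self.mean,
                               predictionsTrain.shape)
        realValidate = tf.reshape(realValidate * self.sd + self.mean,
                                  predictionsValidate.shape)

        self.maeTrain = tf.reduce_mean(
            tf.abs(predictionsTrain - realTrain)).numpy()
        self.maeValidate = tf.reduce_mean(
            tf.abs(predictionsValidate - realValidate)).numpy()

    def display(self):
        print("training absolute error{: 9.5f}".format(self.maeTrain),
              "validation absolute error{: 9.5f}".format(self.maeValidate))
```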
32 | """ 33 | 34 | def calculate(self, predictionsTrain, predictionsValidate, realTrain, 35 | realValidate): 36 | 37 | predictionsTrain = tf.add(tf.multiply(tf.transpose(predictionsTrain), 38 | self.sd), self.mean) 39 | predictionsValidate = tf.add(tf.multiply(tf.transpose(predictionsValidate), 40 | self.sd), self.mean) 41 | 42 | realTrain = tf.add(tf.multiply(realTrain, self.sd), self.mean) 43 | realValidate = tf.add(tf.multiply(realValidate, self.sd), self.mean) 44 | 45 | if(self.scaleExp): 46 | predictionsTrain = tf.exp(predictionsTrain) 47 | realTrain = tf.exp(realTrain) 48 | realValidate = tf.exp(realValidate) 49 | 50 | realTrain = tf.reshape(realTrain, predictionsTrain.shape) 51 | realValidate = tf.reshape(realValidate, predictionsValidate.shape) 52 | 53 | 54 | squaredError = tf.reduce_mean( 55 | input_tensor=tf.math.squared_difference( 56 | predictionsTrain, realTrain)) 57 | self.squaredErrorTrain=squaredError.numpy() 58 | 59 | squaredError = tf.reduce_mean( 60 | input_tensor=tf.math.squared_difference( 61 | predictionsValidate, realValidate)) 62 | self.squaredErrorValidate=squaredError.numpy() 63 | 64 | def display(self): 65 | 66 | print("training squared error{: 9.5f}".format(self.squaredErrorTrain), 67 | "validation squared error{: 9.5f}".format( 68 | self.squaredErrorValidate)) 69 | 70 | class PercentError(Metric): 71 | """Calculates percent error of a prediction""" 72 | def __init__(self, scaleExp = False, mean=0, sd=1, *argv, **kwargs): 73 | self.scaleExp =scaleExp 74 | self.mean = mean 75 | self.sd = sd 76 | 77 | 78 | def calculate(self, predictionsTrain, predictionsValidate, realTrain, 79 | realValidate): 80 | predictionsTrain = tf.add(tf.multiply(tf.transpose(predictionsTrain), 81 | self.sd), self.mean) 82 | predictionsValidate = tf.add(tf.multiply(tf.transpose(predictionsValidate), 83 | self.sd), self.mean) 84 | 85 | realTrain = tf.add(tf.multiply(realTrain, self.sd), self.mean) 86 | realValidate = tf.add(tf.multiply(realValidate, self.sd), self.mean) 87 | 88 | if(self.scaleExp): 89 | predictionsTrain = tf.exp(predictionsTrain) 90 | predictionsValidate = tf.exp(predictionsValidate) 91 | realTrain = tf.exp(realTrain) 92 | realValidate = tf.exp(realValidate) 93 | 94 | realTrain = tf.reshape(realTrain, predictionsTrain.shape) 95 | realValidate = tf.reshape(realValidate, predictionsValidate.shape) 96 | 97 | self.percentErrorTrain = tf.reduce_mean( 98 | input_tensor=tf.multiply( 99 | tf.abs(tf.divide(tf.subtract(predictionsTrain, realTrain), 100 | realTrain)), 100)) 101 | self.percentErrorValidate = tf.reduce_mean( 102 | input_tensor=tf.multiply( 103 | tf.abs(tf.divide(tf.subtract(predictionsValidate, realValidate), 104 | realValidate)), 100)) 105 | 106 | def display(self): 107 | print("training percent error{: 7.3f}".format(self.percentErrorTrain), 108 | "validation percent error{: 7.3f}".format(self.percentErrorValidate)) 109 | 110 | class Accuracy(Metric): 111 | """ Caluclates the accuracy of predictions """ 112 | def calculate(self, predictionsTrain, predictionsValidate, realTrain, 113 | realValidate): 114 | predictionsTrain = tf.add(tf.multiply(tf.transpose(predictionsTrain), 115 | self.sd), 116 | self.mean) 117 | predictionsValidate = tf.add(tf.multiply(tf.transpose(predictionsValidate), 118 | self.sd), 119 | self.mean) 120 | realTrain = tf.add(tf.multiply(realTrain, self.sd), self.mean) 121 | realValidate = tf.add(tf.multiply(realValidate, self.sd), self.mean) 122 | 123 | if(self.scaleExp): 124 | predictionsTrain = tf.exp(predictionsTrain) 125 | predictionsValidate = 
tf.exp(predictionsValidate) 126 | realTrain = tf.exp(realTrain) 127 | realValidate = tf.exp(realValidate) 128 | 129 | realTrain = tf.reshape(realTrain, predictionsTrain.shape) 130 | realValidate = tf.reshape(realValidate, predictionsValidate.shape) 131 | 132 | self.accuracyTrain = 1 - tf.reduce_mean(tf.abs( 133 | realTrain - tf.round(predictionsTrain))) 134 | self.accuracyValidate = 1 - tf.reduce_mean(tf.abs( 135 | realValidate - tf.round(predictionsValidate))) 136 | 137 | 138 | def display(self): 139 | print("training accuracy{: 9.5f}".format(self.accuracyTrain), 140 | "validation accuracy{: 9.5f}".format( 141 | self.accuracyValidate)) 142 | -------------------------------------------------------------------------------- /tensorBNN/network.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import tensorflow_probability as tfp 7 | 8 | from tensorBNN.paramAdapter import paramAdapter 9 | 10 | tfd = tfp.distributions 11 | 12 | 13 | class network(object): 14 | """An object used for storing all of the variables required to create 15 | a Bayesian Neural Network using Hamiltonian Monte Carlo and then training 16 | the network. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | dtype, 22 | inputDims, 23 | trainX, 24 | trainY, 25 | validateX, 26 | validateY): 27 | """ 28 | Arguments: 29 | * dtype: data type for Tensors 30 | * inputDims: dimension of input vector 31 | * trainX: the training data input, shape is n by inputDims 32 | * trainY: the training data output 33 | * validateX: the validation data input, shape is n by inputDims 34 | * validateY: the validation data output 35 | * mean: the mean used to scale trainY and validateY 36 | * sd: standard deviation used to scale trainY and validateY 37 | """ 38 | self.dtype = dtype 39 | self.iteration = None 40 | 41 | self.trainX = tf.reshape( 42 | tf.constant( 43 | trainX, dtype=self.dtype), [ 44 | len(trainX), inputDims]) 45 | self.trainY = tf.constant(trainY, dtype=self.dtype) 46 | 47 | self.validateX = tf.reshape( 48 | tf.constant( 49 | validateX, dtype=self.dtype), [ 50 | len(validateX), inputDims]) 51 | self.validateY = tf.constant(validateY, dtype=self.dtype) 52 | 53 | self.states = [] # List with the weight and bias state placeholders 54 | self.hyperStates = [] # List with hyper parameter state placeholders 55 | 56 | self.layers = [] # List of all the layers 57 | 58 | self.currentInnerStep = None 59 | 60 | def metrics(self, trainPredict, trainReal, validatePredict, validateReal): 61 | """Calculates the average squared error and percent difference of the 62 | current network 63 | Arguments: 64 | * predictions: output from the network 65 | * scaleExp: boolean value to determine whether to take the 66 | exponential of the data and scale it 67 | * train: boolean value to determine whether to use the training 68 | data 69 | * mean: mean value used for unshifiting a distribution 70 | * sd: sd value used for unscalling a distribution 71 | Returns: 72 | * logits: output from the network 73 | * squaredError: the mean squared error of predictions from the 74 | network 75 | * percentError: the percent error of the predictions from the 76 | network 77 | """ 78 | 79 | for metric in self.metricList: 80 | metric.calculate(trainPredict, validatePredict, trainReal, 81 | validateReal) 82 | metric.display() 83 | 84 | def calculateProbs(self, *argv, sd=None): 85 | """Calculates the log probability of the current network values 86 | as well as 
the log probability of their prediction. 87 | Arguments: 88 | * argv: an undetermined number of tensors containg the weights 89 | and biases. 90 | * sd: standard deviation for output distribution, uses current 91 | hyper value if none is given 92 | Returns: 93 | * prob: log probability of network values and network prediction 94 | """ 95 | if(len(argv) != len(self.states)): 96 | argv = argv[0] 97 | 98 | temp = self.makeResponseLikelihood(argv, predict=self.predict, 99 | dtype=self.dtype, 100 | hyperStates=self.hyperStates, 101 | realVals=self.trainY, sd=sd) 102 | prob = tf.reduce_sum(temp) 103 | 104 | # probability of the network parameters 105 | index = 0 106 | for n in range(len(self.layers)): 107 | numTensors = self.layers[n].numTensors 108 | if(numTensors > 0): 109 | prob += self.layers[n].calculateProbs( 110 | argv[index:index + numTensors]) 111 | index += numTensors 112 | return(prob) 113 | 114 | def calculateHyperProbs(self, *argv): 115 | """Calculates the log probability of the current hyper parameters 116 | Arguments: 117 | * argv: an undetermined number of tensors containg the hyper 118 | parameters 119 | Returns: 120 | * prob: log probability of hyper parameters given their priors 121 | """ 122 | prob = 0 123 | indexh = 0 124 | index = 0 125 | for n in range(len(self.layers)): 126 | numHyperTensors = self.layers[n].numHyperTensors 127 | numTensors = self.layers[n].numTensors 128 | if(numHyperTensors > 0): 129 | 130 | prob += self.layers[n].calculateHyperProbs( 131 | argv[indexh:indexh + numHyperTensors], 132 | self.states[index:index + numTensors]) 133 | indexh += numHyperTensors 134 | index += numTensors 135 | 136 | if(self.likelihood.mainProbsInHypers): 137 | prob += self.calculateProbs(self.states, sd=argv[-1]) 138 | 139 | return(prob) 140 | 141 | def predict(self, train, *argv): 142 | """Makes a prediction 143 | Arguments: 144 | * train: a boolean value which determines whether to use training 145 | data 146 | * argv: an undetermined number of tensors containg the weights 147 | and biases. 
148 | Returns: 149 | * prediction: a prediction from the network 150 | """ 151 | tensors = argv 152 | if(len(tensors) == 0): 153 | tensors = self.states 154 | else: 155 | tensors = tensors[0] 156 | x = self.trainX 157 | if(not train): 158 | x = self.validateX 159 | 160 | def innerPrediction(x, layers): 161 | prediction = tf.transpose(a=x) 162 | index = 0 163 | for n in range(len(self.layers)): 164 | numTensors = layers[n].numTensors 165 | prediction = layers[n].predict( 166 | prediction, tensors[index:index + numTensors]) 167 | index += numTensors 168 | return(prediction) 169 | prediction = innerPrediction(x, self.layers) 170 | 171 | return(prediction) 172 | 173 | def add(self, layer, parameters=None): 174 | """Adds a new layer to the network 175 | Arguments: 176 | * layer: the layer to be added 177 | * parameters: list containing weight, bias, and acitvation 178 | matrices 179 | """ 180 | self.layers.append(layer) 181 | if(layer.numTensors > 0): 182 | if parameters is None: 183 | for states in layer.parameters: 184 | self.states.append(states) 185 | else: 186 | for states in parameters: 187 | self.states.append(states) 188 | 189 | if(layer.numHyperTensors > 0): 190 | for states in layer.hypers: 191 | self.hyperStates.append(states) 192 | 193 | def setupMCMC(self, stepSizeStart=1e-3, stepSizeMin=1e-4, stepSizeMax=1e-2, 194 | stepSizeOptions=40, leapfrogStart=1000, leapfogMin=100, 195 | leapFrogMax=10000, leapfrogIncrement=1, hyperStepSize=1e-2, 196 | hyperLeapfrog=100, burnin=1000, 197 | cores=4, averagingSteps=10, a=4, delta=0.1, strikes=5, 198 | randomSteps=10, dualAveraging=False): 199 | """Sets up the MCMC algorithms 200 | Arguments: 201 | * stepSizeStart: the starting step size for the weights and biases 202 | * stepSizeMin: the minimum step size 203 | * stepSizeMax: the maximum step size 204 | * stepSizeOptions: the number of step sizes in grid 205 | * leapfrogStart: number of leapfrog steps for weights and biases 206 | * leapFrogMax: the minimum number of leapfrog steps 207 | * leapMax: the maximum number of leapfrog steps 208 | * leapfrogIncrement: the step in number of leapfrog for search grid 209 | * hyperStepSize: the starting step size for the hyper parameters 210 | * hyperLeapfrog: leapfrog steps for hyper parameters 211 | * cores: number of cores to use 212 | * averaginSteps: number of averaging steps 213 | * a: constant, 4 in paper 214 | * delta: constant, 0.1 in paper 215 | * strikes: iterations with no movement before reseting adapter 216 | * randomSteps: averaging cycles at beginning with random values 217 | Returns nothing 218 | """ 219 | 220 | # Adapt the step size and number of leapfrog steps 221 | self.adapt = paramAdapter(stepSizeStart, 222 | leapfrogStart, 223 | stepSizeMin, 224 | stepSizeMax, 225 | stepSizeOptions, 226 | leapfogMin, 227 | leapFrogMax, 228 | leapfrogIncrement, 229 | averagingSteps, 230 | burnin / averagingSteps, 231 | a=a, 232 | delta=delta, 233 | cores=cores, 234 | strikes=strikes, 235 | randomSteps=randomSteps) 236 | 237 | self.step_size = tf.cast(stepSizeStart, self.dtype) 238 | self.leapfrog = tf.cast(leapfrogStart, tf.int32) 239 | self.cores = cores 240 | self.burnin = burnin 241 | self.target = 0.95 242 | 243 | self.gamma = tf.cast(0.4, self.dtype) 244 | self.t0 = tf.cast(10, self.dtype) 245 | self.kappa = tf.cast(0.75, self.dtype) 246 | self.h = tf.cast([0], self.dtype) 247 | self.logEpsilonBar = tf.cast([0], self.dtype) 248 | self.mu = tf.cast(tf.math.log(100*hyperStepSize), self.dtype) 249 | 250 | self.dualAveraging = dualAveraging 251 | 
self.gamma2 = tf.cast(0.4, self.dtype) 252 | self.t02 = tf.cast(10, self.dtype) 253 | self.kappa2 = tf.cast(0.75, self.dtype) 254 | self.h2 = tf.cast([0], self.dtype) 255 | self.logEpsilonBar2 = tf.cast([0], self.dtype) 256 | self.mu2 = tf.cast(tf.math.log(100*stepSizeStart), self.dtype) 257 | 258 | self.hyper_step_size = tf.Variable(tf.cast(np.array(hyperStepSize), 259 | self.dtype)) 260 | 261 | # Setup the Markov Chain for the network parameters 262 | self.mainKernel = tfp.mcmc.HamiltonianMonteCarlo( 263 | target_log_prob_fn=self.calculateProbs, 264 | num_leapfrog_steps=self.leapfrog, 265 | step_size=[self.step_size], 266 | state_gradients_are_stopped=True, 267 | name="main") 268 | 269 | self.hyperLeapfrog = hyperLeapfrog 270 | # Setup the Transition Kernel for the hyper parameters 271 | hyperKernel = tfp.mcmc.HamiltonianMonteCarlo( 272 | target_log_prob_fn=self.calculateHyperProbs, 273 | num_leapfrog_steps=hyperLeapfrog, 274 | step_size=[self.hyper_step_size], 275 | state_gradients_are_stopped=True) 276 | 277 | self.hyperKernel = tfp.mcmc.DualAveragingStepSizeAdaptation( 278 | inner_kernel=hyperKernel, num_adaptation_steps=int(burnin * 0.8)) 279 | 280 | @tf.function(jit_compile=True, experimental_relax_shapes=True) 281 | def stepMCMCNoHypers(self, states, hyperStates, mainStep, leapfrogVal, 282 | mainAccept=tf.cast([1], tf.float32), sampleNumber=1): 283 | """ Steps the markov chain for each of the network parameters and the 284 | hyper parameters forward one step 285 | Has no arguments, returns nothing. 286 | """ 287 | 288 | def InnerStepMain(i, states, hyperStates, leapfrog, step_size): 289 | 290 | def calculateProbs(*argv): 291 | if(len(argv) != len(self.states)): 292 | argv = argv[0] 293 | 294 | prob = 0 295 | indexh = 0 296 | index = 0 297 | for n in range(len(self.layers)): 298 | numHyperTensors = self.layers[n].numHyperTensors 299 | numTensors = self.layers[n].numTensors 300 | if(numHyperTensors > 0): 301 | 302 | prob += self.layers[n].calculateProbs( 303 | hyperStates[indexh:indexh + numHyperTensors], 304 | argv[index:index + numTensors]) 305 | indexh += numHyperTensors 306 | index += numTensors 307 | 308 | temp = self.makeResponseLikelihood(argv, predict=self.predict, 309 | dtype=self.dtype, 310 | hyperStates=hyperStates, 311 | realVals=self.trainY, 312 | sd=hyperStates[-1]) 313 | prob += tf.reduce_sum(temp) 314 | return(prob) 315 | hmc = tfp.mcmc.HamiltonianMonteCarlo 316 | kernel = hmc(target_log_prob_fn=calculateProbs, 317 | num_leapfrog_steps=leapfrog, 318 | step_size=step_size, 319 | state_gradients_are_stopped=True) 320 | 321 | states, kernel_results = tfp.mcmc.sample_chain( 322 | num_results=1, 323 | num_burnin_steps=0, # start collecting data on first step 324 | current_state=states, # starting parts of chain 325 | parallel_iterations=8, 326 | kernel=kernel, 327 | trace_fn=lambda _, pkr: [pkr.accepted_results.step_size, 328 | pkr.log_accept_ratio, 329 | pkr.accepted_results.target_log_prob]) 330 | 331 | acceptRate = tf.where(kernel_results[1] < 0, 332 | tf.exp(kernel_results[1]), 1) 333 | 334 | return(states, [step_size], acceptRate) 335 | 336 | def oneStep(i, params, hyperParams, mainStep, mainAccept, leap): 337 | 338 | params, mainStep, mainAccept = InnerStepMain(i, params, 339 | hyperParams, leap, 340 | mainStep) 341 | for x in range(len(params)): 342 | params[x] = params[x][0] 343 | 344 | return(tf.add(i, 1), params, hyperParams, mainStep[0], mainAccept, 345 | leap) 346 | 347 | def condition(i, states, hyperStates, mainStep, mainAccept, 348 | leapfrogVal): 349 | 
return(tf.less(i, sampleNumber)) 350 | 351 | i = tf.constant(0) 352 | i, states, hyperStates, mainStep, mainAccept, \ 353 | leapfrogVal = tf.while_loop(condition, oneStep, 354 | [i, states, hyperStates, mainStep, 355 | mainAccept, leapfrogVal]) 356 | 357 | return(states, mainStep, mainAccept) 358 | 359 | @tf.function(jit_compile=True, experimental_relax_shapes=True) 360 | def stepMCMC(self, states, hyperStates, mainStep, hyperStep, logEpsilonBar, 361 | h, iter_, leapfrogVal, mainAccept=tf.cast([1], tf.float32), 362 | hyperAccept=tf.cast([1], tf.float32), sampleNumber=1): 363 | """ Steps the markov chain for each of the network parameters and the 364 | hyper parameters forward one step 365 | Has no arguments, returns nothing. 366 | """ 367 | 368 | def InnerStepMain(i, states, hyperStates, leapfrog, step_size, epoch): 369 | 370 | def calculateProbs(*argv): 371 | if(len(argv) != len(self.states)): 372 | argv = argv[0] 373 | 374 | prob = 0 375 | indexh = 0 376 | index = 0 377 | for n in range(len(self.layers)): 378 | numHyperTensors = self.layers[n].numHyperTensors 379 | numTensors = self.layers[n].numTensors 380 | if(numHyperTensors > 0): 381 | 382 | prob += self.layers[n].calculateProbs( 383 | hyperStates[indexh:indexh + numHyperTensors], 384 | argv[index:index + numTensors]) 385 | indexh += numHyperTensors 386 | index += numTensors 387 | 388 | prob += tf.reduce_sum(self.makeResponseLikelihood( 389 | argv, predict=self.predict, dtype=self.dtype, 390 | hyperStates=hyperStates, realVals=self.trainY, 391 | sd=hyperStates[-1])) 392 | return(prob) 393 | 394 | kernel = tfp.mcmc.HamiltonianMonteCarlo( 395 | target_log_prob_fn=calculateProbs, 396 | num_leapfrog_steps=leapfrog, 397 | step_size=step_size, 398 | state_gradients_are_stopped=True) 399 | 400 | states, kernel_results = tfp.mcmc.sample_chain( 401 | num_results=1, 402 | num_burnin_steps=0, # start collecting data on first step 403 | current_state=states, # starting parts of chain 404 | parallel_iterations=8, 405 | kernel=kernel, 406 | trace_fn=lambda _, pkr: [pkr.accepted_results.step_size, 407 | pkr.log_accept_ratio, 408 | pkr.accepted_results.target_log_prob]) 409 | 410 | acceptRate = tf.where(kernel_results[1] < 0, 411 | tf.exp(kernel_results[1]), 1) 412 | return(states, [step_size], acceptRate) 413 | 414 | def InnerStepHyper(i, states, hyperStates, leapfrog, step_size, 415 | logEpsilonBar, h, epoch): 416 | 417 | def calculateProbs(*argv): 418 | if(len(argv) != len(self.hyperStates)): 419 | argv = argv[0] 420 | 421 | prob = 0 422 | indexh = 0 423 | index = 0 424 | for n in range(len(self.layers)): 425 | numHyperTensors = self.layers[n].numHyperTensors 426 | numTensors = self.layers[n].numTensors 427 | if(numHyperTensors > 0): 428 | 429 | prob += self.layers[n].calculateHyperProbs( 430 | argv[indexh:indexh + numHyperTensors], 431 | states[index:index + numTensors]) 432 | indexh += numHyperTensors 433 | index += numTensors 434 | 435 | if(self.likelihood.mainProbsInHypers): 436 | prob += tf.reduce_sum(self.makeResponseLikelihood( 437 | states, predict=self.predict, dtype=self.dtype, 438 | hyperStates=argv, realVals=self.trainY, sd=argv[-1])) 439 | 440 | return(prob) 441 | 442 | kernel = tfp.mcmc.HamiltonianMonteCarlo( 443 | target_log_prob_fn=calculateProbs, 444 | num_leapfrog_steps=leapfrog, 445 | step_size=step_size, 446 | state_gradients_are_stopped=True) 447 | 448 | hyperStates, kernel_results = tfp.mcmc.sample_chain( 449 | num_results=1, 450 | num_burnin_steps=0, # start collecting data on first step 451 | current_state=hyperStates, # 
starting parts of chain 452 | parallel_iterations=8, 453 | kernel=kernel, 454 | trace_fn=lambda _, pkr: [pkr.accepted_results.step_size, 455 | pkr.log_accept_ratio, 456 | pkr.accepted_results.target_log_prob]) 457 | m = epoch + 1 458 | 459 | accept = tf.where(kernel_results[1] < 0, 460 | tf.exp(kernel_results[1]), 1) 461 | h = (1-1/(m+self.t0))*h+(1/(m+self.t0))*(self.target-accept) 462 | 463 | logEpsilon = self.mu-h*(m**0.5)/self.gamma 464 | 465 | logEpsilonBar = (1-m**(-self.kappa))*logEpsilonBar 466 | logEpsilonBar += m**(-self.kappa)*logEpsilon 467 | 468 | step_size = tf.where(m < self.burnin * 0.8, 469 | tf.math.exp(logEpsilonBar), step_size) 470 | 471 | return(hyperStates, step_size, logEpsilonBar, h, accept) 472 | 473 | def oneStep(i, params, hyperParams, mainStep, hyperStep, logEpsilonBar, 474 | h, epoch, mainAccept, hyperAccept, leap): 475 | 476 | params, mainStep, mainAccept = InnerStepMain(i, params, 477 | hyperParams, leap, 478 | mainStep, epoch) 479 | for x in range(len(params)): 480 | params[x] = params[x][0] 481 | 482 | hyperParams, hyperStep, logEpsilonBar, h, hyperAccept = \ 483 | InnerStepHyper(i, params, hyperParams, self.hyperLeapfrog, 484 | hyperStep, logEpsilonBar, h, epoch) 485 | for x in range(len(hyperParams)): 486 | hyperParams[x] = hyperParams[x][0] 487 | hyperStep = hyperStep[0] 488 | 489 | return(tf.add(i, 1), params, hyperParams, mainStep[0], hyperStep, 490 | logEpsilonBar, h, epoch, mainAccept, hyperAccept, leap) 491 | 492 | def condition(i, states, hyperStates, mainStep, hyperStep, 493 | logEpsilonBar, h, epoch, mainAccept, hyperAccept, leap): 494 | return(tf.less(i, sampleNumber)) 495 | 496 | i = tf.constant(0) 497 | epoch = tf.cast(iter_, self.dtype) 498 | i, states, hyperStates, mainStep, hyperStep, logEpsilonBar, h, epoch, \ 499 | mainAccept, hyperAccept, leapfrogVal = \ 500 | tf.while_loop(condition, oneStep, [i, states, hyperStates, 501 | mainStep, hyperStep, 502 | logEpsilonBar, h, epoch, 503 | mainAccept, hyperAccept, 504 | leapfrogVal]) 505 | 506 | return(states, hyperStates, mainStep, hyperStep, logEpsilonBar, h, 507 | mainAccept, hyperAccept) 508 | 509 | def train( 510 | self, 511 | epochs, 512 | samplingStep, 513 | likelihood, 514 | metricList=[], 515 | adjustHypers=True, 516 | scaleExp=False, 517 | folderName=None, 518 | networksPerFile=1000, 519 | displaySkip=1): 520 | """Trains the network 521 | Arguements: 522 | * Epochs: Number of training cycles 523 | * samplingStep: Epochs between sampled networks 524 | * likelihood: Object containing the output likelihood for the BNN 525 | * scaleExp: whether the metrics should be scaled via exp 526 | * folderName: name of folder for saved networks 527 | * networksPerFile: number of networks saved in a given file 528 | * returnPredictions: whether to return the prediction from the 529 | network 530 | Returns: 531 | * results: the output of the network when sampled 532 | (if returnPrediction=True) 533 | """ 534 | # Create response likelihood 535 | startSampling = self.burnin 536 | 537 | self.likelihood = likelihood 538 | self.makeResponseLikelihood = self.likelihood.makeResponseLikelihood 539 | self.metricList = metricList 540 | self.adjustHypers = adjustHypers 541 | 542 | for val in self.likelihood.hypers: 543 | self.hyperStates.append(tf.cast(val, self.dtype)) 544 | 545 | # Create the folder and files for the networks 546 | filePath = None 547 | files = [] 548 | if(folderName is not None): 549 | filePath = os.path.join(os.getcwd(), folderName) 550 | if(not os.path.isdir(filePath)): 551 | 
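# Saved networks are written as one text file per weight/bias tensor,
# named "<tensor index>.<file number>.txt", plus "hypers<file number>.txt"
# for the hyper parameters and "architecture.txt" listing the layer names;
# this is the layout that predictor.loadNetworks()/loadArchitecture() read.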
os.mkdir(filePath) 552 | for n in range(len(self.states)): 553 | files.append( 554 | open(filePath + "/" + str(n) + ".0" + ".txt", "wb")) 555 | files.append(open(filePath + "/hypers" + "0" + ".txt", "wb")) 556 | previousLeap = self.leapfrog 557 | with open(filePath + "/architecture.txt", "wb") as f: 558 | for layer in (self.layers): 559 | f.write((layer.name+"\n").encode("utf-8")) 560 | 561 | iter_ = 0 562 | tf.random.set_seed(50) 563 | 564 | self.mainAccept = tf.cast([0], tf.float32) 565 | self.hyperAccept = tf.cast([0], tf.float32) 566 | startTime = time.time() 567 | while(iter_ < epochs): # Main training loop 568 | # 569 | if(self.adjustHypers): 570 | returnVals = self.stepMCMC(self.states, self.hyperStates, 571 | self.step_size, 572 | self.hyper_step_size, 573 | self.logEpsilonBar, self.h, 574 | tf.cast(iter_, self.dtype), 575 | self.leapfrog, 576 | tf.cast(self.mainAccept, 577 | self.dtype), 578 | tf.cast(self.hyperAccept, 579 | self.dtype)) 580 | self.states, self.hyperStates, self.step_size, \ 581 | self.hyper_step_size, self.logEpsilonBar, self.h, \ 582 | self.mainAccept, self.hyperAccept = returnVals 583 | 584 | else: 585 | self.states, self.step_size, self.mainAccept = \ 586 | self.stepMCMCNoHypers(self.states, self.hyperStates, 587 | self.step_size, self.leapfrog, 588 | tf.cast(self.mainAccept, self.dtype)) 589 | 590 | previousLeap = self.leapfrog 591 | iter_ += 1 592 | 593 | if(iter_ % displaySkip == 0): 594 | print() 595 | print("iter:{:>2}".format(iter_)) 596 | print("step size", self.step_size.numpy()) 597 | print("hyper step size", self.hyper_step_size.numpy()) 598 | print("leapfrog", self.leapfrog.numpy()) 599 | print("Main acceptance", self.mainAccept.numpy()[0]) 600 | print("Hyper acceptance", self.hyperAccept.numpy()[0]) 601 | self.metrics(self.predict(train=True), self.trainY, 602 | self.predict(train=False), self.validateY) 603 | step, leap = self.adapt.update(self.states) 604 | self.step_size = step + self.step_size * 0 605 | self.leapfrog = leap + self.leapfrog * 0 606 | 607 | self.step_size = tf.cast(self.step_size, self.dtype) 608 | 609 | # Create new files to record network 610 | indexShift = iter_ - startSampling - 1 611 | indexInterval = networksPerFile * samplingStep 612 | if(iter_ > startSampling and indexShift % indexInterval == 0): 613 | for file in files: 614 | file.close() 615 | temp = [] 616 | for n in range(len(self.states)): 617 | temp.append(open(filePath + "/" + str(n) + "." 
+ 618 | str(int((iter_-startSampling) // 619 | (networksPerFile * 620 | samplingStep))) + 621 | ".txt", "wb")) 622 | temp.append(open(filePath + "/hypers" + 623 | str(int((iter_-startSampling) // 624 | (networksPerFile * samplingStep))) + 625 | ".txt", "wb")) 626 | files = temp 627 | 628 | # Update the summary file 629 | file = open(filePath + "/summary.txt", "wb") 630 | for n in range(len(self.states)): 631 | val = "" 632 | for sizes in self.states[n].shape: 633 | val += str(sizes) + " " 634 | val = val.strip() + "\n" 635 | file.write(val.encode("utf-8")) 636 | numNetworks = (indexShift) // samplingStep 637 | numFiles = numNetworks // networksPerFile 638 | if(numNetworks % networksPerFile != 0): 639 | numFiles += 1 640 | file.write((str(numNetworks) + " " + str(numFiles) + 641 | " " + str(len(self.states))+"\n").encode("utf-8")) 642 | hyperStateCount = 0 643 | for state in self.hyperStates: 644 | hyperStateCount += tf.size(state) 645 | file.write(str(hyperStateCount.numpy()).encode("utf-8")) 646 | file.close() 647 | # Record the sampled network parameters 648 | if(iter_ > startSampling and (iter_) % samplingStep == 0): 649 | if(filePath is not None): 650 | for n in range(len(files)-1): 651 | np.savetxt(files[n], self.states[n]) 652 | tempStates = [] 653 | for state in self.hyperStates: 654 | length = 1 655 | for x in state.shape: 656 | length = length*x 657 | if(length > 1): 658 | splitStates = tf.split(state, length) 659 | for splitState in splitStates: 660 | tempStates.append(splitState) 661 | else: 662 | tempStates.append(state) 663 | np.savetxt(files[-1], tempStates) 664 | if(iter_ % displaySkip == 0): 665 | likelihood.display(self.hyperStates) 666 | print("Time elapsed:", time.time() - startTime) 667 | startTime = time.time() 668 | 669 | for file in files: 670 | file.close() 671 | -------------------------------------------------------------------------------- /tensorBNN/paramAdapter.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import sys 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from multiprocessing import Pool 9 | 10 | 11 | class paramAdapter(object): 12 | """This object stores the variables required to implement an adaptive 13 | step size and number of leapfrog steps as detailed in "Adaptive Hamiltonian 14 | and Riemann Manifold Monte Carlo Samplers" by Wang, Mohamed, and 15 | de Freitas. This method performs Bayesian inference on these parameters 16 | assuming a uniform prior between specified values. Over time, the 17 | probability of a new state being proposed decreases so that the values will 18 | converge to specific values. 19 | 20 | In a slight divergence from the paper, three features are included to 21 | prevent the adapter from settling too early into a non-optimal position, to 22 | compensate for the optimal position changing drastically through training, 23 | and to generally improve the suggested points. First, the adapter will 24 | randomly propose for a certain number of steps at the beginning as set by 25 | the randomSteps keyword argument. Secondly, if the adapter goes through a 26 | set number of iterations specified with the strikes keyword argument and 27 | the SJD (squared jump distance) is 0 every single time then the entire paramAdapter is reset to its 28 | initial condition. It is quite possible that this will happen after the BNN 29 | converges to a minimum and the maximum feasible step size is much smaller. 
30 | Finally, the adapter will scale the leapfrog steps and step size to the 31 | range -1 to 1 in order for the 0 mean Gaussian priors used in the Bayeisan 32 | inference to better fit the data. 33 | 34 | In order to more rapidly search through the grid of possible step sizes 35 | and leapfrog steps this object uses parallel processing so that all 36 | available computing resources are used. 37 | """ 38 | 39 | def __init__(self, e1, L1, el, eu, eNumber, Ll, Lu, lStep, m, k, a=4, 40 | delta=0.1, cores=4, strikes=10, randomSteps=10): 41 | """ Creates a paramAdapter object. 42 | 43 | Arguments: 44 | * e1: starting step size 45 | * L1: starting number of leapfrog steps 46 | * el: lower step size bound 47 | * eu: upper step size bound 48 | * eNumber: number of step sizes in gride 49 | * Ll: lower leapfrog bound 50 | * Lu: upper leapfrog bound 51 | * lStep: leapfrog step size in grid 52 | * m: number of averaging steps 53 | * k: iterations before proposal probability starts decreasing 54 | * a: constant, 4 in paper 55 | * delta: constant, 0.1 in paper 56 | * cores: number of cores to use in processing 57 | * strikes: iterations with no movement before reseting adapter 58 | * randomSteps: averaging cycles at beginning with random values 59 | """ 60 | self.dtype=tf.float32 61 | self.currentE = e1 62 | self.currentL = L1 63 | self.el = tf.cast(el, self.dtype) 64 | self.eu = tf.cast(eu, self.dtype) 65 | self.Ll = tf.cast(Ll, self.dtype) 66 | self.Lu = tf.cast(Lu, self.dtype) 67 | self.eNumber = tf.cast(eNumber, tf.int32) 68 | self.eGrid = tf.linspace(el, eu, num=eNumber) 69 | self.lGrid = tf.cast(np.array(range(Ll, Lu + 1, int(lStep))), self.dtype) 70 | self.lNumber = tf.cast(len(self.lGrid), tf.int32) 71 | self.delta = tf.cast(delta, self.dtype) 72 | kappa = tf.cast(0.2, self.dtype) 73 | self.sigma = tf.linalg.diag( 74 | [1 / ((kappa * (2))**2), 1 / ((kappa * (2))**2)]) 75 | self.previousGamma = [] 76 | 77 | self.allSD = [] 78 | self.k = k 79 | self.K = tf.zeros([0,0], dtype=self.dtype) 80 | self.m = m 81 | self.currentData = [] 82 | self.allData = [] 83 | self.maxR = tf.cast(1e-8, self.dtype) 84 | self.a = tf.cast(a, self.dtype) 85 | self.i = tf.cast(-2, self.dtype) 86 | self.previous_state = None 87 | self.current_state = None 88 | #np.random.seed(10) 89 | 90 | self.cores = cores 91 | self.strikes = 0 92 | self.maxStrikes = 50#strikes 93 | self.randomSteps = randomSteps 94 | 95 | def calck(self, gammaI, gammaJ, el, eu, sigma): 96 | """ Calculates the covariance k between two states 97 | 98 | Arguments: 99 | * gammaI: state 1 100 | * gammaJ: state 2 101 | Returns: 102 | * k: covaraiance between gammaI and gammaJ 103 | """ 104 | gamma1 = tf.transpose([[-1+2*(gammaI[0]-el)/(eu-el), 105 | -1+2*(tf.cast(gammaI[1], self.dtype)-self.Ll)/(self.Lu-self.Ll)]]) 106 | gamma2 = tf.transpose([[-1+2*(gammaJ[0]-el)/(eu-el), 107 | -1+2*(tf.cast(gammaJ[1], self.dtype)-self.Ll)/(self.Lu-self.Ll)]]) 108 | 109 | k = tf.exp(-0.5 * (tf.matmul(tf.transpose(gamma1), 110 | tf.matmul(sigma, gamma2)))) 111 | return(k) 112 | 113 | def calcUCB(self, testGamma, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma): 114 | """ Calculates a varraint of the upper confidence bound for a test 115 | state. 
116 | 117 | Arguments: 118 | * testGamma: the test state 119 | * s: a scaling factor 120 | * inverse: inverse of the covariance matrix 121 | * inverseR: inverse of the covariance matrix time the data 122 | * p: the decay value 123 | * rootBeta: a constant based on the number of variables in the 124 | state 125 | Returns: 126 | * ucb: upper confidence bound 127 | """ 128 | k = []#[None] * self.inverse.shape[0] 129 | for gamma, index in zip(previousGamma, 130 | range(len(previousGamma))): 131 | #k[index] = self.calck(gamma, testGamma) 132 | k.append([self.calck(gamma, testGamma, el, eu, sigma)[0,0]]) 133 | k = tf.cast(k, self.dtype) 134 | mean = tf.matmul(tf.transpose(k), inverseR) * s 135 | variance = tf.matmul(inverse, k) 136 | variance = tf.matmul(tf.transpose(k), variance) 137 | 138 | variance = self.calck(testGamma, testGamma, el, eu, sigma) - variance 139 | 140 | ucb = mean + variance * p * rootbeta 141 | return(ucb, mean, variance) 142 | 143 | def reset(self): 144 | """Resets the adapter""" 145 | tf.print("Reset") 146 | self.previousGamma = [] 147 | 148 | self.allSD = [] 149 | self.K = tf.zeros([0,0]) 150 | self.currentData = [] 151 | self.allData = [] 152 | self.maxR = 1e-8 153 | self.i = -2 154 | self.previous_state = None 155 | self.current_state = None 156 | self.strikes = 0 157 | 158 | @tf.function(jit_compile=True, experimental_relax_shapes=True) 159 | def gridSearch(self, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma): 160 | eCount = tf.constant(0, dtype=tf.int32) 161 | lCount = tf.constant(0, dtype=tf.int32) 162 | cond = lambda eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma: tf.less(lCount, self.lNumber) 163 | e = tf.cast([[el]], self.dtype) 164 | L = tf.cast([[self.Ll]], self.dtype) 165 | ucb = tf.cast([[-1000000000]], self.dtype) 166 | variance = tf.cast([[-1000000000]], self.dtype) 167 | mean=tf.cast([[-1000000000]], self.dtype) 168 | 169 | 170 | def processChunk(eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma): 171 | """Processes a chunk of the e, L combinations. 
172 | 173 | Arguments: 174 | * eList: list of step sizes to check 175 | * lList: list of leapfrog steps to check 176 | 177 | Returns: 178 | * best: a tuple of the form ((best e, best L), ucb) where the e and 179 | L selected are those with the highest ucb, which is also included 180 | """ 181 | newE = self.eGrid[eCount] 182 | newL = self.lGrid[lCount] 183 | newUcb, newMean, newVariance = self.calcUCB([newE, newL], previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma) 184 | e = tf.where(newUcb>ucb,newE, e) 185 | L = tf.where(newUcb>ucb,newL, L) 186 | mean = tf.where(newUcb>ucb,newMean, mean) 187 | variance = tf.where(newUcb>ucb,newVariance, variance) 188 | ucb = tf.where(newUcb>ucb,newUcb, ucb) 189 | lCount = tf.where(eCount==self.eNumber-1, lCount+1, lCount) 190 | eCount = tf.where(eCount==self.eNumber-1, 0, eCount+1) 191 | return(eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma) 192 | 193 | eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma = tf.while_loop(cond, processChunk, [eCount, lCount, e, L, ucb, mean, variance, 194 | previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma]) 195 | 196 | return(tf.cast(e[0,0], self.dtype), tf.cast(L[0,0], self.dtype)) 197 | 198 | 199 | def update(self, state): 200 | """ Steps the adapter forward by one step 201 | 202 | Arguments: 203 | * state: the newest state proposed by the HMC algorithm 204 | Returns: 205 | * currentE: the new step size 206 | * currentL: the new number of leapfrog steps 207 | """ 208 | if(self.i self.randomSteps): 226 | self.strikes += 1 227 | else: 228 | self.strikes = 0 229 | 230 | # Update E and L if this is not just an averaging step 231 | if(self.i % self.m == 0 and self.i > 0): 232 | u = tf.random.uniform([1,1],minval=0, maxval=1) 233 | self.p = max(self.i / self.m - self.k + 1, 1)**(-0.5) 234 | if(u < self.p+u*0): # Over time the probability of updating will decay 235 | mean = tf.math.reduce_mean(self.currentData) 236 | sd = tf.math.reduce_std(self.currentData) 237 | self.currentData = [] 238 | self.allData.append(mean) 239 | self.allSD.append(sd) 240 | self.maxR = tf.math.reduce_max(self.allData) 241 | # Update the covariance matrix 242 | self.previousGamma.append((self.currentE, self.currentL)) 243 | size = len(self.previousGamma) 244 | newK = tf.ones([size, size]) 245 | if(size > 0): 246 | #newK[:size - 1, :size - 1] = self.K 247 | newK = self.K 248 | newKExtra=[] 249 | for gamma, index in zip(self.previousGamma, range( 250 | len(self.previousGamma))): 251 | k = self.calck(gamma, self.previousGamma[-1], self.el, self.eu, self.sigma) 252 | #newK[-1, index] = k 253 | #newK[index, -1] = k 254 | newKExtra.append(k[0,0]) 255 | newK = tf.concat([newK, [newKExtra[:-1]]],axis=0) 256 | newK = tf.concat([newK, tf.transpose([newKExtra])], axis=1) 257 | self.K = newK 258 | self.s = self.a / self.maxR # update scalling constant 259 | 260 | sigmaNu = tf.math.reduce_mean(self.allSD) # Variance of noise 261 | 262 | # calculate inverse and other values only once 263 | try: # In case the covaraince matrix is singular 264 | self.inverse = tf.linalg.inv( 265 | self.K + (sigmaNu**2) * tf.eye(self.K.shape[0])) 266 | except tf.errors.InvalidArgumentError: 267 | self.inverse = tf.linalg.inv( 268 | self.K + (sigmaNu**2) * tf.eye(self.K.shape[0]) + 269 | 0.1 * tf.eye(self.K.shape[0])) 270 | self.inverseR = tf.matmul(self.inverse, tf.expand_dims(tf.cast(self.allData, tf.float32),1)) 271 | 272 | 273 | 274 | 
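# Exploration weight for the UCB acquisition: rootbeta works out to
# sqrt(2 * log(t**3 * pi**2 / (3 * delta))) with t = i/m + 1, so the bonus
# applied to the predictive variance in calcUCB grows slowly with the
# number of completed averaging windows.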
self.rootbeta = (self.i / self.m + 1)**(3) * math.pi**2 275 | self.rootbeta /= (3 * self.delta) 276 | self.rootbeta = tf.math.log(self.rootbeta)*2 277 | self.rootbeta = self.rootbeta**(0.5) 278 | 279 | # Start parallel searches, take best result found 280 | if(self.i//self.m >= self.randomSteps): 281 | self.currentE, self.currentL = self.gridSearch(self.previousGamma, self.inverseR, self.s, self.inverse, self.p, self.rootbeta, self.el, self.eu, self.sigma) 282 | else: 283 | self.currentE = random.choice(self.eGrid) 284 | self.currentL = random.choice(self.lGrid) 285 | if(size==50): 286 | self.K=self.K[1:,1:] 287 | self.previousGamma=self.previousGamma[1:] 288 | self.allData=self.allData[1:] 289 | self.allSD=self.allSD[1:] 290 | 291 | self.i += 1 292 | return(tf.cast(self.currentE, self.dtype), tf.cast(self.currentL, tf.int32)) 293 | -------------------------------------------------------------------------------- /tensorBNN/predictor.py: -------------------------------------------------------------------------------- 1 | from tensorBNN.activationFunctions import (Exp, Relu, Sigmoid, Tanh, Elu, 2 | Softmax, Leaky_relu, Prelu, 3 | SquarePrelu) 4 | from tensorBNN.layer import DenseLayer, GaussianDenseLayer 5 | from tensorBNN.likelihood import GaussianLikelihood 6 | 7 | from emcee.autocorr import integrated_time, function_1d 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | import math 13 | 14 | 15 | class predictor(object): 16 | def __init__(self, directoryPath, dtype, customLayerDict={}, 17 | likelihood=GaussianLikelihood(sd=0.1)): 18 | """ The constructor here obtains the necesary information to make basic 19 | predictions, and also the basic likelihood function for future 20 | reweighting. 21 | 22 | Arguments: 23 | * directoryPath: Path to folder containing saved networks 24 | * dtype: data type of network 25 | * customLayerDict: Dictionary containing any custom layers with 26 | their names as keys 27 | * likelihood: Likelihood object used in training. If reweighting 28 | is not performed this does not matter. 29 | """ 30 | self.layerDict = {"Exp": Exp, "relu": Relu, "sigmoid": Sigmoid, 31 | "tanh": Tanh, "elu": Elu, "softmax": Softmax, 32 | "leakyrelu": Leaky_relu, "prelu": Prelu, 33 | "squareprelu": SquarePrelu, "dense": DenseLayer, 34 | "denseGaussian": GaussianDenseLayer} 35 | self.directoryPath = directoryPath 36 | self.layerDict.update(customLayerDict) 37 | self.dtype = dtype 38 | self.loadNetworks() 39 | self.loadArchitecture() 40 | self.likelihood = likelihood 41 | self.weightsTrain = [] 42 | 43 | def loadNetworks(self): 44 | """Loads saved networks. 45 | """ 46 | 47 | summary = [] 48 | with open(self.directoryPath + "summary.txt", "r") as file: 49 | for line in iter(file): 50 | summary.append(line.split()) 51 | numNetworks = int(summary[-2][0]) 52 | numFiles = int(summary[-2][1]) 53 | numMatrices = int(summary[-2][2]) 54 | numHypers = int(summary[-1][0]) 55 | 56 | numNetworks //= numFiles 57 | 58 | matrices = [] 59 | vectors = [] 60 | for x in range(numFiles*numNetworks): 61 | vectors.append([]) 62 | 63 | for n in range(numMatrices): 64 | if(len(summary[n]) == 2): 65 | weightsSplitDims = (numNetworks * 66 | numFiles, int(summary[n][0]), 67 | int(summary[n][1])) 68 | else: 69 | weightsSplitDims = (numNetworks * 70 | numFiles, int(summary[n][0]), int(1)) 71 | weights0 = np.zeros(weightsSplitDims) 72 | for m in range(numFiles): 73 | weights = np.loadtxt( 74 | self.directoryPath + 75 | str(n) + 76 | "." 
+ 77 | str(m) + 78 | ".txt", 79 | dtype=np.float32, 80 | ndmin=2) 81 | for k in range(numNetworks): 82 | 83 | netNumber = m * numNetworks + k 84 | index1 = weightsSplitDims[1] * k 85 | index2 = weightsSplitDims[1] * (k + 1) 86 | index3 = weightsSplitDims[2] 87 | weights0[netNumber, :, :] = weights[index1:index2, :index3] 88 | newVector = tf.cast(weights[index1:index2, :index3], 89 | self.dtype).numpy().flatten() 90 | vectors[netNumber].append(newVector) 91 | matrices.append(tf.cast(weights0, self.dtype)) 92 | for x in range(len(vectors)): 93 | vectors[x] = np.concatenate(vectors[x]) 94 | 95 | hypers = [] 96 | if(numHypers > 0): 97 | for m in range(numFiles): 98 | weights = np.loadtxt( 99 | self.directoryPath + "hypers" + str(m) + ".txt", 100 | dtype=np.float32, ndmin=1) 101 | for k in range(numNetworks): 102 | netNumber = m * numNetworks + k 103 | index1 = numHypers * k 104 | index2 = numHypers * (k + 1) 105 | hypers.append(weights[index1:index2]) 106 | 107 | numNetworks *= numFiles 108 | 109 | self.numNetworks = numNetworks 110 | self.numMatrices = numMatrices 111 | self.matrices = matrices 112 | self.hypers = hypers 113 | self.vectors = vectors 114 | 115 | def loadArchitecture(self, architecture=None): 116 | self.layers = [] 117 | if(architecture is None): 118 | with open(self.directoryPath + "architecture.txt", "r") as file: 119 | for line in iter(file): 120 | cleanedLine = line.replace("\n", "") 121 | cleanedLine = self.layerDict[cleanedLine](inputDims=1, 122 | outputDims=1) 123 | self.layers.append(cleanedLine) 124 | else: 125 | with open(architecture, "r") as file: 126 | for line in iter(file): 127 | cleanedLine = line.replace("\n", "") 128 | cleanedLine = self.layerDict[cleanedLine](inputDims=1, 129 | outputDims=1) 130 | self.layers.append(cleanedLine) 131 | 132 | def predict(self, inputMatrix, n=1): 133 | """Make predictions from an ensemble of neural networks. 134 | Arguments: 135 | * inputMatrix: The input data 136 | * n: Predict using every n networks 137 | Returns: 138 | * initialResults: List with all networks used 139 | """ 140 | 141 | inputVal = np.transpose(inputMatrix) 142 | initialResults = [None] * math.ceil(self.numNetworks/n) 143 | for m in range(0, self.numNetworks, n): 144 | current = inputVal 145 | matrixIndex = 0 146 | for layer in self.layers: 147 | numTensors = layer.numTensors 148 | tensorList = [] 149 | for x in range(numTensors): 150 | tensorList.append(self.matrices[matrixIndex+x][m, :, :]) 151 | matrixIndex += numTensors 152 | current = layer.predict(current, tensorList) 153 | initialResults[m//n] = current.numpy() 154 | 155 | return(initialResults) 156 | 157 | def trainProbs(self, trainX, trainY, n, likelihood): 158 | """ Calculate the negative log likelihoods for the training data. 
159 | 160 | Arguments: 161 | * trainX: training input data 162 | * trainY: training output data 163 | * n: Predict using every n networks 164 | """ 165 | weights = [] 166 | if(likelihood is not None): 167 | hyperCountShape = np.array(self.likelihood.hypers).shape 168 | hyperCount = 1 169 | for x in hyperCountShape: 170 | hyperCount *= x 171 | likelihoodHyper = [] 172 | for hyper in self.hypers: 173 | likelihoodHyper.append([hyper[-hyperCount:]]) 174 | weights = self.likelihood.calcultateLogProb(tf.transpose(trainX), 175 | realVals=trainY, 176 | n=n, 177 | hypers=likelihoodHyper, 178 | predict=self.predict, 179 | dtype=self.dtype) 180 | else: 181 | for m in range(0, self.numNetworks, n): 182 | weights.append(tf.cast(0, self.dtype)) 183 | for m in range(0, self.numNetworks, n): 184 | matrixIndex = 0 185 | hyperIndex = 0 186 | current = -weights[m//n] 187 | for layer in self.layers: 188 | numTensors = layer.numTensors 189 | numHyperTensors = layer.numHyperTensors 190 | tensorList = [] 191 | hyperList = [] 192 | for x in range(numTensors): 193 | tensorList.append(self.matrices[matrixIndex+x][m, :, :]) 194 | for x in range(numHyperTensors): 195 | hyperList.append(self.hypers[m][hyperIndex+x]) 196 | hyperIndex += numHyperTensors 197 | matrixIndex += numTensors 198 | current -= tf.cast(layer.calculateHyperProbs(hyperList, 199 | tensorList), 200 | self.dtype).numpy() 201 | weights[m//n] = current 202 | self.weightsTrain = np.array(weights) 203 | 204 | def reweight(self, architecture, trainX=None, trainY=None, n=1, 205 | likelihood=None): 206 | """ Calculate new weights for each network as if they had the new 207 | hyper parameters described in architecture. The weights are calculated 208 | according to p(theta|priors2)/p(theta|priors1). The new priors can be 209 | anything, but the layers must still accept the same size inputs and 210 | number of hyper parameters as the base networks. 211 | 212 | Arguments: 213 | * trainX: training input data 214 | * trainY: training output data 215 | * architecture: new architecture file 216 | * n: Predict using every n networks 217 | 218 | Returns: 219 | * weighting: Numpy array with new weights for the networks. 
220 | """ 221 | 222 | if(len(self.weightsTrain) == 0): 223 | self.trainProbs(trainX, trainY, n, likelihood) 224 | 225 | self.loadArchitecture(architecture=architecture) 226 | 227 | weights = [] 228 | if(likelihood is not None): 229 | 230 | hyperCountShape = np.array(likelihood.hypers).shape 231 | hyperCount = 1 232 | for x in hyperCountShape: 233 | hyperCount *= x 234 | likelihoodHypers = [] 235 | for hyper in self.hypers: 236 | likelihoodHypers.append([hyper[-hyperCount:]]) 237 | weights = likelihood.calcultateLogProb(tf.transpose(trainX), 238 | realVals=trainY, 239 | n=n, 240 | hypers=likelihoodHypers, 241 | predict=self.predict, 242 | dtype=self.dtype) 243 | 244 | else: 245 | for m in range(0, self.numNetworks, n): 246 | weights.append(tf.cast(0, self.dtype)) 247 | 248 | for m in range(0, self.numNetworks, n): 249 | matrixIndex = 0 250 | hyperIndex = 0 251 | current = -weights[m//n] 252 | for layer in self.layers: 253 | numTensors = layer.numTensors 254 | numHyperTensors = layer.numHyperTensors 255 | tensorList = [] 256 | hyperList = [] 257 | for x in range(numTensors): 258 | tensorList.append(self.matrices[matrixIndex+x][m, :, :]) 259 | for x in range(numHyperTensors): 260 | hyperList.append(self.hypers[m][hyperIndex+x]) 261 | hyperIndex += numHyperTensors 262 | matrixIndex += numTensors 263 | current -= tf.cast(layer.calculateHyperProbs(hyperList, 264 | tensorList), 265 | self.dtype).numpy() 266 | weights[m//n] = current 267 | self.weights = np.array(weights) 268 | weighting = np.exp(self.weightsTrain-self.weights) 269 | weighting = weighting/np.sum(weighting) 270 | 271 | self.loadArchitecture() 272 | 273 | return(weighting) 274 | 275 | def autocorrelation(self, inputData, nMax): 276 | predictions = self.predict(inputData, n=1) 277 | output = np.squeeze(np.array(predictions)).T 278 | 279 | valFunc = 0 280 | accepted = 0 281 | 282 | for x in range(len(output)): 283 | temp = (integrated_time(output[x], tol=5, quiet=True)) 284 | if(not math.isnan(temp)): 285 | valFunc += np.array((function_1d(output[x]))) 286 | accepted += 1 287 | 288 | valFunc = valFunc/accepted 289 | if(nMax < len(valFunc)): 290 | valFunc = valFunc[:nMax] 291 | 292 | return(valFunc) 293 | 294 | def autoCorrelationLength(self, inputData, nMax): 295 | predictions = self.predict(inputData, n=1) 296 | output = np.squeeze(np.array(predictions)).T 297 | 298 | val = 0 299 | accepted = 0 300 | 301 | for x in range(len(output)): 302 | temp = (integrated_time(output[x], tol=5, quiet=True)) 303 | if(not math.isnan(temp)): 304 | val += temp 305 | accepted += 1 306 | 307 | val = val/accepted 308 | 309 | if(val[0] > nMax): 310 | print("Correlation time is greater than maximum accepted value.") 311 | 312 | return(val[0]) 313 | 314 | def extractParameters(self): 315 | """ 316 | Returns a list with all the parameter matrices. The first axis in each 317 | matrix corresponds to the network. 318 | """ 319 | return(self.matrices) 320 | 321 | def extractHyperParameters(self): 322 | """ 323 | Returns an array with all the hyper parameters. The first axis 324 | corresponds to the network. 325 | """ 326 | return(np.array(self.hypers)) 327 | 328 | def parameterStatistics(self): 329 | """ 330 | Returns two list, the first with the means of all the parameters, 331 | and the second with standard deviations of the parameters. 
332 | """ 333 | parameterMeans = [] 334 | parameterSds = [] 335 | for matrix in self.matrices: 336 | parameterMeans.append(np.mean(matrix, axis=0)) 337 | parameterSds.append(np.std(matrix, axis=0)) 338 | 339 | return(parameterMeans, parameterSds) 340 | 341 | def hyperStatistics(self): 342 | """ 343 | Returns two arrays, the first with the means of all the hyper 344 | parameters, and the second with standard deviations of the hyper 345 | parameters. 346 | """ 347 | hypers = np.array(self.hypers) 348 | hyperMeans = np.mean(hypers, axis=0) 349 | hyperSds = np.std(hypers, axis=0) 350 | 351 | return(hyperMeans, hyperSds) 352 | --------------------------------------------------------------------------------