├── Examples ├── extendedRegression.py └── trainRegression.py ├── LICENSE ├── README.md ├── docs ├── ClassificationExample.md ├── RegressionExample.md ├── Setup.md ├── _config.yml ├── _data │ └── navigation.yml ├── _includes │ └── navigation.html ├── _layouts │ └── default.html ├── index.md └── usage.md └── tensorBNN ├── BNN_functions.py ├── activationFunctions.py ├── layer.py ├── likelihood.py ├── metrics.py ├── network.py ├── paramAdapter.py └── predictor.py /Examples/extendedRegression.py: -------------------------------------------------------------------------------- 1 | """ 2 | An extended version of the trainRegression.py example with pretraining and 3 | some graphs at the end to visualize the output of the BNN. 4 | """ 5 | 6 | import os 7 | import math 8 | import warnings 9 | import time 10 | 11 | import numpy as np 12 | import random as rn 13 | import tensorflow as tf 14 | import pylab as plt 15 | 16 | 17 | from tensorBNN.activationFunctions import Tanh 18 | from tensorBNN.layer import GaussianDenseLayer 19 | from tensorBNN.networkFinal import network 20 | from tensorBNN.likelihood import FixedGaussianLikelihood 21 | from tensorBNN.metrics import SquaredError, PercentError 22 | from tensorBNN.predictor import predictor 23 | 24 | startTime = time.time() 25 | 26 | # This supresses many deprecation warnings 27 | warnings.filterwarnings("ignore", category=DeprecationWarning) 28 | warnings.filterwarnings("ignore", category=UserWarning) 29 | 30 | # Set the GPU to use 31 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 32 | 33 | os.environ["PYTHONHASHSEED"] = "0" 34 | np.random.seed(42) 35 | rn.seed(12345) 36 | tf.random.set_seed(3) 37 | 38 | 39 | def main(): 40 | 41 | trainIn=np.linspace(-2,2,num=11) 42 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30) 43 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi) 44 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi) 45 | 46 | 47 | data=[trainIn, trainOut, valIn, valOut] 48 | 49 | dtype=tf.float32 50 | 51 | inputDims=1 52 | outputDims=1 53 | width = 10 # perceptrons per layer 54 | hidden = 3 # number of hidden layers 55 | patience=20 56 | cycles=3 57 | epochs=100 58 | seed=1000 59 | 60 | 61 | normInfo=(0,1) # mean, sd 62 | 63 | #Peform pre-training to start the Markov Chain at a better spot 64 | model = tf.keras.Sequential() 65 | 66 | model.add(tf.keras.layers.Dense(width, kernel_initializer='glorot_uniform', 67 | input_shape=(inputDims, ), 68 | activation="tanh")) 69 | model.add(tf.keras.layers.ReLU()) 70 | 71 | for n in range(hidden-1): 72 | model.add(tf.keras.layers.Dense(width, 73 | kernel_initializer='glorot_uniform', 74 | activation="tanh")) 75 | 76 | model.add(tf.keras.layers.Dense(outputDims, 77 | kernel_initializer='glorot_uniform')) 78 | 79 | callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 80 | patience=patience, 81 | restore_best_weights=True) 82 | 83 | #Train with decreasing learning rate 84 | for x in range(cycles): 85 | model.compile(optimizer=tf.keras.optimizers.Adam(0.01*(10**(-x)), 86 | amsgrad=True), 87 | loss='mean_squared_error', 88 | metrics=['mean_absolute_error', 'mean_squared_error']) 89 | model.summary() 90 | model.fit(trainIn, trainOut.T, validation_data=(valIn, valOut.T), 91 | epochs=epochs, batch_size=32, callbacks=[callback]) 92 | 93 | #Save the backup 94 | model.save("backup") 95 | 96 | #Extract weights and biases 97 | weights=[] 98 | biases=[] 99 | activation=[] 100 | for layer in model.layers: 101 | weightBias=layer.get_weights() 102 | if(len(weightBias)==2): 103 | 
weights.append(weightBias[0].T) 104 | bias=weightBias[1] 105 | bias=np.reshape(bias, (len(bias),1)) 106 | biases.append(bias) 107 | if(len(weightBias)==1): 108 | activation.append(weightBias[0]) 109 | 110 | 111 | likelihood=FixedGaussianLikelihood(sd=0.1) 112 | metricList=[SquaredError(mean=normInfo[0], sd=normInfo[1]), 113 | PercentError(mean=normInfo[0], sd=normInfo[1])] 114 | 115 | neuralNet = network( 116 | dtype, # network datatype 117 | inputDims, # dimension of input vector 118 | data[0], # training input data 119 | data[1].T, # training output data 120 | data[2], # validation input data 121 | data[3].T) # validation output data) 122 | 123 | layer = GaussianDenseLayer( # Dense layer object 124 | inputDims, # Size of layer input vector 125 | width, # Size of layer output vector 126 | seed=seed, # Random seed 127 | dtype=dtype, 128 | weights=weights[0], biases=biases[0]) 129 | neuralNet.add(layer) # Layer datatype 130 | neuralNet.add(Tanh()) # Tanh activation function 131 | seed += 1000 # Increment random seed 132 | for n in range(hidden - 1): # Add more hidden layers 133 | neuralNet.add(GaussianDenseLayer(width, 134 | width, 135 | seed=seed, 136 | dtype=dtype, 137 | weights=weights[n+1], biases=biases[n+1])) 138 | neuralNet.add(Tanh()) 139 | seed += 1000 140 | 141 | neuralNet.add(GaussianDenseLayer(width, 142 | outputDims, 143 | seed=seed, 144 | dtype=dtype, 145 | weights=weights[-1], biases=biases[-1])) 146 | 147 | neuralNet.setupMCMC( 148 | stepSizeStart=1e-3,#0.0004 # starting stepsize 149 | stepSizeMin=1e-4, #0.0002 # minimum stepsize 150 | stepSizeMax=1e-2, # maximum stepsize 151 | stepSizeOptions=100, # number of stepsize options in stepsize adapter 152 | leapfrogStart=1000, # starting number of leapfrog steps 153 | leapfogMin=100, # minimum number of leapfrog steps 154 | leapFrogMax=10000, # maximum number of leapfrog steps 155 | leapfrogIncrement=10, # stepsize between leapfrog steps in leapfrog step adapter 156 | hyperStepSize=0.001, # hyper parameter stepsize 157 | hyperLeapfrog=100, # hyper parameter number of leapfrog steps 158 | burnin=1000, # number of burnin epochs 159 | averagingSteps=10) # number of averaging steps for param adapters) 160 | 161 | 162 | neuralNet.train( 163 | 6001, # epochs to train for 164 | 10, # increment between network saves 165 | likelihood, 166 | metricList=metricList, 167 | adjustHypers=True, 168 | folderName="TrigRegression", # Name of folder for saved networks 169 | networksPerFile=50) # Number of networks saved per file 170 | 171 | print("Total time elapsed (seconds):", time.time() - startTime) 172 | 173 | 174 | #Load predictor 175 | loadedNetwork = predictor("TrigRegression/", tf.float32) 176 | 177 | #Look at the predictions ins the space between the training data 178 | closeIn=np.linspace(-2,2,num=1000) 179 | closeOut = np.sin(closeIn*math.pi*2)*closeIn-np.cos(closeIn*math.pi) 180 | 181 | closePredictions = np.squeeze(np.array(loadedNetwork.predict( 182 | np.array([closeIn]).T, n=1))) 183 | closePredictionsMean = np.mean(closePredictions, axis=0) 184 | closePredictionsStd = np.std(closePredictions, axis=0) 185 | plt.figure() 186 | 187 | plt.fill_between(closeIn, closePredictionsMean-2*closePredictionsStd, 188 | closePredictionsMean-1*closePredictionsStd, color=(1,1,0), 189 | label="2 sd") 190 | plt.fill_between(closeIn, closePredictionsMean-1*closePredictionsStd, 191 | closePredictionsMean+1*closePredictionsStd, color=(0,1,0), 192 | label="1 sd") 193 | plt.fill_between(closeIn, closePredictionsMean+1*closePredictionsStd, 194 | 
closePredictionsMean+2*closePredictionsStd, color=(1,1,0)) 195 | plt.plot(closeIn,closePredictionsMean, color="k", label="predicted mean") 196 | plt.plot(closeIn, closeOut, color="r", label="true") 197 | plt.scatter(trainIn, trainOut, color="b", label="training data") 198 | plt.legend() 199 | plt.show() 200 | 201 | #Look at the predictions away from the training data 202 | farIn=np.linspace(-4,4,num=2000) 203 | farOut = np.sin(farIn*math.pi*2)*farIn-np.cos(farIn*math.pi) 204 | 205 | farPredictions = np.squeeze(np.array(loadedNetwork.predict( 206 | np.array([farIn]).T, n=1))) 207 | farPredictionsMean = np.mean(farPredictions, axis=0) 208 | farPredictionsStd = np.std(farPredictions, axis=0) 209 | 210 | plt.figure() 211 | plt.fill_between(farIn, farPredictionsMean-2*farPredictionsStd, 212 | farPredictionsMean-1*farPredictionsStd, color=(1,1,0), 213 | label="2 sd") 214 | plt.fill_between(farIn, farPredictionsMean-1*farPredictionsStd, 215 | farPredictionsMean+1*farPredictionsStd, color=(0,1,0), 216 | label="1 sd") 217 | plt.fill_between(farIn, farPredictionsMean+1*farPredictionsStd, 218 | farPredictionsMean+2*farPredictionsStd, color=(1,1,0)) 219 | plt.plot(farIn,farPredictionsMean, color="k", label="predicted mean") 220 | plt.plot(farIn, farOut, color="r", label="true") 221 | plt.scatter(trainIn, trainOut, color="b", label="training data") 222 | plt.legend() 223 | plt.show() 224 | 225 | 226 | if(__name__ == "__main__"): 227 | main() 228 | -------------------------------------------------------------------------------- /Examples/trainRegression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import warnings 4 | import time 5 | 6 | import numpy as np 7 | import random as rn 8 | import tensorflow as tf 9 | 10 | from tensorBNN.activationFunctions import Tanh 11 | from tensorBNN.layer import GaussianDenseLayer 12 | from tensorBNN.networkFinal import network 13 | from tensorBNN.likelihood import FixedGaussianLikelihood 14 | from tensorBNN.metrics import SquaredError, PercentError 15 | 16 | startTime = time.time() 17 | 18 | # This supresses many deprecation warnings 19 | warnings.filterwarnings("ignore", category=DeprecationWarning) 20 | warnings.filterwarnings("ignore", category=UserWarning) 21 | 22 | # Set the GPU to use 23 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 24 | 25 | os.environ["PYTHONHASHSEED"] = "0" 26 | np.random.seed(42) 27 | rn.seed(12345) 28 | tf.random.set_seed(3) 29 | 30 | 31 | def main(): 32 | 33 | trainIn=np.linspace(-2,2,num=11) 34 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30) 35 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi) 36 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi) 37 | 38 | 39 | data=[trainIn, trainOut, valIn, valOut] 40 | 41 | dtype=tf.float32 42 | 43 | inputDims=1 44 | outputDims=1 45 | width = 10 # perceptrons per layer 46 | hidden = 3 # number of hidden layers 47 | seed=1000 48 | 49 | inputDims=1 50 | outputDims=1 51 | 52 | normInfo=(0,1) # mean, sd 53 | 54 | likelihood=FixedGaussianLikelihood(sd=0.1) 55 | metricList=[SquaredError(mean=normInfo[0], sd=normInfo[1]), 56 | PercentError(mean=normInfo[0], sd=normInfo[1])] 57 | 58 | neuralNet = network( 59 | dtype, # network datatype 60 | inputDims, # dimension of input vector 61 | data[0], # training input data 62 | data[1].T, # training output data 63 | data[2], # validation input data 64 | data[3].T) # validation output data) 65 | 66 | layer = GaussianDenseLayer( # Dense layer object 67 | inputDims, # Size of 
layer input vector 68 | width, # Size of layer output vector 69 | seed=seed, # Random seed 70 | dtype=dtype) 71 | neuralNet.add(layer) # Layer datatype 72 | neuralNet.add(Tanh()) # Tanh activation function 73 | seed += 1000 # Increment random seed 74 | for n in range(hidden - 1): # Add more hidden layers 75 | neuralNet.add(GaussianDenseLayer(width, 76 | width, 77 | seed=seed, 78 | dtype=dtype)) 79 | neuralNet.add(Tanh()) 80 | seed += 1000 81 | 82 | neuralNet.add(GaussianDenseLayer(width, 83 | outputDims, 84 | seed=seed, 85 | dtype=dtype)) 86 | 87 | neuralNet.setupMCMC( 88 | stepSizeStart=1e-3,#0.0004 # starting stepsize 89 | stepSizeMin=1e-4, #0.0002 # minimum stepsize 90 | stepSizeMax=1e-2, # maximum stepsize 91 | stepSizeOptions=100, # number of stepsize options in stepsize adapter 92 | leapfrogStart=1000, # starting number of leapfrog steps 93 | leapfogMin=100, # minimum number of leapfrog steps 94 | leapFrogMax=10000, # maximum number of leapfrog steps 95 | leapfrogIncrement=10, # stepsize between leapfrog steps in leapfrog step adapter 96 | hyperStepSize=0.001, # hyper parameter stepsize 97 | hyperLeapfrog=100, # hyper parameter number of leapfrog steps 98 | burnin=1000, # number of burnin epochs 99 | averagingSteps=10) # number of averaging steps for param adapters) 100 | 101 | 102 | neuralNet.train( 103 | 6001, # epochs to train for 104 | 10, # increment between network saves 105 | likelihood, 106 | metricList=metricList, 107 | adjustHypers=True, 108 | folderName="TrigRegression", # Name of folder for saved networks 109 | networksPerFile=50) # Number of networks saved per file 110 | 111 | print("Total time elapsed (seconds):", time.time() - startTime) 112 | 113 | 114 | 115 | if(__name__ == "__main__"): 116 | main() 117 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Braden Kronheim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorBNN 2 | This package contains code which can be used to train Bayesian Neural Networks using Hamiltonian Monte Carlo sampling as proposed by Radford Neal in his thesis "Bayesian Learning for Neural Networks" along with added features. 
The package is written in python3 and uses the packages `Tensorflow` and `Tensorflow-Probability` as the framework for the implementation. 3 | 4 | For detailed information about this implementation, please see our paper on the arXiv: [TensorBNN: Bayesian Inference for Neural Networks using Tensorflow](https://arxiv.org/abs/2009.14393). Cite as: 5 | 6 | B. Kronheim, M. Kuchera, H. Prosper, TensorBNN: Bayesian inference for neural network training using TensorFlow. arXiv:https://arxiv.org/abs/2009.14393. 7 | 8 | 9 | ## Dependencies 10 | All python code written here is in python3. The code is dependent upon the packages `numpy`, `emcee`, `tensorflow`, `tensorflow-probability`, and `scipy`. 11 | 12 | The package, along with `numpy`, `emcee`, and `scipy`, can be installed via 13 | 14 | ``` 15 | pip install tensorBNN 16 | ``` 17 | 18 | Alternatively, you can clone the repository and download `numpy`, `scipy`, and `emcee` from source through the command: 19 | 20 | ``` 21 | pip install numpy scipy emcee 22 | ``` 23 | 24 | If you cloned the repository from GitHub instead, add the tensorBNN folder to your Python path, and it will work the same as having downloaded it via pip. 25 | 26 | TensorFlow and TensorFlow-Probability must be installed separately. The TensorFlow version should be the most recent (2.5 at the moment). Using a 1.x version will not work, and older versions of 2 might not either. It is also highly recommended that this code be run on a GPU due to its high computational complexity. TensorFlow for the GPU can be installed with the command: 27 | 28 | ``` 29 | pip install tensorflow-gpu 30 | ``` 31 | 32 | In order to be compatible with this version of tensorflow, the most recent version of tensorflow-probability (0.12.2) must be installed. This is done with the following command: 33 | 34 | ``` 35 | pip install tensorflow-probability 36 | ``` 37 | 38 | 39 | ## Usage 40 | 41 | In order to use this code you must import network, DenseLayer, an activation such as Relu, and a likelihood such as the Gaussian likelihood. This can be done as follows: 42 | 43 | ``` 44 | from tensorBNN.layer import DenseLayer 45 | from tensorBNN.network import network 46 | from tensorBNN.activationFunctions import Relu 47 | from tensorBNN.likelihood import GaussianLikelihood 48 | ``` 49 | 50 | Next, it is highly convenient to turn off the deprecation warnings. These are all from tensorflow, tensorflow-probability, and numpy interacting with tensorflow, so it isn't something easily fixed and there are a lot of warnings. These are turned off with: 51 | 52 | ``` 53 | import warnings 54 | warnings.filterwarnings("ignore", category=DeprecationWarning) 55 | warnings.filterwarnings("ignore", category=UserWarning) 56 | ``` 57 | 58 | The other important setup task is determining whether or not to seed the random number generator before training. Please note that if you are using a GPU then there will always be some randomness which cannot be removed.
To set all CPU random numbers use these lines of code: 59 | 60 | ``` 61 | import os 62 | 63 | import numpy as np 64 | import random as rn 65 | import tensorflow as tf 66 | 67 | os.environ["PYTHONHASHSEED"] = "0" 68 | np.random.seed(42) 69 | rn.seed(12345) 70 | tf.random.set_seed(3) 71 | ``` 72 | 73 | Moving on to the actual use of this code, start with the declaration of a network object: 74 | 75 | ``` 76 | neuralNet = network.network(dtype, inputDims, trainX, trainY, validationX, validationY, mean, sd) 77 | ``` 78 | 79 | The parameters are described as follows: 80 | * dtype: data type for Tensors 81 | * inputDims: dimension of input vector 82 | * trainX: the training data input, shape is n by inputDims 83 | * trainY: the training data output 84 | * validateX: the validation data input, shape is n by inputDims 85 | * validateY: the validation data output 86 | * mean: the mean used to scale trainY and validateY 87 | * sd: standard deviation used to scale trainY and validateY 88 | 89 | Next, add all of the desired layers and activation functions as follows: 90 | 91 | ``` 92 | neuralNet.add(DenseLayer(inputDims, outputDims, seed=seed, dtype=tf.float32)) 93 | neuralNet.add(Relu()) 94 | ``` 95 | 96 | For added control, especially when using pre-trained networks, it is possible to feed in pretrained weights, biases, and values for the activation functions. This can be done as follows: 97 | 98 | ``` 99 | neuralNet.add(DenseLayer(inputDims, outputDims, weights=weights, biases=biases, seed=seed, dtype=dtype)) 100 | neuralNet.add(SquarePrelu(width, alpha=alpha**(0.5), activation=activation, dtype=dtype)) 101 | ``` 102 | 103 | The parameter inputDims is the output shape of the previous layer, and width is the output shape of the layer itself. The seed is used for seeding the random number generator. Currently, only ReLU is supported for easy predictions off of saved networks. The other activation functions can be used, but they will require more custom code to predict from saved networks. 104 | 105 | Next, the Markov Chain Monte Carlo algorithm must be initialized. This can be done as follows: 106 | 107 | ``` 108 | neuralNet.setupMCMC(stepSize, stepMin, stepMax, stepNum, leapfrog, leapMin, 109 | leapMax, leapStep, hyperStepSize, hyperLeapfrog, burnin, 110 | cores, averagingSteps=2, a=4, delta=0.1) 111 | ``` 112 | 113 | The parameters are described as follows: 114 | * stepSize: the starting step size for the weights and biases 115 | * stepMin: the minimum step size 116 | * stepMax: the maximum step size 117 | * stepNum: the number of step sizes in grid 118 | * leapfrog: number of leapfrog steps for weights and biases 119 | * leapMin: the minimum number of leapfrog steps 120 | * leapMax: the maximum number of leapfrog steps 121 | * leapStep: the step in number of leapfrog steps for the search grid 122 | * hyperStepSize: the starting step size for the hyper parameters 123 | * hyperLeapfrog: leapfrog steps for hyper parameters 124 | * cores: number of cores to use 125 | * averagingSteps: number of averaging steps 126 | * a: constant, 4 in paper 127 | * delta: constant, 0.1 in paper 128 | 129 | This code uses the adaptive Hamiltonian Monte Carlo described in "Adaptive Hamiltonian and Riemann Manifold Monte Carlo Samplers" by Wang, Mohamed, and de Freitas. In accordance with this paper there are a few more parameters that can be adjusted, though it is recommended that their default values are kept. 130 | 131 | After initializing the HMC, we must declare the likelihood that we want to use as well as any metrics. The likelihood scores the network's predictions against the training targets during sampling; a short sketch of the quantity the Gaussian likelihood evaluates is given below, followed by the actual declarations.
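The following is only an illustration of the idea, not part of the tensorBNN API: for a regression model, a Gaussian likelihood with a fixed standard deviation evaluates how probable the training targets are under a normal distribution centred on the network's predictions. The function name and signature below are made up for the example.

```
import math
import numpy as np

def gaussian_log_likelihood(predictions, targets, sd=0.1):
    # Log-probability of the targets under N(prediction, sd^2),
    # summed over every training point.
    residuals = (targets - predictions) / sd
    return np.sum(-0.5 * residuals**2 - math.log(sd * math.sqrt(2.0 * math.pi)))
```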
In tensorBNN, these declarations look like this: 132 | 133 | ``` 134 | # Declare Gaussian Likelihood with sd of 0.1 135 | likelihood = GaussianLikelihood(sd = 0.1) 136 | metricList = [ # Declare metrics 137 | SquaredError(mean = 0, sd = 1, scaleExp = False), 138 | PercentError(mean = 10, sd = 2, scaleExp = True)] 139 | ``` 140 | 141 | 142 | The last thing to do is to tell the model to start learning. This is done with the following command: 143 | 144 | ``` 145 | network.train( 146 | epochs, # epochs to train for 147 | samplingStep, # increment between network saves 148 | likelihood, 149 | metricList = metricList, 150 | folderName = "Regression", 151 | # Name of folder for saved networks 152 | networksPerFile=50) 153 | # Number of networks saved per file 154 | ``` 155 | 156 | The arguments have the following meanings: 157 | 158 | * Epochs: Number of training cycles 159 | * samplingStep: Epochs between sampled networks 160 | * likelihood: The likelihood function used to evaluate the predictions, 161 | defined previously 162 | * startSigma: Starting standard deviation for likelihood function 163 | for regression models 164 | * folderName: name of folder for saved networks 165 | * networksPerFile: number of networks saved in a given file 166 | 167 | Once the network has trained, which may take a while, the saved networks can be loaded and then used to make predictions using the following code: 168 | 169 | ``` 170 | from tensorBNN.predictor import predictor 171 | 172 | network = predictor(filePath, 173 | dtype = dtype, 174 | # data type used by network 175 | customLayerDict={"dense2": Dense2}, 176 | # A dense layer with a different 177 | # hyperprior 178 | likelihood = Likelihood) 179 | # The likelihood function is required to 180 | # calculate the probabilities for 181 | # re-weighting 182 | 183 | initialResults = network.predict(inputData, skip, dtype) 184 | ``` 185 | 186 | The variable filePath is the directory from which the networks are being loaded, inputData is the normalized data for which predictions should be made, and dtype is the data type to be used for predictions. The customLayerDict is a dictionary holding the names and objects for any user defined layers. Likelihood is the likelihood function used to train the model. 187 | 188 | The variable initialResults will be a list of numpy arrays, each numpy array corresponding to the predictions from a single network in the BNN. The skip variable instructs the predictor to only use every n networks, where n=skip. 189 | 190 | Additionally, the predictor object allows for the calculation of the autocorrelation between different networks, as well as the autocorrelation length, through: 191 | 192 | ``` 193 | autocorrelations = network.autocorrelation(testData, nMax) 194 | autocorrelations = network.autoCorrelationLength(testData, nMax) 195 | ``` 196 | Here, the autocorrelation is calculated based on the predictions of the different BNNs, and the results are averaged over the test data. nMax provides the largest lag value for the autocorrelation. These calculations are done with emcee. 197 | 198 | 199 | Finally, the predictor object can calculate new weights for the different networks if they were given new priors. These priors take the form of new Layer objects which must be referenced in an architecture file. Conceptually this is an importance-reweighting step, sketched below; the actual call follows the sketch.
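The reweighting is essentially importance sampling over the saved networks: each sampled network receives a weight proportional to the ratio of its (unnormalized) posterior density under the new prior to its density under the prior it was trained with. The helper below is only a sketch of that idea under this assumption, not the code tensorBNN actually runs.

```
import numpy as np

def importance_weights(log_post_new, log_post_old):
    # log_post_new / log_post_old: per-network log posterior densities under
    # the new and the original prior, evaluated on the training data.
    logw = np.array(log_post_new) - np.array(log_post_old)
    logw -= logw.max()          # subtract the max for numerical stability
    w = np.exp(logw)
    return w / w.sum()          # normalized weights for the saved networks
```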
The reweighting function call looks like this: 200 | 201 | ``` 202 | weights = network.reweight( 203 | trainX, # training input 204 | trainY, # training output 205 | skip = 10, # Use every 10 saved networks 206 | architecture = "architecture2.txt") 207 | # New architecture file 208 | ``` 209 | 210 | -------------------------------------------------------------------------------- /docs/ClassificationExample.md: -------------------------------------------------------------------------------- 1 | # MNIST Classification Example 2 | On this page is a tutorial on training a classification BNN on the `MNIST` dataset using the tools available inside of `TensorBNN`. This dataset consists of a collection of 28x28 grayscale images of handwritten digits. This tutorial will show how to select two of the digits and train a BNN to distinguish between them. 3 | 4 | ## Data setup 5 | First, it is necessary to import all the packages that will be needed. The required ones are 6 | ``` 7 | import os 8 | 9 | import numpy as np 10 | import random as rn 11 | import tensorflow as tf 12 | 13 | from sklearn.model_selection import train_test_split 14 | 15 | from Networks.activationFunctions import SquarePrelu, Sigmoid 16 | from Networks.BNN_functions import trainBasicClassification 17 | from Networks.layer import DenseLayer 18 | from Networks.network import network 19 | ``` 20 | The `os`, `numpy`, `random`, and `tensorflow` imports are all required to set the random seeds properly so that results are reproducible. The other imports are either for splitting the training and validation data or for constructing the actual network. It is important to note, however, that if a GPU is used for training, which is highly recommended, it is impossible to obtain completely reproducible results simply because of how a GPU works. The code required to set these random seeds is: 21 | 22 | ``` 23 | os.environ["PYTHONHASHSEED"] = "0" 24 | np.random.seed(42) 25 | rn.seed(12345) 26 | tf.random.set_seed(3) 27 | ``` 28 | After setting up the random seeds, we need to get our dataset. This is accomplished through the code: 29 | ``` 30 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path='mnist.npz') 31 | ``` 32 | As the `MNIST` data consists of images, we need to reshape each picture into a vector and scale the pixel values to between 0 and 1. This is accomplished here: 33 | ``` 34 | x_train_shape = x_train.shape 35 | 36 | inputDims=x_train_shape[1]**2 37 | outputDims=1 38 | 39 | x_train = np.reshape(x_train, (x_train_shape[0],x_train_shape[1]**2)) 40 | x_train = np.float32(x_train)/256 41 | ``` 42 | We also collected our input and output dimensions, which will be important later. 43 | Next, we must collect the two numbers that we are interested in. For this tutorial we will use 3 and 8, but you are free to use whatever two numbers you desire. In the following block of code we create our new datasets. 44 | ``` 45 | new_x_train = [] 46 | new_y_train = [] 47 | 48 | for y in range(len(y_train)): 49 | if(y_train[y]==3): 50 | new_y_train.append(0) 51 | new_x_train.append(x_train[y]) 52 | if(y_train[y]==8): 53 | new_y_train.append(1) 54 | new_x_train.append(x_train[y]) 55 | x_train = np.array(new_x_train) 56 | y_train = np.array(new_y_train) 57 | ``` 58 | Finally, we perform an 80-20 train-validation split and store all of the datasets in a list.
59 | ``` 60 | trainIn, valIn, trainOut, valOut = train_test_split( 61 | x_train, y_train, test_size=0.20, random_state=21) 62 | data=[trainIn, trainOut, valIn, valOut] 63 | ``` 64 | 65 | ## Pretraining 66 | Next, we will use the pretraining feature built into `TensorBNN`. This feature uses normal neural network optimization algorithms to give us a superior starting point for the BNN, which is a much slower algorithm. Pretraining the networks then allows for faster convergence of the BNN. To do the pretraining, we simply call `trainBasicClassification` from the `BNN_functions` module. A sample call is shown below with all of the arguments labeled. 67 | ``` 68 | weights, biases, activation = trainBasicClassification( 69 | 2, # Number of hidden layers 70 | inputDims, # Input dimensions 71 | outputDims, # Output dimensions 72 | 20, # Number of perceptrons per layer 73 | nncycles, # Number of training cycles. The learning rate is decreased by a factor of 10 each cycle. 74 | 10, # Number of epochs per training cycle 75 | 0.1, # Slope value for `leaky-relu` activation 76 | data[0], # Training input data 77 | data[1], # Training output data 78 | data[2], # Validation input data 79 | data[3], # Validation output data 80 | "MNIST_pretrain", # Save the pretrained network under this name 81 | callbacks=True, # Use callbacks to restore best weights obtained while training 82 | callbackMetric="val_loss", # metric used to determine best weights 83 | patience=10) # number of epochs to wait after failing to improve callback metric 84 | ``` 85 | Running this function will train a network in Keras and save it under the name "MNIST_pretrain". It will extract the weight, bias, and activation function tensors from the final model and return them. 86 | 87 | ## BNN setup 88 | We are now finally ready to actually set up the BNN. 89 | First, we create a network object. This is accomplished through the following code: 90 | ``` 91 | dtype = tf.float32 # This is the best trade-off between speed and precision. 92 | 93 | neuralNet = network( 94 | dtype, # network datatype 95 | inputDims, # dimension of input vector 96 | data[0], # Training input data 97 | data[1], # Training output data 98 | data[2], # Validation input data 99 | data[3], # Validation output data 100 | tf.cast(0.0, dtype), # Mean of output data for unnormalization 101 | tf.cast(1.0, dtype)) # Standard deviation of output data 102 | ``` 103 | Next, we need to add our layers. We will use two hidden layers of 20 perceptrons each, with SquarePrelu activation functions after the hidden layers and a Sigmoid activation on the output layer. SquarePrelu activations are similar to normal prelu activations, which are essentially leaky-relu activations with a trainable slope parameter. The difference, though, is that the trained parameter is the plus or minus square root of the slope, so the effective slope is always positive. This way, we cannot end up with an activation function that is not a bijection. A minimal sketch of the idea is shown below.
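This sketch is only meant to show what a SquarePrelu activation computes; the class name, constructor, and method below are illustrative rather than the actual `tensorBNN.activationFunctions.SquarePrelu` implementation.

```
import tensorflow as tf

class SquarePreluSketch(object):
    """Leaky-ReLU with a trainable slope, stored as its square root."""

    def __init__(self, width, alphaRoot, dtype=tf.float32):
        # One trainable slope root per perceptron; the effective slope
        # is alphaRoot**2, which can never be negative.
        self.alphaRoot = tf.Variable(
            tf.fill([width], tf.cast(alphaRoot, dtype)))

    def predict(self, inputTensor):
        slope = tf.square(self.alphaRoot)  # squaring keeps the slope positive
        return tf.where(inputTensor > 0, inputTensor, slope * inputTensor)
```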
104 | The code to add the layers is below: 105 | ``` 106 | seed = 0 # seed for layer generation, irrelevant with pretraining 107 | width = 20 # number of perceptrons per layer 108 | alpha = 0.1 # starting slope value for SquarePrelu 109 | hidden = 2 # Number of hidden layers 110 | neuralNet.add( # add a layer 111 | DenseLayer( # dense layer object 112 | inputDims, # input dimension 113 | width, # number of perceptrons per layer 114 | weights=weights[0], # pretrained weights 115 | biases=biases[0], # pretrained biases 116 | seed=seed, # layer seed 117 | dtype=dtype)) # layer datatype 118 | neuralNet.add(SquarePrelu(width, 119 | alpha=alpha**(0.5), # starting slope parameter 120 | activation=None, # no activation pretrained 121 | dtype=dtype)) # activation datatype 122 | seed += 1000 123 | for n in range(hidden - 1): # Add the hidden layers 124 | neuralNet.add(DenseLayer(width, 125 | width, 126 | weights=weights[n + 1], 127 | biases=biases[n + 1], 128 | seed=seed, 129 | dtype=dtype)) 130 | neuralNet.add( 131 | SquarePrelu( 132 | width, 133 | alpha=alpha**(0.5), 134 | activation=None, 135 | dtype=dtype)) 136 | seed += 1000 137 | 138 | # Add the output layer 139 | neuralNet.add(DenseLayer(width, 140 | outputDims, 141 | weights=weights[-1], 142 | biases=biases[-1], 143 | seed=seed, 144 | dtype=dtype)) 145 | neuralNet.add(Sigmoid()) # Sigmoid activation 146 | ``` 147 | Next, we must set up the Markov Chain Monte Carlo algorithm. This is done by simply calling setupMCMC and providing it a lot of information. 148 | ``` 149 | neuralNet.setupMCMC( 150 | 0.001, # Starting stepsize for Hamiltonian Monte Carlo (HMC) 151 | 0.0005, # Minimum possible stepsize for HMC 152 | 0.002, # Maximum possible stepsize for HMC 153 | 100, # Number of points to use in stepsize search grid 154 | 500, # Starting number of leapfrog steps for HMC 155 | 100, # Minimum number of leapfrog steps for HMC 156 | 2000, # Maximum number of leapfrog steps for HMC 157 | 1, # increment in leapfrog steps in leapfrog search grid 158 | 0.00001, # stepsize for hyper parameter HMC 159 | 30, # leapfrog steps for hyper parameter HMC 160 | 50, # Number of burnin steps to do 161 | 2, # Number of cores to use on computer 162 | 2) # Number of steps to average over in adaptive HMC algorithm 163 | ``` 164 | Finally, we can actually train the network. We must give it a few last pieces of information and then it will be on its merry way. 165 | ``` 166 | neuralNet.train( 167 | 2500, # Train for 2500 epochs 168 | 10, # Save a network every 10 epochs 169 | folderName="MNIST_BNN", # Save inside the folder MNIST_BNN 170 | networksPerFile=25, # Start new files every 25 networks 171 | returnPredictions=False, # Don't return predictions 172 | regression=False) # Don't use the regression algorithm, so use the classification algorithm 173 | ``` 174 | A final word of caution: this algorithm is not fast. For large datasets and large networks it is only feasible to run this on GPUs, and even then it may need several days to run. This example is small enough that it should run on normal computers, but it will still take several hours. 175 | -------------------------------------------------------------------------------- /docs/RegressionExample.md: -------------------------------------------------------------------------------- 1 | # Regression Example 2 | Here, I will present an example of using `tensorBNN` on a very basic regression problem. It will also highlight how well the BNN represents model uncertainty. 3 | First, we need to import the necessary packages.
This is done through the commands below. 4 | ## Program Setup 5 | ``` 6 | import os 7 | import math 8 | 9 | import numpy as np 10 | import random as rn 11 | import tensorflow as tf 12 | 13 | from tensorBNN.activationFunctions import Tanh 14 | from tensorBNN.layer import DenseLayer 15 | from tensorBNN.network import network 16 | from tensorBNN.likelihood import GaussianLikelihood 17 | ``` 18 | In order to obtain reproducible results we need to set random seeds. In order to be sure that absolutely everything is seeded, we use the following four lines of code. 19 | ``` 20 | os.environ["PYTHONHASHSEED"] = "0" 21 | np.random.seed(42) 22 | rn.seed(12345) 23 | tf.random.set_seed(3) 24 | ``` 25 | ## Data preparation 26 | Next, we need to generate our dataset. We are simply going to use the function ```f(x)=x*sin(2pi*x)-cos(pi*x)```. 27 | We will generate a training dataset with 31 points and a validation dataset with 30 points. This is done as follows. 28 | ``` 29 | trainIn=np.linspace(-2,2,num=31) 30 | valIn=np.linspace(-2+2/30,2.0-2/30,num=30) 31 | trainOut = np.sin(trainIn*math.pi*2)*trainIn-np.cos(trainIn*math.pi) 32 | valOut = np.sin(valIn*math.pi*2)*valIn-np.cos(valIn*math.pi) 33 | ``` 34 | After this we need to group our data together and declare the datatype we will be using. 35 | ``` 36 | data=[trainIn, trainOut, valIn, valOut] 37 | 38 | dtype=tf.float32 39 | ``` 40 | ## Network setup 41 | To get the network set up we first need to declare the number of input and output dimensions and the normalization we used on our output data. As we didn't normalize, we just say we have a mean of 0 and a standard deviation of 1 so `TensorBNN` doesn't try to unnormalize the data. 42 | ``` 43 | inputDims=1 44 | outputDims=1 45 | 46 | normInfo=(0,1) # mean, sd 47 | ``` 48 | Now we actually need to create the network object. This is done like so. 49 | ``` 50 | neuralNet = network( 51 | dtype, # network datatype 52 | inputDims, # dimension of input vector 53 | data[0], # training input data 54 | data[1], # training output data 55 | data[2], # validation input data 56 | data[3]) # validation output data 57 | ``` 58 | Next, we add the layers. We will be using two hidden layers with 10 perceptrons each and the hyperbolic tangent activation function. 59 | ``` 60 | width = 10 # perceptrons per layer 61 | hidden = 2 # number of hidden layers 62 | seed = 0 # random seed 63 | neuralNet.add( 64 | DenseLayer( # Dense layer object 65 | inputDims, # Size of layer input vector 66 | width, # Size of layer output vector 67 | seed=seed, # Random seed 68 | dtype=dtype)) # Layer datatype 69 | neuralNet.add(Tanh()) # Tanh activation function 70 | seed += 1000 # Increment random seed 71 | for n in range(hidden - 1): # Add more hidden layers 72 | neuralNet.add(DenseLayer(width, 73 | width, 74 | seed=seed, 75 | dtype=dtype)) 76 | neuralNet.add(Tanh()) 77 | seed += 1000 78 | 79 | neuralNet.add(DenseLayer(width, 80 | outputDims, 81 | seed=seed, 82 | dtype=dtype)) 83 | ``` 84 | Now we need to initialize the Markov Chain Monte Carlo algorithm. The sketch below illustrates what the step size and leapfrog settings control; the actual setupMCMC call follows it.
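Hamiltonian Monte Carlo proposes new network parameters by simulating Hamiltonian dynamics with a leapfrog integrator: the step size sets how far each leapfrog step moves, and the number of leapfrog steps sets how long a single proposal trajectory is. The function below is a generic illustration of one such trajectory, not tensorBNN code; `grad_log_prob` stands in for the gradient of the log posterior.

```
def leapfrog_trajectory(theta, momentum, grad_log_prob, step_size, n_steps):
    # One HMC proposal: n_steps leapfrog steps of size step_size.
    momentum = momentum + 0.5 * step_size * grad_log_prob(theta)  # half momentum step
    for _ in range(n_steps - 1):
        theta = theta + step_size * momentum                      # full position step
        momentum = momentum + step_size * grad_log_prob(theta)    # full momentum step
    theta = theta + step_size * momentum
    momentum = momentum + 0.5 * step_size * grad_log_prob(theta)  # final half step
    return theta, momentum
```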
85 | ``` 86 | neuralNet.setupMCMC( 87 | 0.005, # starting stepsize 88 | 0.0025, # minimum stepsize 89 | 0.01, # maximum stepsize 90 | 40, # number of stepsize options in stepsize adapter 91 | 2, # starting number of leapfrog steps 92 | 2, # minimum number of leapfrog steps 93 | 50, # maximum number of leapfrog steps 94 | 1, # stepsize between leapfrog steps in leapfrog step adapter 95 | 0.01, # hyper parameter stepsize 96 | 5, # hyper parameter number of leapfrog steps 97 | 20, # number of burnin epochs 98 | 20, # number of cores 99 | 2) # number of averaging steps for param adapters 100 | ``` 101 | Next, we initialize the Likelihood object we use to evaluate predictions. We use a Gaussian likelihood with a starting standard deviation of 0.1. 102 | ``` 103 | likelihood = GaussianLikelihood(sd = 0.1) 104 | ``` 105 | We would also like to measure the performance of the network using a metric such as mean squared error, so we initialize a metric object (`SquaredError`, imported from `tensorBNN.metrics`) and add it to a metric list. 106 | ``` 107 | metricList = [SquaredError()] 108 | ``` 109 | 110 | Finally, we get to actually train the network, passing it the likelihood we defined above along with the metric list. This is done with the following code. 111 | ``` 112 | neuralNet.train( 113 | 1000, # epochs to train for 114 | 2, # increment between network saves 115 | likelihood, metricList = metricList, # likelihood and list of evaluation metrics 116 | folderName="TrigRegression") # Name of folder for saved networks 117 | ``` 118 | After this, just run the program. 119 | -------------------------------------------------------------------------------- /docs/Setup.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Setup 4 | --- 5 | 6 | # Setup 7 | All python code written here is intended to be used in Python3. The code is dependent upon the packages numpy, emcee, tensorflow, tensorflow-probability, and scipy. 8 | 9 | Numpy, scipy, and emcee can be installed through the command: 10 | 11 | ``` 12 | pip3 install numpy scipy emcee 13 | ``` 14 | 15 | TensorFlow and TensorFlow-Probability must be installed separately. The TensorFlow version should be the most recent (2.3 at the moment). Using a 1.x version will not work, and neither will older versions of 2. It is also highly recommended that this code be run on a GPU due to its high computational complexity. TensorFlow for the GPU can be installed with the command: 16 | 17 | ``` 18 | pip3 install tensorflow-gpu 19 | ``` 20 | 21 | In order to be compatible with this version of tensorflow, the most recent version of tensorflow-probability (0.11) must be installed. This is done with the following command: 22 | 23 | ``` 24 | pip3 install tensorflow-probability 25 | ``` 26 | 27 | In order to use this code you can either clone this repository and copy the Networks folder into a folder named tensorBNN in the main folder of your project, or download it using pip. 28 | ``` 29 | pip install tensorBNN 30 | git clone https://github.com/alpha-davidson/TensorBNN.git 31 | ``` 32 | 33 | After this, you can use the following command to import the general network object, and similar commands for the other objects.
34 | ``` 35 | from tensorBNN.network import network 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-dinky 2 | defaults: 3 | # _docs 4 | - scope: 5 | path: "" 6 | type: docs 7 | values: 8 | sidebar: 9 | nav: "docs" 10 | -------------------------------------------------------------------------------- /docs/_data/navigation.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | - title: Home 3 | url: /docs/index.md 4 | 5 | - title: Setup 6 | url: /docs/Setup.md 7 | 8 | - title: Usage 9 | url: /docs/usage.md 10 | 11 | -------------------------------------------------------------------------------- /docs/_includes/navigation.html: -------------------------------------------------------------------------------- 1 | 10 | -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {% seo %} 8 | 9 | 10 | 11 | 14 | 15 | 16 |
17-48 | [The HTML markup of this Jekyll layout was lost when the repository was rendered to text. The surviving template expressions are: a header showing {{ site.title | default: site.github.repository_name }} and {{ site.description | default: site.github.project_tagline }}, the {% include navigation.html %} directive, a "This project is maintained by {{ site.github.owner_name }}" note inside {% if site.github.is_project_page %} ... {% endif %}, an {% if site.github.is_user_page %} block, and a main section rendering {{ content }}.]
49 | 50 | {% if site.google_analytics %} 51 | 59 | {% endif %} 60 | 61 | 62 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Home 4 | --- 5 | 6 | 7 | # Tensor BNN 8 | This package contains code which can be used to create full Bayesian Neural Networks using Hamiltonian Monte Carlo sampling as proposed by Radford Neal in his thesis "Bayesian Learning for Neural Networks" along with some added features. The package is written in python and uses the packages `Tensorflow` and `Tensorflow-Probability` as the framework for the implementation. 9 | 10 | For instructions on how to setup this package, [click here](Setup.md). 11 | 12 | If you would like an explanation of how to use the code, [click here](usage.md). 13 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Usage 4 | --- 5 | 6 | # Usage 7 | 8 | Through the use of this package it is possible to easily make Bayesian Neural Networks for regression and binary classification learning problems. The folder `Examples` contains an excellent example of a regression problem, and a currently outdated binary classification problem. 9 | 10 | More generally, in order to use this code you must import network, Dense Layer, an activation such as Relu, and a likelihood function such a a Gaussian likelihood. This can be done as follows: 11 | 12 | ``` 13 | from tensorBNN.layer import DenseLayer 14 | from tensorBNN.network import network 15 | from tensorBNN.activationFunctions import Relu 16 | from tensorBNN.likelihood import GaussianLikelihood 17 | ``` 18 | 19 | Next, it is highly convenient to turn off the deprecation warnings. These are all from tensorflow, tensorflow-probability, and numpy intereacting with tensorflow, so it isn't something easily fixed and there are a lot of warnings. These are turned off with: 20 | 21 | ``` 22 | import warnings 23 | warnings.filterwarnings("ignore", category=DeprecationWarning) 24 | warnings.filterwarnings("ignore", category=UserWarning) 25 | ``` 26 | 27 | The other important setup task is determining whether or not to seed the random number generator before training. Please note that if you are using a gpu then there will always be some randomness which cannot be removed. 
To set all cpu random numbers use these lines of code: 28 | 29 | ``` 30 | import os 31 | 32 | import numpy as np 33 | import random as rn 34 | import tensorflow as tf 35 | 36 | os.environ["PYTHONHASHSEED"] = "0" 37 | np.random.seed(42) 38 | rn.seed(12345) 39 | tf.random.set_seed(3) 40 | ``` 41 | 42 | Moving on to the actual use of this code, start with the declaration of a network object: 43 | 44 | ``` 45 | neuralNet = network.network(dtype, inputDims, trainX, trainY, validationX, validationY, mean, sd) 46 | ``` 47 | 48 | The paramaters are described as follows: 49 | * dtype: data type for Tensors 50 | * inputDims: dimension of input vector 51 | * trainX: the training data input, shape is n by inputDims 52 | * trainY: the training data output 53 | * validateX: the validation data input, shape is n by inputDims 54 | * validateY: the validation data output 55 | * mean: the mean used to scale trainY and validateY 56 | * sd: standard deviation used to scale trainY and validateY 57 | 58 | Next, add all of the desired layers and activation functions as follows: 59 | 60 | ``` 61 | neuralNet.add(DenseLayer(inputDims, outputDims, seed=seed, dtype=tf.float32)) 62 | neuralNet.add(Relu()) 63 | ``` 64 | 65 | For added control, especially when using pre-trained networks it is possible to feed pretrained weights, biases, and values for the activation functions. This can be done as follows: 66 | 67 | ``` 68 | neuralNet.add(DenseLayer(inputDims,outputDims, weights=weights, biases=biases, seed=seed, dtype=dtype)) 69 | neuralNet.add(SquarePrelu(width, alpha=alpha**(0.5), activation=activation, dtype=dtype)) 70 | ``` 71 | 72 | The paramater inputDims is the output shape of the layer before, and the width is the ouput shape of the layers itself. The seed is used for seeding the random number generator. Currently, only ReLU is supported for easy predictions off of saved networks. The other activation functions can be used, but they will require more custom code to predict from saved networks. 73 | 74 | Next, the Markov Chain Monte Carlo algorithm must be initialized. This can be done as follows: 75 | 76 | ``` 77 | neuralNet.setupMCMC(self, stepSize, stepMin, stepMax, stepNum, leapfrog, leapMin, 78 | leapMax, leapStep, hyperStepSize, hyperLeapfrog, burnin, 79 | cores, averagingSteps=2, a=4, delta=0.1): 80 | ``` 81 | 82 | The paramaters are described as follows: 83 | * stepSize: the starting step size for the weights and biases 84 | * stepMin: the minimum step size 85 | * stepMax: the maximum step size 86 | * stepNum: the number of step sizes in grid 87 | * leapfrog: number of leapfrog steps for weights and biases 88 | * leapMin: the minimum number of leapfrog steps 89 | * leapMax: the maximum number of leapfrog steps 90 | * leapStep: the step in number of leapfrog for search grid 91 | * hyperStepSize: the starting step size for the hyper parameters 92 | * hyperLeapfrog: leapfrog steps for hyper parameters 93 | * cores: number of cores to use 94 | * averaginSteps: number of averaging steps 95 | * a: constant, 4 in paper 96 | * delta: constant, 0.1 in paper 97 | 98 | This code uses the adaptive Hamlitonain Monte Carlo described in "Adaptive Hamiltonian and Riemann Manifold Monte Carlo Samplers" by Wang, Mohamed, and de Freitas. In accordance with this paper there are a few more paramaters that can be adjusted, though it is recomended that their default values are kept. 99 | 100 | After initializaing the HMC, we must declare the likelihood that we want to use as well as any metrics. 
This can be accomplished through the following code: 101 | 102 | ``` 103 | # Declare Gaussian Likelihood with sd of 0.1 104 | likelihood = GaussianLikelihood(sd = 0.1) 105 | metricList = [ #Declare metrics 106 | SquaredError(mean = 0, sd = 1, scaleExp = False), 107 | PercentError(mean = 10, sd = 2, scaleExp = True)] 108 | ``` 109 | 110 | 111 | The last thing to do is actually tell the model to start learning this is done with the following command: 112 | 113 | ``` 114 | network.train( 115 | epochs, # epochs to train for 116 | samplingStep, # increment between network saves 117 | likelihood, 118 | metricList = metricList, 119 | folderName = "Regression", 120 | # Name of folder for saved networks 121 | networksPerFile=50) 122 | # Number of networks saved per file 123 | ``` 124 | 125 | The arguments have the following meanings: 126 | 127 | * Epochs: Number of training cycles 128 | * samplingStep: Epochs between sampled networks 129 | * likelihood: The likelihood function used to evaluate the prediction 130 | we defined previously 131 | * startSigma: Starting standard deviation for likelihood function 132 | for regression models 133 | * folderName: name of folder for saved networks 134 | * networksPerFile: number of networks saved in a given file 135 | 136 | Once the network has trained, which may take a while, the saved networks can be loaded and then used to make predictions using the following code: 137 | 138 | ``` 139 | from TensorBNN.predictor import predictor 140 | 141 | network = predictor(filePath, 142 | dtype = dtype, 143 | # data type used by network 144 | customLayerDict={"dense2": Dense2}, 145 | # A dense layer with a different 146 | # hyperprior 147 | likelihood = Likelihood) 148 | # The likelihood function is required to 149 | # calculate the probabilities for 150 | # re-weighting 151 | 152 | initialResults = network.predict(inputData, skip, dtype) 153 | ``` 154 | 155 | The variable filePath is the directory from which the networks are being loaded, inputData is the normalized data for which predictions should be made, and dtype is the data type to be used for predictions. The customLayerDict is a dictionary holding the names and objects for any user defined layers. Likelihood is the likelihood function used to train the model. 156 | 157 | The variable initialResults will be a list of numpy arrays, each numpy array corresponding to the predcitions from a single network in the BNN. The skip variable instructs the predictor to only use every n networks, where n=skip 158 | 159 | Additionally, the predictor function allows for the calculation of the autocorrelation between different networks, as well as the autocorrelation length through: 160 | 161 | ``` 162 | autocorrelations = network.autocorrelation(testData, nMax) 163 | autocorrelations = network.autoCorrelationLength(testData, nMax) 164 | ``` 165 | Here, the autocorrelation is calculated based on the predictions of the different BNNs, and the results are averaged over the test data. nMax provides the largest lag value for the autocorrelation. These calculations are done with emcee. 166 | 167 | 168 | Finally, the predictor object can calculate new weights for the different networks if they were given new priors. These priors take the form of new Layer objects which must be referenced in an architecture file. 
The reweighting function call looks like this: 169 | 170 | ``` 171 | weights = network.reweight( 172 | trainX, # training input 173 | trainY, # training output 174 | skip = 10, # Use every 10 saved networks 175 | architecture = "architecture2.txt") 176 | # New architecture file 177 | ``` 178 | -------------------------------------------------------------------------------- /tensorBNN/BNN_functions.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | @tf.function 8 | def multivariateLogProb(sigmaIn, mu, x, dtype=tf.float32): 9 | """ Calculates the log probability of x given mu and sigma defining 10 | a multivariate normal distribution. 11 | 12 | Arguments: 13 | * sigmaIn: an n-dimensional vector with the standard deviations of 14 | * the distribution 15 | * mu: an n-dimensional vector with the means of the distribution 16 | * x: m n-dimensional vectors to have their probabilities calculated 17 | * dtype: data type of calculation 18 | Returns: 19 | * prob: an m-dimensional vector with the log-probabilities of x 20 | """ 21 | sigma = sigmaIn 22 | 23 | sigma = tf.maximum(sigma, tf.cast(10**(-8), dtype)) 24 | sigma = tf.minimum(sigma, tf.cast(10**(8), dtype)) 25 | logDet = 2 * tf.reduce_sum(input_tensor=tf.math.log(sigma)) 26 | k = tf.size(input=sigma, out_type=dtype) 27 | inv = tf.divide(1, sigma) 28 | difSigma = tf.math.multiply(inv, tf.subtract(x, mu)) 29 | difSigmaSquared = tf.reduce_sum(tf.math.multiply(difSigma, difSigma)) 30 | twoPi = tf.cast(2 * math.pi, dtype) 31 | 32 | logLikelihood = -0.5 * (logDet + difSigmaSquared + k * tf.math.log(twoPi)) 33 | 34 | return(logLikelihood) 35 | 36 | 37 | @tf.function 38 | def cauchyLogProb(gamma, x0, x, dtype=tf.float32): 39 | """ Calculates the log probability of x given x0 and gamma defining 40 | a cauchy distribution. 41 | 42 | Arguments: 43 | * gamma: the gamma value for the distribution 44 | * x0: the mean value for the distribution 45 | * x: an n-dimensional vectors to have their probabilities calculated 46 | * dtype: data type of calculation 47 | Returns: 48 | * prob: an n-dimensional vector with the log-probabilities of x 49 | """ 50 | 51 | a = tf.math.log(1 + ((x - x0) / gamma)**2) 52 | b = tf.math.log(tf.cast(math.pi * gamma, dtype)) 53 | c = tf.ones_like(x) 54 | d = -tf.math.scalar_mul(b, c) 55 | prob = a + d 56 | prob = tf.cast(prob, dtype) 57 | return(prob) 58 | 59 | 60 | def trainBasicRegression( 61 | hidden, 62 | inputDims, 63 | outputDims, 64 | width, 65 | cycles, 66 | epochs, 67 | alpha, 68 | trainIn, 69 | trainOut, 70 | valIn, 71 | valOut, 72 | name, 73 | callbacks=True, 74 | callbackMetric="val_loss", 75 | patience=10): 76 | """Trains a basic regression neural network and returns its weights. Uses 77 | the amsgrad optimizer and a learning rate of 0.01 which decays by a factor 78 | of 10 each cycle. The activation function is leaky relu with the specified 79 | alpha value. Saves the network as name in case something goes wrong with 80 | the BNN code so the network does not need to be retrained. 
81 | 82 | Arguments: 83 | * hidden: number of hidden layers 84 | * inputDims: input dimension 85 | * outputDims: output dimension 86 | * width: width of hidden layers 87 | * cycles: number of training cycles with decaying learning rates 88 | * epochs: number of epochs per cycle 89 | * alpha: slope value for leaky ReLU 90 | * trainIn: training input data 91 | * trainOut: training output data 92 | * valIn: validation input data 93 | * valOut: validation output data 94 | * name: name of network 95 | * callbacks: whether to use callbacks 96 | * callbackMetric: metric to use for early stopping 97 | * patience: early stopping patience 98 | 99 | Returns: 100 | * weights: list containing all weight matrices 101 | * biases: list containing all bias vectors 102 | * activation: list containing all activation vectors 103 | """ 104 | 105 | # Set seed 106 | tf.random.set_seed(1000) 107 | 108 | # Create model 109 | model = tf.keras.Sequential() 110 | 111 | model.add( 112 | tf.keras.layers.Dense( 113 | width, 114 | kernel_initializer="glorot_uniform", 115 | input_shape=( 116 | inputDims, 117 | ))) 118 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 119 | 120 | for n in range(hidden - 1): 121 | model.add( 122 | tf.keras.layers.Dense( 123 | width, 124 | kernel_initializer="glorot_uniform")) 125 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 126 | 127 | model.add( 128 | tf.keras.layers.Dense( 129 | outputDims, 130 | kernel_initializer="glorot_uniform")) 131 | 132 | callback = tf.keras.callbacks.EarlyStopping( 133 | monitor=callbackMetric, patience=patience, restore_best_weights=True) 134 | 135 | # Train with decreasing learning rate 136 | for x in range(cycles): 137 | model.compile(optimizer=tf.keras.optimizers.Adam(0.01 * (10**(-x)), 138 | amsgrad=True), 139 | loss="mean_squared_error", 140 | metrics=["mean_absolute_error", "mean_squared_error"]) 141 | model.summary() 142 | 143 | if(callbacks): 144 | model.fit( 145 | trainIn, 146 | trainOut, 147 | validation_data=( 148 | valIn, 149 | valOut), 150 | epochs=epochs, 151 | batch_size=32, 152 | callbacks=[callback]) 153 | else: 154 | model.fit( 155 | trainIn, 156 | trainOut, 157 | validation_data=( 158 | valIn, 159 | valOut), 160 | epochs=epochs, 161 | batch_size=32) 162 | 163 | # Save the backup 164 | model.save(name) 165 | 166 | # Extract weights and biases 167 | weights = [] 168 | biases = [] 169 | activation = [] 170 | for layer in model.layers: 171 | weightBias = layer.get_weights() 172 | if(len(weightBias) == 2): 173 | weights.append(weightBias[0].T) 174 | bias = weightBias[1] 175 | bias = np.reshape(bias, (len(bias), 1)) 176 | biases.append(bias) 177 | if(len(weightBias) == 1): 178 | activation.append(weightBias[0]) 179 | 180 | return(weights, biases, activation) 181 | 182 | 183 | def trainBasicClassification( 184 | hidden, 185 | inputDims, 186 | outputDims, 187 | width, 188 | cycles, 189 | epochs, 190 | alpha, 191 | trainIn, 192 | trainOut, 193 | valIn, 194 | valOut, 195 | name, 196 | callbacks=True, 197 | callbackMetric="val_loss", 198 | patience=10): 199 | """ Trains a basic binary classification neural network and returns its 200 | weights. Uses the amsgrad optimizer and a learning rate of 0.01 which 201 | decays by a factor of 10 each cycle. The activation function is leaky_relu 202 | with the specified alpha value. Saves the network as name in case something 203 | goes wrong with the BNN code so the network does not need to be retrained. 
204 | 205 | Arguments: 206 | * hidden: number of hidden layers 207 | * inputDims: input dimension 208 | * outputDims: output dimension 209 | * width: width of hidden layers 210 | * cycles: number of training cycles with decaying learning rates 211 | * epochs: number of epochs per cycle 212 | * alpha: slope value for leaky ReLU 213 | * trainIn: training input data 214 | * trainOut: training output data 215 | * valIn: validation input data 216 | * valOut: validation output data 217 | * callbacks: whether to use callbacks 218 | * callbackMetric: metric to use for early stopping 219 | * patience: early stopping patience 220 | Returns: 221 | * weights: list containing all weight matrices 222 | * biases: list containing all bias vectors 223 | * activation: list containing all activation vectors 224 | """ 225 | 226 | tf.random.set_seed(1000) 227 | 228 | model = tf.keras.Sequential() 229 | 230 | model.add( 231 | tf.keras.layers.Dense( 232 | width, 233 | kernel_initializer="glorot_uniform", 234 | input_shape=( 235 | inputDims, 236 | ))) 237 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 238 | 239 | for n in range(hidden - 1): 240 | model.add( 241 | tf.keras.layers.Dense( 242 | width, 243 | kernel_initializer="glorot_uniform")) 244 | model.add(tf.keras.layers.LeakyReLU(alpha=alpha)) 245 | 246 | model.add( 247 | tf.keras.layers.Dense( 248 | outputDims, 249 | kernel_initializer="glorot_uniform", 250 | activation="sigmoid")) 251 | 252 | callback = tf.keras.callbacks.EarlyStopping( 253 | monitor=callbackMetric, patience=patience, restore_best_weights=True) 254 | 255 | for x in range(cycles): 256 | model.compile(optimizer=tf.keras.optimizers.Adam(0.001 * (10**(-x)), 257 | amsgrad=True), 258 | loss=tf.keras.losses.BinaryCrossentropy(), 259 | metrics=["accuracy", "mse"]) 260 | model.summary() 261 | if(callbacks): 262 | model.fit( 263 | trainIn, 264 | trainOut, 265 | validation_data=( 266 | valIn, 267 | valOut), 268 | epochs=epochs, 269 | batch_size=32, 270 | callbacks=[callback]) 271 | else: 272 | model.fit( 273 | trainIn, 274 | trainOut, 275 | validation_data=( 276 | valIn, 277 | valOut), 278 | epochs=epochs, 279 | batch_size=32) 280 | 281 | # Save the backup 282 | model.save(name) 283 | 284 | # Extract weights and biases 285 | weights = [] 286 | biases = [] 287 | activation = [] 288 | for layer in model.layers: 289 | weightBias = layer.get_weights() 290 | if(len(weightBias) == 2): 291 | weights.append(weightBias[0].T) 292 | bias = weightBias[1] 293 | bias = np.reshape(bias, (len(bias), 1)) 294 | biases.append(bias) 295 | if(len(weightBias) == 1): 296 | activation.append(weightBias[0]) 297 | 298 | return(weights, biases, activation) 299 | -------------------------------------------------------------------------------- /tensorBNN/activationFunctions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import tensorflow as tf 4 | import tensorflow_probability as tfp 5 | 6 | from tensorflow.python.ops import gen_nn_ops 7 | 8 | from tensorBNN.BNN_functions import multivariateLogProb 9 | from tensorBNN.layer import Layer 10 | 11 | tfd = tfp.distributions 12 | 13 | 14 | class Exp(Layer): 15 | """Exponential activation function""" 16 | 17 | def __init__(self, inputDims=None, outputDims=None): 18 | self.numTensors = 0 19 | self.numHyperTensors = 0 20 | self.name = "Exp" 21 | 22 | def predict(self, inputTensor, _): 23 | result = tf.math.exp(inputTensor) 24 | return(result) 25 | 26 | 27 | class Relu(Layer): 28 | """Relu activation function""" 29 | 
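These activation objects hold no sampled parameters (numTensors = 0); they simply transform the tensor produced by the previous layer, and the second argument of predict is kept only for interface compatibility. A small standalone check with a made-up input tensor:

```
import tensorflow as tf

from tensorBNN.activationFunctions import Exp, Relu

x = tf.constant([[-1.0, 0.0, 2.0]])

# The second argument would hold layer tensors; it is unused here
print(Exp().predict(x, None).numpy())   # elementwise exponential
print(Relu().predict(x, None).numpy())  # negative entries become 0
```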
30 | def __init__(self, inputDims=None, outputDims=None): 31 | self.numTensors = 0 32 | self.numHyperTensors = 0 33 | self.name = "relu" 34 | 35 | def predict(self, inputTensor, _): 36 | result = gen_nn_ops.relu(inputTensor) 37 | return(result) 38 | 39 | 40 | class Sigmoid(Layer): 41 | """Sigmoid activation function""" 42 | 43 | def __init__(self, inputDims=None, outputDims=None): 44 | self.numTensors = 0 45 | self.numHyperTensors = 0 46 | self.name = "sigmoid" 47 | 48 | def predict(self, inputTensor, _): 49 | result = tf.math.sigmoid(inputTensor) 50 | return(result) 51 | 52 | 53 | class Tanh(Layer): 54 | """Tanh activation function""" 55 | 56 | def __init__(self, inputDims=None, outputDims=None): 57 | self.numTensors = 0 58 | self.numHyperTensors = 0 59 | self.name = "tanh" 60 | 61 | def predict(self, inputTensor, _): 62 | result = tf.math.tanh(inputTensor) 63 | return(result) 64 | 65 | 66 | class Elu(Layer): 67 | """Elu activation function""" 68 | 69 | def __init__(self, inputDims=None, outputDims=None): 70 | self.numTensors = 0 71 | self.numHyperTensors = 0 72 | self.name = "elu" 73 | 74 | def predict(self, inputTensor, _): 75 | result = gen_nn_ops.elu(inputTensor) 76 | return(result) 77 | 78 | 79 | class Softmax(Layer): 80 | """Softmax activation function""" 81 | 82 | def __init__(self, inputDims=None, outputDims=None): 83 | self.numTensors = 0 84 | self.numHyperTensors = 0 85 | self.name = "softmax" 86 | 87 | def predict(self, inputTensor, _): 88 | result = gen_nn_ops.softmax(inputTensor) 89 | return(result) 90 | 91 | 92 | class Leaky_relu(Layer): 93 | """Leaky relu activation function""" 94 | 95 | def __init__(self, alpha=0.3, inputDims=None, outputDims=None, 96 | activation=None): 97 | self.numTensors = 1 98 | self.numHyperTensors = 0 99 | self.name = "leakyrelu" 100 | if activation is not None: 101 | alpha = activation 102 | self.parameters = [alpha] 103 | 104 | def predict(self, inputTensor, _): 105 | result = tf.nn.leaky_relu(inputTensor, self.parameters[0]) 106 | return(result) 107 | 108 | def calculateProbs(self, *args): 109 | """Present for compatability.""" 110 | return(0.0) 111 | 112 | def updateParameters(self, *args): 113 | """Present for compatability.""" 114 | self.parameters = self.parameters 115 | 116 | 117 | class Prelu(Layer): 118 | """Prelu activation function""" 119 | 120 | def __init__( 121 | self, 122 | inputDims, 123 | outputDims=None, 124 | dtype=np.float32, 125 | alpha=0.2, 126 | activation=None, 127 | seed=1): 128 | """ 129 | Arguments: 130 | * inputDims: number of input dimensions 131 | * dtype: data type of input and output values 132 | * alpha: Single custom starting slope value 133 | * activation: optional custom values for starting slope values 134 | * seed: seed used for random numbers 135 | """ 136 | self.numTensors = 1 # Number of tensors used for predictions 137 | self.numHyperTensors = 1 # Number of tensor for hyper paramaters 138 | self.inputDims = inputDims 139 | self.dtype = dtype 140 | self.seed = seed 141 | self.name = "prelu" 142 | 143 | # Starting rate value and hyperRate 144 | rate = tf.cast(0.3, dtype) 145 | self.hyperRate = tf.cast(0.3, self.dtype) 146 | 147 | # Starting weight mean, weight SD, bias mean, and bias SD 148 | 149 | self.hypers = [tf.cast(rate, self.dtype)] 150 | 151 | # Starting weights and biases 152 | if(activation is None): 153 | self.parameters = [ 154 | alpha * 155 | tf.ones( 156 | shape=(inputDims), 157 | dtype=self.dtype)] 158 | else: 159 | self.parameters = [activation] 160 | 161 | @tf.function 162 | def 
exponentialLogProb(self, rate, x): 163 | """Calcualtes the log probability of an exponential distribution. 164 | 165 | Arguments: 166 | * rate: rate parameter for the distribution 167 | * x: input value 168 | Returns: 169 | * logProb: log probability of x 170 | """ 171 | 172 | rate = tf.math.abs(rate) 173 | logProb = -rate * x + tf.math.log(rate) 174 | 175 | return(logProb) 176 | 177 | @tf.function 178 | def calculateProbs(self, slopes): 179 | """Calculates the log probability of the slopes given 180 | their distributions in this layer. 181 | 182 | Arguments: 183 | * weightsBias: list with new possible weight and bias tensors 184 | 185 | Returns: 186 | * prob: log prob of weights and biases given their distributions 187 | """ 188 | 189 | val = self.exponentialLogProb(self.hypers[0], slopes) 190 | prob = tf.reduce_sum(input_tensor=val) 191 | 192 | return(prob) 193 | 194 | @tf.function 195 | def calculateHyperProbs(self, hypers, slopes): 196 | """Calculates the log probability of a set of weights and biases given 197 | new distribtuions as well as the probability of the new distribution 198 | means and SDs given their distribtuions. 199 | 200 | Arguments: 201 | * hypers: a list containg 4 new possible hyper parameters 202 | * weightBias: a list with the current weight and bias matrices 203 | 204 | Returns: 205 | * prob: log probability of weights and biases given the new hypers 206 | and the probability of the new hyper parameters given their priors 207 | """ 208 | 209 | slopes = tf.math.abs(slopes[0]) 210 | prob = 0 211 | 212 | # Calculate probability of new hypers 213 | val = self.exponentialLogProb(self.hyperRate, hypers[0]) 214 | prob += tf.reduce_sum(input_tensor=val) 215 | 216 | # Calculate probability of weights and biases given new hypers 217 | val = self.exponentialLogProb(hypers[0], slopes) 218 | prob += tf.reduce_sum(input_tensor=val) 219 | 220 | return(prob) 221 | 222 | @tf.function 223 | def expand(self, current): 224 | """Expands tensors to that they are of rank 2 225 | 226 | Arguments: 227 | * current: tensor to expand 228 | Returns: 229 | * expanded: expanded tensor 230 | 231 | """ 232 | currentShape = tf.pad( 233 | tensor=tf.shape(input=current), 234 | paddings=[[tf.where(tf.rank(current) > 1, 0, 1), 0]], 235 | constant_values=1) 236 | expanded = tf.reshape(current, currentShape) 237 | return(expanded) 238 | 239 | def predict(self, inputTensor, slopes): 240 | """Calculates the output of the layer based on the given input tensor 241 | and weight and bias values 242 | 243 | Arguments: 244 | * inputTensor: the input tensor the layer acts on 245 | * weightBias: a list with the current weight and bias tensors 246 | Returns: 247 | * result: the output of the layer 248 | 249 | """ 250 | slopes = slopes[0] 251 | slopes = tf.squeeze(slopes) 252 | slopes = tf.reshape(slopes, (len(slopes), 1)) 253 | activated = tf.multiply(slopes, inputTensor) 254 | result = tf.where(tf.math.less(inputTensor, 0), activated, inputTensor) 255 | return(self.expand(result)) 256 | 257 | def updateParameters(self, slopes): 258 | """ Updates the network parameters 259 | 260 | Arguments: 261 | * slopes: new slope parameter 262 | """ 263 | self.parameters = [slopes[0]] 264 | 265 | def updateHypers(self, hypers): 266 | """ Updates the network parameters 267 | 268 | Arguments: 269 | * slopes: new slope parameter 270 | """ 271 | self.hypers = [tf.maximum(tf.cast(0.01, self.dtype), hypers[0])] 272 | 273 | 274 | class SquarePrelu(Layer): 275 | """Prelu activation function""" 276 | 277 | def __init__( 278 | self, 279 
| inputDims, 280 | outputDims=None, 281 | dtype=np.float32, 282 | alpha=0.2, 283 | activation=None, 284 | seed=1): 285 | """ 286 | Arguments: 287 | * inputDims: number of input dimensions 288 | * dtype: data type of input and output values 289 | * alpha: Single custom starting slope value 290 | * activation: optional custom values for starting slope values 291 | * seed: seed used for random numbers 292 | """ 293 | self.numTensors = 1 # Number of tensors used for predictions 294 | self.numHyperTensors = 2 # Number of tensor for hyper paramaters 295 | self.inputDims = inputDims 296 | self.dtype = dtype 297 | self.seed = seed 298 | self.name = "squareprelu" 299 | 300 | # Starting rate value and hyperRate 301 | mean = tf.cast(0.0, dtype) 302 | sd = tf.cast(0.3, dtype) 303 | 304 | meanMean = tf.cast(0.0, dtype) 305 | meanSD = tf.cast(0.3, dtype) 306 | sdMean = tf.cast(0.3, dtype) 307 | sdSD = tf.cast(0.1, dtype) 308 | 309 | self.meanHyper = tfd.MultivariateNormalDiag(loc=[meanMean], 310 | scale_diag=[meanSD]) 311 | 312 | self.sdHyper = tfd.MultivariateNormalDiag(loc=[sdMean], 313 | scale_diag=[sdSD]) 314 | 315 | # Starting weight mean, weight SD, bias mean, and bias SD 316 | 317 | self.hypers = [mean, sd] 318 | 319 | # Starting weights and biases 320 | if(activation is None): 321 | self.parameters = [ 322 | alpha * 323 | tf.ones( 324 | shape=(inputDims), 325 | dtype=self.dtype)] 326 | else: 327 | self.parameters = [activation] 328 | 329 | @tf.function 330 | def calculateProbs(self, slopes): 331 | """Calculates the log probability of the slopes given 332 | their distributions in this layer. 333 | 334 | Arguments: 335 | * weightsBias: list with new possible weight and bias tensors 336 | 337 | Returns: 338 | * prob: log prob of weights and biases given their distributions 339 | """ 340 | 341 | prob = tf.reduce_sum( 342 | multivariateLogProb( 343 | self.hypers[1], 344 | self.hypers[0], 345 | slopes, 346 | dtype=self.dtype)) 347 | 348 | return(prob) 349 | 350 | @tf.function 351 | def calculateHyperProbs(self, hypers, slopes): 352 | """Calculates the log probability of a set of weights and biases given 353 | new distribtuions as well as the probability of the new distribution 354 | means and SDs given their distribtuions. 
355 | 356 | Arguments: 357 | * hypers: a list containg 4 new possible hyper parameters 358 | * weightBias: a list with the current weight and bias matrices 359 | 360 | Returns: 361 | * prob: log probability of weights and biases given the new hypers 362 | and the probability of the new hyper parameters given their priors 363 | """ 364 | 365 | mean = hypers[0] 366 | sd = hypers[1] 367 | 368 | slopes = tf.square(slopes[0]) 369 | 370 | prob = tf.reduce_sum( 371 | multivariateLogProb( 372 | sd, mean, slopes, dtype=self.dtype)) 373 | 374 | # Calculate probability of new hypers 375 | val = self.meanHyper.log_prob([mean]) 376 | prob += tf.reduce_sum(input_tensor=val) 377 | 378 | # Calculate probability of weights and biases given new hypers 379 | val = self.sdHyper.log_prob([sd]) 380 | prob += tf.reduce_sum(input_tensor=val) 381 | 382 | return(prob) 383 | 384 | @tf.function 385 | def expand(self, current): 386 | """Expands tensors to that they are of rank 2 387 | 388 | Arguments: 389 | * current: tensor to expand 390 | Returns: 391 | * expanded: expanded tensor 392 | 393 | """ 394 | currentShape = tf.pad( 395 | tensor=tf.shape(input=current), 396 | paddings=[[tf.where(tf.rank(current) > 1, 0, 1), 0]], 397 | constant_values=1) 398 | expanded = tf.reshape(current, currentShape) 399 | return(expanded) 400 | 401 | def predict(self, inputTensor, slopes): 402 | """Calculates the output of the layer based on the given input tensor 403 | and weight and bias values 404 | 405 | Arguments: 406 | * inputTensor: the input tensor the layer acts on 407 | * weightBias: a list with the current weight and bias tensors 408 | Returns: 409 | * result: the output of the layer 410 | 411 | """ 412 | slopes = slopes[0]**2 413 | slopes = tf.squeeze(slopes) 414 | slopes = tf.reshape(slopes, (len(slopes), 1)) 415 | activated = tf.multiply(slopes, inputTensor) 416 | result = tf.where(tf.math.less(inputTensor, 0), activated, inputTensor) 417 | return(self.expand(result)) 418 | 419 | def updateParameters(self, slopes): 420 | """ Updates the network parameters 421 | 422 | Arguments: 423 | * slopes: new slope parameter 424 | """ 425 | self.parameters = [slopes[0]] 426 | 427 | def updateHypers(self, hypers): 428 | """ Updates the network parameters 429 | 430 | Arguments: 431 | * slopes: new slope parameter 432 | """ 433 | self.hypers = [hypers[0], hypers[1]] 434 | -------------------------------------------------------------------------------- /tensorBNN/layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tensorflow_probability as tfp 4 | 5 | from tensorBNN.BNN_functions import cauchyLogProb, multivariateLogProb 6 | 7 | tfd = tfp.distributions 8 | 9 | 10 | class Layer(object): 11 | """ A basic layer object. This must have input and output dimensions, but 12 | the remaining variables can be used depending on the specific use. This 13 | object can be implemented as a basic layer or as an activation function. 
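To illustrate this interface, a custom activation can subclass Layer, declare zero parameter and hyperparameter tensors, and override predict. The Softplus class below is a hypothetical example, not part of the library, written in the same style as the built-in activation functions:

```
import tensorflow as tf

from tensorBNN.layer import Layer


class Softplus(Layer):
    """Softplus activation function (illustrative custom layer)."""

    def __init__(self, inputDims=None, outputDims=None):
        self.numTensors = 0       # no sampled parameter tensors
        self.numHyperTensors = 0  # no hyperparameter tensors
        self.name = "softplus"

    def predict(self, inputTensor, _):
        return(tf.math.softplus(inputTensor))
```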
14 | """ 15 | 16 | def __init__( 17 | self, 18 | inputDims, 19 | outputDims, 20 | weights=None, 21 | biases=None, 22 | activation=None, 23 | dtype=np.float32, 24 | alpha=0, 25 | seed=1): 26 | """ 27 | Arguments: 28 | * inputDims: number of input dimensions 29 | * outputDims: number of output dimensions 30 | * weights: list of starting weight matrices 31 | * biases: list of starting bias vectors 32 | * activation: list of starting activation function values 33 | * dtype: data type of input and output values 34 | * alpha: constant used for activation functions 35 | * seed: seed used for random numbers 36 | """ 37 | self.numTensors = 0 # Number of tensors used for predictions 38 | self.numHyperTensors = 0 # Number of tensor for hyper paramaters 39 | self.inputDims = inputDims 40 | self.outputDims = outputDims 41 | self.dtype = dtype 42 | self.seed = seed 43 | self.name = "name" 44 | 45 | def calculateProbs(self, tensors): 46 | """Calculates the log probability of a set of tensors given 47 | their distributions in this layer. 48 | 49 | Arguments: 50 | * tensors: list with new possible tensors for layer 51 | 52 | Returns: 53 | * prob: log prob of new tensors given their distributions 54 | """ 55 | return(tf.Constant(0.0, shape=(), dtype=tf.float32)) 56 | 57 | def calculateHyperProbs(self, hypers, tensors): 58 | """Calculates the log probability of a set of tensors given 59 | new distribtuions as well as the probability of the new distribution 60 | means and SDs given their distribtuions. 61 | 62 | Arguments: 63 | * hypers: a list containg new possible hyper parameters 64 | * tensors: a list with the current tensors 65 | 66 | Returns: 67 | * prob: log probability of tensors given the new hypers 68 | and the probability of the new hyper parameters given their priors 69 | """ 70 | return(tf.constant(0.0)) 71 | 72 | def expand(self, current): 73 | """Expands tensors to that they are of rank 2 74 | 75 | Arguments: 76 | * current: tensor to expand 77 | Returns: 78 | * expanded: expanded tensor 79 | 80 | """ 81 | currentShape = tf.pad( 82 | tensor=tf.shape(input=current), 83 | paddings=[[tf.where(tf.rank(current) > 1, 0, 1), 0]], 84 | constant_values=1) 85 | expanded = tf.reshape(current, currentShape) 86 | return(expanded) 87 | 88 | def predict(self, inputTensor, tensors): 89 | """Calculates the output of the layer based on the given input tensor 90 | and weight and bias values 91 | 92 | Arguments: 93 | * inputTensor: the input tensor the layer acts on 94 | * tensors: a list with the current layer tensors 95 | Returns: 96 | * result: the output of the layer 97 | """ 98 | pass 99 | 100 | 101 | class CauchyDenseLayer(Layer): 102 | """Creates a 1 Dimensional Dense Bayesian Layer with Cauchy priors. 103 | 104 | Currently, the starting weight and bias mean values are 0.0 with a standard 105 | deviation of 1.0/sqrt(outputDims). The distribution that these values are 106 | subject to have these values as their means, and a standard deviation of 107 | 2.0/sqrt(outputDims). 
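As a concrete illustration (the dimensions here are made up), a layer of this type can be constructed directly; when no starting weights and biases are supplied it draws them from its own priors via sample():

```
import tensorflow as tf

from tensorBNN.layer import CauchyDenseLayer

# 4 inputs -> 10 outputs, initialized from the layer's priors
layer = CauchyDenseLayer(4, 10, dtype=tf.float32, seed=1)

x = tf.ones((4, 3), dtype=tf.float32)     # 3 input vectors stored as columns
out = layer.predict(x, layer.parameters)  # output has shape (10, 3)
```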
108 | """ 109 | 110 | def __init__( 111 | self, 112 | inputDims, 113 | outputDims, 114 | weights=None, 115 | biases=None, 116 | dtype=np.float32, 117 | seed=1): 118 | """ 119 | Arguments: 120 | * inputDims: number of input dimensions 121 | * outputDims: number of output dimensions 122 | * weights: list of starting weight matrices 123 | * biases: list of starting bias vectors 124 | * dtype: data type of input and output values 125 | * seed: seed used for random numbers 126 | """ 127 | self.numTensors = 2 # Number of tensors used for predictions 128 | self.numHyperTensors = 4 # Number of tensor for hyper paramaters 129 | self.inputDims = inputDims 130 | self.outputDims = outputDims 131 | self.dtype = dtype 132 | self.seed = seed 133 | self.name = "dense" 134 | 135 | # Weight mean value and mean distribution 136 | weightsx0 = 0.0 137 | self.weightsx0Hyper = tfd.MultivariateNormalDiag(loc=[weightsx0], 138 | scale_diag=[.2]) 139 | 140 | # weight gamma value and gamma distribution 141 | weightsGamma = 0.5**0.5 142 | self.weightsGammaHyper = tfd.MultivariateNormalDiag(loc=[weightsGamma], 143 | scale_diag=[0.5]) 144 | 145 | # bias mean value and mean distribution 146 | biasesx0 = 0.0 147 | self.biasesx0Hyper = tfd.MultivariateNormalDiag(loc=[biasesx0], 148 | scale_diag=[.2]) 149 | 150 | # bias gamma value and gamma distribution 151 | biasesGamma = 0.5**0.5 152 | self.biasesGammaHyper = tfd.MultivariateNormalDiag(loc=[biasesGamma], 153 | scale_diag=[0.5]) 154 | 155 | # Starting weight mean, weight gamma, bias mean, and bias gamma 156 | self.hypers = tf.cast( 157 | [[weightsx0], [weightsGamma], [biasesx0], [biasesGamma]], 158 | self.dtype) 159 | 160 | # Starting weights and biases 161 | if(weights is None): 162 | self.parameters = self.sample() 163 | else: 164 | self.parameters = [weights, biases] 165 | 166 | def calculateProbs(self, hypers, tensors): 167 | """Calculates the log probability of a set of weights and biases given 168 | their distributions in this layer. 169 | 170 | Arguments: 171 | * weightsBias: list with new possible weight and bias tensors 172 | 173 | Returns: 174 | * prob: log prob of weights and biases given their distributions 175 | """ 176 | # Create the tensors used to calculate probability 177 | weightsx0 = hypers[0] 178 | weightsGamma = hypers[1]**2 179 | biasesx0 = hypers[2] 180 | biasesGamma = hypers[3]**2 181 | weights = tensors[0] 182 | biases = tensors[1] 183 | 184 | prob = tf.cast(0, self.dtype) 185 | 186 | # Calculate probability of weights and biases given new hypers 187 | val = cauchyLogProb(weightsGamma[0], weightsx0[0], weights, 188 | dtype=self.dtype) 189 | prob += tf.reduce_sum(input_tensor=val) 190 | val = cauchyLogProb( 191 | biasesGamma[0], 192 | biasesx0[0], 193 | biases, 194 | dtype=self.dtype) 195 | prob += tf.reduce_sum(input_tensor=val) 196 | 197 | return(prob) 198 | 199 | def calculateHyperProbs(self, hypers, tensors): 200 | """Calculates the log probability of a set of weights and biases given 201 | new distribtuions as well as the probability of the new distribution 202 | means and SDs given their distribtuions. 
203 | 204 | Arguments: 205 | * hypers: a list containg 4 new possible hyper parameters 206 | * tensors: a list with the current weight and bias matrices 207 | 208 | Returns: 209 | * prob: log probability of weights and biases given the new hypers 210 | and the probability of the new hyper parameters given their priors 211 | """ 212 | weightsx0 = hypers[0] 213 | weightsGamma = hypers[1]**2 214 | biasesx0 = hypers[2] 215 | biasesGamma = hypers[3]**2 216 | weights = tensors[0] 217 | biases = tensors[1] 218 | 219 | prob = tf.cast(0, self.dtype) 220 | 221 | val = self.weightsx0Hyper.log_prob([[weightsx0]]) 222 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 223 | val = self.weightsGammaHyper.log_prob([[weightsGamma]]) 224 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 225 | 226 | val = self.biasesx0Hyper.log_prob([[biasesx0]]) 227 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 228 | val = self.biasesGammaHyper.log_prob([[biasesGamma]]) 229 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 230 | 231 | # Calculate probability of weights and biases given new hypers 232 | val = cauchyLogProb(weightsGamma[0], weightsx0[0], weights, 233 | dtype=self.dtype) 234 | prob += tf.reduce_sum(input_tensor=val) 235 | val = cauchyLogProb( 236 | biasesGamma[0], 237 | biasesx0[0], 238 | biases, 239 | dtype=self.dtype) 240 | prob += tf.reduce_sum(input_tensor=val) 241 | 242 | return(prob) 243 | 244 | def sample(self): 245 | """Creates randomized weight and bias tensors based off 246 | of their distributions 247 | 248 | Returns: 249 | * tempWeights: randomized weight tensor in first list position 250 | * tempBiases: randomized bias tensor in second list position 251 | """ 252 | 253 | tempWeights = tf.random.normal((self.outputDims, self.inputDims), 254 | mean=self.hypers[0], 255 | stddev=(2 / self.outputDims)**(0.5), 256 | seed=self.seed, 257 | dtype=self.dtype) 258 | tempBiases = tf.random.normal((self.outputDims, 1), 259 | mean=self.hypers[2], 260 | stddev=(2 / self.outputDims)**(0.5), 261 | seed=self.seed + 1, 262 | dtype=self.dtype) 263 | 264 | return([tempWeights, tempBiases]) 265 | 266 | def predict(self, inputTensor, tensors): 267 | """Calculates the output of the layer based on the given input tensor 268 | and weight and bias values 269 | 270 | Arguments: 271 | * inputTensor: the input tensor the layer acts on 272 | * tensors: a list with the current weight and bias tensors 273 | Returns: 274 | * result: the output of the layer 275 | """ 276 | weightTensor = self.expand(tensors[0]) 277 | biasTensor = self.expand(tensors[1]) 278 | result = tf.add(tf.matmul(weightTensor, inputTensor), biasTensor) 279 | return(result) 280 | 281 | 282 | class GaussianDenseLayer(Layer): 283 | """Creates a 1 Dimensional Dense Bayesian Layer with Gaussian priors. 284 | 285 | Currently, the starting weight and bias mean values are 0.0 with a standard 286 | deviation of 1.0/sqrt(outputDims). The distribution that these values are 287 | subject to have these values as their means, and a standard deviation of 288 | 2.0/sqrt(outputDims). 
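For a quick, self-contained look at the prior this layer defines (the sizes below are made up), sample() draws a weight/bias pair and calculateProbs scores that pair under the current hyperparameters:

```
import tensorflow as tf

from tensorBNN.layer import GaussianDenseLayer

layer = GaussianDenseLayer(3, 5, dtype=tf.float32, seed=7)

weights, biases = layer.sample()  # draw a weight matrix and bias vector
logPrior = layer.calculateProbs(layer.hypers, [weights, biases])
print(logPrior.numpy())
```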
289 | """ 290 | 291 | def __init__( 292 | self, 293 | inputDims, 294 | outputDims, 295 | weights=None, 296 | biases=None, 297 | dtype=np.float32, 298 | seed=1): 299 | """ 300 | Arguments: 301 | * inputDims: number of input dimensions 302 | * outputDims: number of output dimensions 303 | * weights: list of starting weight matrices 304 | * biases: list of starting bias vectors 305 | * dtype: data type of input and output values 306 | * seed: seed used for random numbers 307 | """ 308 | self.numTensors = 2 # Number of tensors used for predictions 309 | self.numHyperTensors = 4 # Number of tensor for hyper paramaters 310 | self.inputDims = inputDims 311 | self.outputDims = outputDims 312 | self.dtype = dtype 313 | self.seed = seed 314 | self.name = "denseGaussian" 315 | 316 | # Weight mean value and mean distribution 317 | weightsMean = 0.0 318 | self.weightsMeanHyper = tfd.MultivariateNormalDiag(loc=[weightsMean], 319 | scale_diag=[.1]) 320 | 321 | # weight SD value and SD distribution 322 | weightsSD = 1.0 323 | self.weightsSDHyper = tfd.MultivariateNormalDiag(loc=[weightsSD], 324 | scale_diag=[0.1]) 325 | 326 | # bias mean value and mean distribution 327 | biasesMean = 0.0 328 | self.biasesMeanHyper = tfd.MultivariateNormalDiag(loc=[biasesMean], 329 | scale_diag=[.1]) 330 | 331 | # bias SD value and SD distribution 332 | biasesSD = 1.0 333 | self.biasesSDHyper = tfd.MultivariateNormalDiag(loc=[biasesSD], 334 | scale_diag=[0.1]) 335 | 336 | # Starting weight mean, weight SD, bias mean, and bias SD 337 | self.hypers = tf.cast( 338 | [[weightsMean], [weightsSD], [biasesMean], [biasesSD]], self.dtype) 339 | 340 | # Starting weights and biases 341 | if(weights is None): 342 | self.parameters = self.sample() 343 | else: 344 | self.parameters = [weights, biases] 345 | 346 | def calculateProbs(self, hypers, tensors): 347 | """Calculates the log probability of a set of weights and biases given 348 | their distributions in this layer. 349 | 350 | Arguments: 351 | * weightsBias: list with new possible weight and bias tensors 352 | 353 | Returns: 354 | * prob: log prob of weights and biases given their distributions 355 | """ 356 | # Create the tensors used to calculate probability 357 | weightsMean = hypers[0] 358 | weightsSD = hypers[1]**2 # Ensure positive sd 359 | biasesMean = hypers[2] 360 | biasesSD = hypers[3]**2 # Ensure positive sd 361 | weights = tensors[0] 362 | biases = tensors[1] 363 | 364 | prob = tf.cast(0, self.dtype) 365 | 366 | # Calculate probability of weights and biases given new hypers 367 | val = multivariateLogProb(weightsSD[0], weightsMean[0], weights, 368 | dtype=self.dtype) 369 | prob += tf.reduce_sum(input_tensor=val) 370 | val = multivariateLogProb( 371 | biasesSD[0], 372 | biasesMean[0], 373 | biases, 374 | dtype=self.dtype) 375 | prob += tf.reduce_sum(input_tensor=val) 376 | 377 | return(prob) 378 | 379 | def calculateHyperProbs(self, hypers, tensors): 380 | """Calculates the log probability of a set of weights and biases given 381 | new distribtuions as well as the probability of the new distribution 382 | means and SDs given their distribtuions. 
383 | 384 | Arguments: 385 | * hypers: a list containg 4 new possible hyper parameters 386 | * tensors: a list with the current weight and bias matrices 387 | 388 | Returns: 389 | * prob: log probability of weights and biases given the new hypers 390 | and the probability of the new hyper parameters given their priors 391 | """ 392 | weightsMean = hypers[0] 393 | weightsSD = hypers[1]**2 394 | biasesMean = hypers[2] 395 | biasesSD = hypers[3]**2 396 | weights = tensors[0] 397 | biases = tensors[1] 398 | 399 | prob = tf.cast(0, self.dtype) 400 | 401 | val = self.weightsMeanHyper.log_prob([[weightsMean]]) 402 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 403 | val = self.weightsSDHyper.log_prob([[weightsSD]]) 404 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 405 | 406 | val = self.biasesMeanHyper.log_prob([[biasesMean]]) 407 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 408 | val = self.biasesSDHyper.log_prob([[biasesSD]]) 409 | prob += tf.cast(tf.reduce_sum(input_tensor=val), self.dtype) 410 | 411 | # Calculate probability of weights and biases given new hypers 412 | val = multivariateLogProb(weightsSD[0], weightsMean[0], weights, 413 | dtype=self.dtype) 414 | prob += tf.reduce_sum(input_tensor=val) 415 | val = multivariateLogProb( 416 | biasesSD[0], 417 | biasesMean[0], 418 | biases, 419 | dtype=self.dtype) 420 | prob += tf.reduce_sum(input_tensor=val) 421 | 422 | return(prob) 423 | 424 | def sample(self): 425 | """Creates randomized weight and bias tensors based off 426 | of their distributions 427 | 428 | Returns: 429 | * tempWeights: randomized weight tensor in first list position 430 | * tempBiases: randomized bias tensor in second list position 431 | """ 432 | 433 | tempWeights = tf.random.normal((self.outputDims, self.inputDims), 434 | mean=self.hypers[0], 435 | stddev=(2 / self.outputDims)**(0.5), 436 | seed=self.seed, 437 | dtype=self.dtype) 438 | tempBiases = tf.random.normal((self.outputDims, 1), 439 | mean=self.hypers[2], 440 | stddev=(2 / self.outputDims)**(0.5), 441 | seed=self.seed + 1, 442 | dtype=self.dtype) 443 | 444 | return([tempWeights, tempBiases]) 445 | 446 | def predict(self, inputTensor, tensors): 447 | """Calculates the output of the layer based on the given input tensor 448 | and weight and bias values 449 | 450 | Arguments: 451 | * inputTensor: the input tensor the layer acts on 452 | * tensors: a list with the current weight and bias tensors 453 | Returns: 454 | * result: the output of the layer 455 | """ 456 | weightTensor = self.expand(tensors[0]) 457 | biasTensor = self.expand(tensors[1]) 458 | result = tf.add(tf.matmul(weightTensor, inputTensor), biasTensor) 459 | return(result) 460 | 461 | DenseLayer = CauchyDenseLayer # For backwards compatibiltiy 462 | -------------------------------------------------------------------------------- /tensorBNN/likelihood.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_probability as tfp 3 | 4 | from tensorBNN.BNN_functions import multivariateLogProb 5 | 6 | tfd = tfp.distributions 7 | 8 | 9 | class Likelihood(object): 10 | def __init__(self, *argv, **kwargs): 11 | """ 12 | When declared, this constructor will be given keywords corresponding 13 | to any possible hyper parameters, as well as whether it should be 14 | calculated when the hyperparameters are being adjusted. This will 15 | likely be the case if it has hyper paramters. 
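Concretely, each likelihood defined below is constructed with its hyperparameters as keyword arguments and records, through mainProbsInHypers, whether the data term must be recomputed while hyperparameters are updated. A short sketch; the sd value is arbitrary:

```
from tensorBNN.likelihood import GaussianLikelihood, FixedGaussianLikelihood

adjustable = GaussianLikelihood(sd=0.2)  # sd becomes a sampled hyperparameter
fixed = FixedGaussianLikelihood(sd=0.2)  # sd stays fixed during training

print(adjustable.hypers, adjustable.mainProbsInHypers)  # [[~0.447]], True
print(fixed.hypers, fixed.mainProbsInHypers)            # [], False
```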
16 | """ 17 | self.hypers = [] 18 | self.mainProbsInHypers = False 19 | 20 | def makeResponseLikelihood(self, *argv, **kwargs): 21 | """ This method will make a prediction and predict its probability 22 | given the likelihood funtion implemented. It will need at least the 23 | following inputs and must have the following outputs: 24 | Arguments: 25 | * argv: an undetermined number of tensors containg the weights 26 | and biases 27 | * realVals: the actual values for the predicted quantities 28 | * predict: the function used to make a prediction from the 29 | current neural net 30 | * dtype: the datatype of the network 31 | Returns: 32 | * result: the log probabilities of the real vals given the 33 | predicted values 34 | """ 35 | self.hypers = [] 36 | 37 | def calculateLogProb(self, *argv, **kwargs): 38 | """ This is a version of makeResponseLikelihood designed to deal with 39 | multiple sets of hyper paramters. It is used for reweighting in the 40 | predictor object, not during training as makeResponseLikelihood is. 41 | It also requires at least the following inputs and outputs: 42 | Arguments: 43 | * argv: an undetermined number of tensors containg the weights 44 | and biases 45 | * realVals: the actual values for the predicted quantities 46 | * hypers: A list containing all the hyper paramters 47 | * predict: the function used to make a prediction from the 48 | current neural net 49 | * dtype: the datatype of the network 50 | Returns: 51 | * result: the log probabilities of the real vals given the 52 | predicted values 53 | """ 54 | pass 55 | 56 | def display(self, hypers): 57 | """An optional method which can be used to display relavent information 58 | during the evaluation phase of a network. 59 | """ 60 | pass 61 | 62 | 63 | class GaussianLikelihood(Likelihood): 64 | 65 | def __init__(self, *argv, **kwargs): 66 | self.hypers = [[kwargs["sd"]**0.5]] 67 | self.mainProbsInHypers = True 68 | 69 | def makeResponseLikelihood(self, *argv, **kwargs): 70 | """Make a prediction and predict its probability from a multivariate 71 | normal distribution 72 | 73 | Arguments: 74 | * argv: an undetermined number of tensors containg the weights 75 | and biases 76 | * realVals: the actual values for the predicted quantities 77 | * sd: standard deviation for output distribution, uses 78 | current hyper parameter value if nothing is given 79 | * hyperStates: A list containing all the hyper paramters 80 | * predict: the function used to make a prediction from the 81 | current neural net 82 | * dtype: the datatype of the network 83 | Returns: 84 | * result: the log probabilities of the real vals given the 85 | predicted values 86 | """ 87 | 88 | sd = kwargs["hyperStates"][-1]**2 89 | 90 | current = kwargs["predict"](True, argv[0]) 91 | current = tf.transpose(current) 92 | sigma = tf.ones_like(current) * sd 93 | realVals = tf.reshape(kwargs["realVals"], current.shape) 94 | result = multivariateLogProb(sigma, current, realVals, kwargs["dtype"]) 95 | 96 | return(result) 97 | 98 | def calcultateLogProb(self, *argv, **kwargs): 99 | """Make a prediction and predict its probability from a multivariate 100 | normal distribution 101 | 102 | rguments: 103 | * argv: an undetermined number of tensors containg the weights 104 | and biases 105 | * realVals: the actual values for the predicted quantities 106 | * hypers: A list containing all the hyper paramters 107 | * predict: the function used to make a prediction from the 108 | current neural net 109 | * dtype: the datatype of the network 110 | * n: Use every n 
networks 111 | Returns: 112 | * result: the log probabilities of the real vals given the 113 | predicted values 114 | """ 115 | if(kwargs["sd"] is None): 116 | sd = kwargs["hyperStates"][-1] 117 | else: 118 | sd = kwargs["sd"] 119 | current = kwargs["predict"](argv[0], n=kwargs["n"]) 120 | for x in range(len(current)): 121 | current[x] = tf.transpose(current[x]) 122 | realVals = tf.reshape(kwargs["realVals"], current[0].shape) 123 | result = [] 124 | for x in range(len(current)): 125 | 126 | result.append(multivariateLogProb(tf.ones_like(current[0]) * sd, 127 | current[x], realVals, 128 | kwargs["dtype"])) 129 | return(result) 130 | 131 | def display(self, hypers): 132 | print("Loss Standard Deviation: ", hypers[-1].numpy()[0]**2) 133 | pass 134 | 135 | 136 | class FixedGaussianLikelihood(Likelihood): 137 | 138 | def __init__(self, *argv, **kwargs): 139 | self.hypers = [] 140 | self.sd = kwargs["sd"] 141 | self.mainProbsInHypers = False 142 | 143 | def makeResponseLikelihood(self, *argv, **kwargs): 144 | """Make a prediction and predict its probability from a multivariate 145 | normal distribution 146 | 147 | Arguments: 148 | * argv: an undetermined number of tensors containg the weights 149 | and biases 150 | * realVals: the actual values for the predicted quantities 151 | * sd: standard deviation for output distribution, uses 152 | current hyper parameter value if nothing is given 153 | * hyperStates: A list containing all the hyper paramters 154 | * predict: the function used to make a prediction from the 155 | current neural net 156 | * dtype: the datatype of the network 157 | Returns: 158 | * result: the log probabilities of the real vals given the 159 | predicted values 160 | """ 161 | 162 | sd = tf.cast(self.sd, kwargs["dtype"]) 163 | current = kwargs["predict"](True, argv[0]) 164 | current = tf.transpose(current) 165 | sigma = tf.ones_like(current) * sd 166 | realVals = tf.reshape(kwargs["realVals"], current.shape) 167 | result = multivariateLogProb(sigma, current, realVals, kwargs["dtype"]) 168 | 169 | return(result) 170 | 171 | def calcultateLogProb(self, *argv, **kwargs): 172 | """Make a prediction and predict its probability from a multivariate 173 | normal distribution 174 | 175 | rguments: 176 | * argv: an undetermined number of tensors containg the weights 177 | and biases 178 | * realVals: the actual values for the predicted quantities 179 | * hypers: A list containing all the hyper paramters 180 | * predict: the function used to make a prediction from the 181 | current neural net 182 | * dtype: the datatype of the network 183 | * n: Use every n networks 184 | Returns: 185 | * result: the log probabilities of the real vals given the 186 | predicted values 187 | """ 188 | current = kwargs["predict"](argv[0], n=kwargs["n"]) 189 | for x in range(len(current)): 190 | current[x] = tf.transpose(current[x]) 191 | realVals = tf.reshape(kwargs["realVals"], current[0].shape) 192 | result = [] 193 | for x in range(len(current)): 194 | temp = multivariateLogProb(tf.ones_like(current[0]) * self.sd, 195 | current[x], realVals, 196 | kwargs["dtype"]) 197 | result.append(temp) 198 | 199 | return(result) 200 | 201 | def display(self, hypers): 202 | pass 203 | 204 | 205 | class BernoulliLikelihood(Likelihood): 206 | def __init__(self, *argv, **kwargs): 207 | self.hypers = [] 208 | self.mainProbsInHypers = False 209 | 210 | def makeResponseLikelihood(self, *argv, **kwargs): 211 | """Make a prediction and predict its probability from a Bernoulli 212 | normal distribution 213 | 214 | Arguments: 
215 | * argv: an undetermined number of tensors containg the weights 216 | and biases 217 | * realVals: the actual values for the predicted quantities 218 | * predict: the function used to make a prediction from the 219 | current neural net 220 | * dtype: the datatype of the network 221 | Returns: 222 | * result: the log probabilities of the real vals given the 223 | predicted values 224 | """ 225 | current = kwargs["predict"](True, argv[0]) 226 | current = tf.cast( 227 | tf.clip_by_value( 228 | current, 229 | 1e-8, 230 | 1 - 1e-7), 231 | kwargs["dtype"]) 232 | 233 | # Prediction distribution 234 | dist = tfd.Bernoulli( 235 | probs=current) 236 | result = dist.log_prob(tf.transpose(kwargs["realVals"])) 237 | return(result) 238 | 239 | def calcultateLogProb(self, *argv, **kwargs): 240 | result = [] 241 | for x in range(len(kwargs["hypers"])): 242 | result.append(tf.cast(0, kwargs["dtype"])) 243 | return(result) 244 | -------------------------------------------------------------------------------- /tensorBNN/metrics.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Metric(object): 5 | """ A basic metric object. This can be implemented into any desired 6 | metric within the BNN training loop. 7 | """ 8 | def __init__(self, scaleExp = False, mean=0, sd=1, *argv, **kwargs): 9 | self.scaleExp = scaleExp 10 | self.mean = mean 11 | self.sd = sd 12 | 13 | def calculate(self, predictionsTrain, predictionValidate, realTrain, 14 | realValidate, *argv, **kwargs): 15 | """ Calculates the metric 16 | 17 | Arguments: 18 | * predictionsTrain: training predictions 19 | * predictionsValidate: validation predictions 20 | * realTrain: real training values 21 | * realValidate: real validation values 22 | """ 23 | pass 24 | 25 | def display(self): 26 | """Displays the metric""" 27 | pass 28 | 29 | 30 | class SquaredError(Metric): 31 | """ Calculates the mean squared error of a prediction. 
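Because the interface above only asks for calculate and display, new monitoring quantities are easy to add. The AbsoluteError class below is a hypothetical example, not part of the library; it applies the same mean/sd un-scaling as the built-in metrics (the optional exp scaling is omitted for brevity) and could be passed in a metricList alongside them:

```
import tensorflow as tf

from tensorBNN.metrics import Metric


class AbsoluteError(Metric):
    """Mean absolute error of training and validation predictions."""

    def calculate(self, predictionsTrain, predictionsValidate, realTrain,
                  realValidate):
        # Undo the output scaling, as the built-in metrics do
        predictionsTrain = tf.transpose(predictionsTrain) * self.sd + self.mean
        predictionsValidate = (tf.transpose(predictionsValidate) * self.sd
                               + self.mean)
        realTrain = tf.reshape(realTrain * self.sd + self.mean,
                               predictionsTrain.shape)
        realValidate = tf.reshape(realValidate * self.sd + self.mean,
                                  predictionsValidate.shape)

        self.maeTrain = tf.reduce_mean(
            tf.abs(predictionsTrain - realTrain)).numpy()
        self.maeValidate = tf.reduce_mean(
            tf.abs(predictionsValidate - realValidate)).numpy()

    def display(self):
        print("training absolute error{: 9.5f}".format(self.maeTrain),
              "validation absolute error{: 9.5f}".format(self.maeValidate))
```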
32 | """ 33 | 34 | def calculate(self, predictionsTrain, predictionsValidate, realTrain, 35 | realValidate): 36 | 37 | predictionsTrain = tf.add(tf.multiply(tf.transpose(predictionsTrain), 38 | self.sd), self.mean) 39 | predictionsValidate = tf.add(tf.multiply(tf.transpose(predictionsValidate), 40 | self.sd), self.mean) 41 | 42 | realTrain = tf.add(tf.multiply(realTrain, self.sd), self.mean) 43 | realValidate = tf.add(tf.multiply(realValidate, self.sd), self.mean) 44 | 45 | if(self.scaleExp): 46 | predictionsTrain = tf.exp(predictionsTrain) 47 | realTrain = tf.exp(realTrain) 48 | realValidate = tf.exp(realValidate) 49 | 50 | realTrain = tf.reshape(realTrain, predictionsTrain.shape) 51 | realValidate = tf.reshape(realValidate, predictionsValidate.shape) 52 | 53 | 54 | squaredError = tf.reduce_mean( 55 | input_tensor=tf.math.squared_difference( 56 | predictionsTrain, realTrain)) 57 | self.squaredErrorTrain=squaredError.numpy() 58 | 59 | squaredError = tf.reduce_mean( 60 | input_tensor=tf.math.squared_difference( 61 | predictionsValidate, realValidate)) 62 | self.squaredErrorValidate=squaredError.numpy() 63 | 64 | def display(self): 65 | 66 | print("training squared error{: 9.5f}".format(self.squaredErrorTrain), 67 | "validation squared error{: 9.5f}".format( 68 | self.squaredErrorValidate)) 69 | 70 | class PercentError(Metric): 71 | """Calculates percent error of a prediction""" 72 | def __init__(self, scaleExp = False, mean=0, sd=1, *argv, **kwargs): 73 | self.scaleExp =scaleExp 74 | self.mean = mean 75 | self.sd = sd 76 | 77 | 78 | def calculate(self, predictionsTrain, predictionsValidate, realTrain, 79 | realValidate): 80 | predictionsTrain = tf.add(tf.multiply(tf.transpose(predictionsTrain), 81 | self.sd), self.mean) 82 | predictionsValidate = tf.add(tf.multiply(tf.transpose(predictionsValidate), 83 | self.sd), self.mean) 84 | 85 | realTrain = tf.add(tf.multiply(realTrain, self.sd), self.mean) 86 | realValidate = tf.add(tf.multiply(realValidate, self.sd), self.mean) 87 | 88 | if(self.scaleExp): 89 | predictionsTrain = tf.exp(predictionsTrain) 90 | predictionsValidate = tf.exp(predictionsValidate) 91 | realTrain = tf.exp(realTrain) 92 | realValidate = tf.exp(realValidate) 93 | 94 | realTrain = tf.reshape(realTrain, predictionsTrain.shape) 95 | realValidate = tf.reshape(realValidate, predictionsValidate.shape) 96 | 97 | self.percentErrorTrain = tf.reduce_mean( 98 | input_tensor=tf.multiply( 99 | tf.abs(tf.divide(tf.subtract(predictionsTrain, realTrain), 100 | realTrain)), 100)) 101 | self.percentErrorValidate = tf.reduce_mean( 102 | input_tensor=tf.multiply( 103 | tf.abs(tf.divide(tf.subtract(predictionsValidate, realValidate), 104 | realValidate)), 100)) 105 | 106 | def display(self): 107 | print("training percent error{: 7.3f}".format(self.percentErrorTrain), 108 | "validation percent error{: 7.3f}".format(self.percentErrorValidate)) 109 | 110 | class Accuracy(Metric): 111 | """ Caluclates the accuracy of predictions """ 112 | def calculate(self, predictionsTrain, predictionsValidate, realTrain, 113 | realValidate): 114 | predictionsTrain = tf.add(tf.multiply(tf.transpose(predictionsTrain), 115 | self.sd), 116 | self.mean) 117 | predictionsValidate = tf.add(tf.multiply(tf.transpose(predictionsValidate), 118 | self.sd), 119 | self.mean) 120 | realTrain = tf.add(tf.multiply(realTrain, self.sd), self.mean) 121 | realValidate = tf.add(tf.multiply(realValidate, self.sd), self.mean) 122 | 123 | if(self.scaleExp): 124 | predictionsTrain = tf.exp(predictionsTrain) 125 | predictionsValidate = 
tf.exp(predictionsValidate) 126 | realTrain = tf.exp(realTrain) 127 | realValidate = tf.exp(realValidate) 128 | 129 | realTrain = tf.reshape(realTrain, predictionsTrain.shape) 130 | realValidate = tf.reshape(realValidate, predictionsValidate.shape) 131 | 132 | self.accuracyTrain = 1 - tf.reduce_mean(tf.abs( 133 | realTrain - tf.round(predictionsTrain))) 134 | self.accuracyValidate = 1 - tf.reduce_mean(tf.abs( 135 | realValidate - tf.round(predictionsValidate))) 136 | 137 | 138 | def display(self): 139 | print("training accuracy{: 9.5f}".format(self.accuracyTrain), 140 | "validation accuracy{: 9.5f}".format( 141 | self.accuracyValidate)) 142 | -------------------------------------------------------------------------------- /tensorBNN/network.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import tensorflow_probability as tfp 7 | 8 | from tensorBNN.paramAdapter import paramAdapter 9 | 10 | tfd = tfp.distributions 11 | 12 | 13 | class network(object): 14 | """An object used for storing all of the variables required to create 15 | a Bayesian Neural Network using Hamiltonian Monte Carlo and then training 16 | the network. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | dtype, 22 | inputDims, 23 | trainX, 24 | trainY, 25 | validateX, 26 | validateY): 27 | """ 28 | Arguments: 29 | * dtype: data type for Tensors 30 | * inputDims: dimension of input vector 31 | * trainX: the training data input, shape is n by inputDims 32 | * trainY: the training data output 33 | * validateX: the validation data input, shape is n by inputDims 34 | * validateY: the validation data output 35 | * mean: the mean used to scale trainY and validateY 36 | * sd: standard deviation used to scale trainY and validateY 37 | """ 38 | self.dtype = dtype 39 | self.iteration = None 40 | 41 | self.trainX = tf.reshape( 42 | tf.constant( 43 | trainX, dtype=self.dtype), [ 44 | len(trainX), inputDims]) 45 | self.trainY = tf.constant(trainY, dtype=self.dtype) 46 | 47 | self.validateX = tf.reshape( 48 | tf.constant( 49 | validateX, dtype=self.dtype), [ 50 | len(validateX), inputDims]) 51 | self.validateY = tf.constant(validateY, dtype=self.dtype) 52 | 53 | self.states = [] # List with the weight and bias state placeholders 54 | self.hyperStates = [] # List with hyper parameter state placeholders 55 | 56 | self.layers = [] # List of all the layers 57 | 58 | self.currentInnerStep = None 59 | 60 | def metrics(self, trainPredict, trainReal, validatePredict, validateReal): 61 | """Calculates the average squared error and percent difference of the 62 | current network 63 | Arguments: 64 | * predictions: output from the network 65 | * scaleExp: boolean value to determine whether to take the 66 | exponential of the data and scale it 67 | * train: boolean value to determine whether to use the training 68 | data 69 | * mean: mean value used for unshifiting a distribution 70 | * sd: sd value used for unscalling a distribution 71 | Returns: 72 | * logits: output from the network 73 | * squaredError: the mean squared error of predictions from the 74 | network 75 | * percentError: the percent error of the predictions from the 76 | network 77 | """ 78 | 79 | for metric in self.metricList: 80 | metric.calculate(trainPredict, validatePredict, trainReal, 81 | validateReal) 82 | metric.display() 83 | 84 | def calculateProbs(self, *argv, sd=None): 85 | """Calculates the log probability of the current network values 86 | as well as 
the log probability of their prediction. 87 | Arguments: 88 | * argv: an undetermined number of tensors containg the weights 89 | and biases. 90 | * sd: standard deviation for output distribution, uses current 91 | hyper value if none is given 92 | Returns: 93 | * prob: log probability of network values and network prediction 94 | """ 95 | if(len(argv) != len(self.states)): 96 | argv = argv[0] 97 | 98 | temp = self.makeResponseLikelihood(argv, predict=self.predict, 99 | dtype=self.dtype, 100 | hyperStates=self.hyperStates, 101 | realVals=self.trainY, sd=sd) 102 | prob = tf.reduce_sum(temp) 103 | 104 | # probability of the network parameters 105 | index = 0 106 | for n in range(len(self.layers)): 107 | numTensors = self.layers[n].numTensors 108 | if(numTensors > 0): 109 | prob += self.layers[n].calculateProbs( 110 | argv[index:index + numTensors]) 111 | index += numTensors 112 | return(prob) 113 | 114 | def calculateHyperProbs(self, *argv): 115 | """Calculates the log probability of the current hyper parameters 116 | Arguments: 117 | * argv: an undetermined number of tensors containg the hyper 118 | parameters 119 | Returns: 120 | * prob: log probability of hyper parameters given their priors 121 | """ 122 | prob = 0 123 | indexh = 0 124 | index = 0 125 | for n in range(len(self.layers)): 126 | numHyperTensors = self.layers[n].numHyperTensors 127 | numTensors = self.layers[n].numTensors 128 | if(numHyperTensors > 0): 129 | 130 | prob += self.layers[n].calculateHyperProbs( 131 | argv[indexh:indexh + numHyperTensors], 132 | self.states[index:index + numTensors]) 133 | indexh += numHyperTensors 134 | index += numTensors 135 | 136 | if(self.likelihood.mainProbsInHypers): 137 | prob += self.calculateProbs(self.states, sd=argv[-1]) 138 | 139 | return(prob) 140 | 141 | def predict(self, train, *argv): 142 | """Makes a prediction 143 | Arguments: 144 | * train: a boolean value which determines whether to use training 145 | data 146 | * argv: an undetermined number of tensors containg the weights 147 | and biases. 
148 | Returns: 149 | * prediction: a prediction from the network 150 | """ 151 | tensors = argv 152 | if(len(tensors) == 0): 153 | tensors = self.states 154 | else: 155 | tensors = tensors[0] 156 | x = self.trainX 157 | if(not train): 158 | x = self.validateX 159 | 160 | def innerPrediction(x, layers): 161 | prediction = tf.transpose(a=x) 162 | index = 0 163 | for n in range(len(self.layers)): 164 | numTensors = layers[n].numTensors 165 | prediction = layers[n].predict( 166 | prediction, tensors[index:index + numTensors]) 167 | index += numTensors 168 | return(prediction) 169 | prediction = innerPrediction(x, self.layers) 170 | 171 | return(prediction) 172 | 173 | def add(self, layer, parameters=None): 174 | """Adds a new layer to the network 175 | Arguments: 176 | * layer: the layer to be added 177 | * parameters: list containing weight, bias, and acitvation 178 | matrices 179 | """ 180 | self.layers.append(layer) 181 | if(layer.numTensors > 0): 182 | if parameters is None: 183 | for states in layer.parameters: 184 | self.states.append(states) 185 | else: 186 | for states in parameters: 187 | self.states.append(states) 188 | 189 | if(layer.numHyperTensors > 0): 190 | for states in layer.hypers: 191 | self.hyperStates.append(states) 192 | 193 | def setupMCMC(self, stepSizeStart=1e-3, stepSizeMin=1e-4, stepSizeMax=1e-2, 194 | stepSizeOptions=40, leapfrogStart=1000, leapfogMin=100, 195 | leapFrogMax=10000, leapfrogIncrement=1, hyperStepSize=1e-2, 196 | hyperLeapfrog=100, burnin=1000, 197 | cores=4, averagingSteps=10, a=4, delta=0.1, strikes=5, 198 | randomSteps=10, dualAveraging=False): 199 | """Sets up the MCMC algorithms 200 | Arguments: 201 | * stepSizeStart: the starting step size for the weights and biases 202 | * stepSizeMin: the minimum step size 203 | * stepSizeMax: the maximum step size 204 | * stepSizeOptions: the number of step sizes in grid 205 | * leapfrogStart: number of leapfrog steps for weights and biases 206 | * leapFrogMax: the minimum number of leapfrog steps 207 | * leapMax: the maximum number of leapfrog steps 208 | * leapfrogIncrement: the step in number of leapfrog for search grid 209 | * hyperStepSize: the starting step size for the hyper parameters 210 | * hyperLeapfrog: leapfrog steps for hyper parameters 211 | * cores: number of cores to use 212 | * averaginSteps: number of averaging steps 213 | * a: constant, 4 in paper 214 | * delta: constant, 0.1 in paper 215 | * strikes: iterations with no movement before reseting adapter 216 | * randomSteps: averaging cycles at beginning with random values 217 | Returns nothing 218 | """ 219 | 220 | # Adapt the step size and number of leapfrog steps 221 | self.adapt = paramAdapter(stepSizeStart, 222 | leapfrogStart, 223 | stepSizeMin, 224 | stepSizeMax, 225 | stepSizeOptions, 226 | leapfogMin, 227 | leapFrogMax, 228 | leapfrogIncrement, 229 | averagingSteps, 230 | burnin / averagingSteps, 231 | a=a, 232 | delta=delta, 233 | cores=cores, 234 | strikes=strikes, 235 | randomSteps=randomSteps) 236 | 237 | self.step_size = tf.cast(stepSizeStart, self.dtype) 238 | self.leapfrog = tf.cast(leapfrogStart, tf.int32) 239 | self.cores = cores 240 | self.burnin = burnin 241 | self.target = 0.95 242 | 243 | self.gamma = tf.cast(0.4, self.dtype) 244 | self.t0 = tf.cast(10, self.dtype) 245 | self.kappa = tf.cast(0.75, self.dtype) 246 | self.h = tf.cast([0], self.dtype) 247 | self.logEpsilonBar = tf.cast([0], self.dtype) 248 | self.mu = tf.cast(tf.math.log(100*hyperStepSize), self.dtype) 249 | 250 | self.dualAveraging = dualAveraging 251 | 
self.gamma2 = tf.cast(0.4, self.dtype) 252 | self.t02 = tf.cast(10, self.dtype) 253 | self.kappa2 = tf.cast(0.75, self.dtype) 254 | self.h2 = tf.cast([0], self.dtype) 255 | self.logEpsilonBar2 = tf.cast([0], self.dtype) 256 | self.mu2 = tf.cast(tf.math.log(100*stepSizeStart), self.dtype) 257 | 258 | self.hyper_step_size = tf.Variable(tf.cast(np.array(hyperStepSize), 259 | self.dtype)) 260 | 261 | # Setup the Markov Chain for the network parameters 262 | self.mainKernel = tfp.mcmc.HamiltonianMonteCarlo( 263 | target_log_prob_fn=self.calculateProbs, 264 | num_leapfrog_steps=self.leapfrog, 265 | step_size=[self.step_size], 266 | state_gradients_are_stopped=True, 267 | name="main") 268 | 269 | self.hyperLeapfrog = hyperLeapfrog 270 | # Setup the Transition Kernel for the hyper parameters 271 | hyperKernel = tfp.mcmc.HamiltonianMonteCarlo( 272 | target_log_prob_fn=self.calculateHyperProbs, 273 | num_leapfrog_steps=hyperLeapfrog, 274 | step_size=[self.hyper_step_size], 275 | state_gradients_are_stopped=True) 276 | 277 | self.hyperKernel = tfp.mcmc.DualAveragingStepSizeAdaptation( 278 | inner_kernel=hyperKernel, num_adaptation_steps=int(burnin * 0.8)) 279 | 280 | @tf.function(jit_compile=True, experimental_relax_shapes=True) 281 | def stepMCMCNoHypers(self, states, hyperStates, mainStep, leapfrogVal, 282 | mainAccept=tf.cast([1], tf.float32), sampleNumber=1): 283 | """ Steps the markov chain for each of the network parameters and the 284 | hyper parameters forward one step 285 | Has no arguments, returns nothing. 286 | """ 287 | 288 | def InnerStepMain(i, states, hyperStates, leapfrog, step_size): 289 | 290 | def calculateProbs(*argv): 291 | if(len(argv) != len(self.states)): 292 | argv = argv[0] 293 | 294 | prob = 0 295 | indexh = 0 296 | index = 0 297 | for n in range(len(self.layers)): 298 | numHyperTensors = self.layers[n].numHyperTensors 299 | numTensors = self.layers[n].numTensors 300 | if(numHyperTensors > 0): 301 | 302 | prob += self.layers[n].calculateProbs( 303 | hyperStates[indexh:indexh + numHyperTensors], 304 | argv[index:index + numTensors]) 305 | indexh += numHyperTensors 306 | index += numTensors 307 | 308 | temp = self.makeResponseLikelihood(argv, predict=self.predict, 309 | dtype=self.dtype, 310 | hyperStates=hyperStates, 311 | realVals=self.trainY, 312 | sd=hyperStates[-1]) 313 | prob += tf.reduce_sum(temp) 314 | return(prob) 315 | hmc = tfp.mcmc.HamiltonianMonteCarlo 316 | kernel = hmc(target_log_prob_fn=calculateProbs, 317 | num_leapfrog_steps=leapfrog, 318 | step_size=step_size, 319 | state_gradients_are_stopped=True) 320 | 321 | states, kernel_results = tfp.mcmc.sample_chain( 322 | num_results=1, 323 | num_burnin_steps=0, # start collecting data on first step 324 | current_state=states, # starting parts of chain 325 | parallel_iterations=8, 326 | kernel=kernel, 327 | trace_fn=lambda _, pkr: [pkr.accepted_results.step_size, 328 | pkr.log_accept_ratio, 329 | pkr.accepted_results.target_log_prob]) 330 | 331 | acceptRate = tf.where(kernel_results[1] < 0, 332 | tf.exp(kernel_results[1]), 1) 333 | 334 | return(states, [step_size], acceptRate) 335 | 336 | def oneStep(i, params, hyperParams, mainStep, mainAccept, leap): 337 | 338 | params, mainStep, mainAccept = InnerStepMain(i, params, 339 | hyperParams, leap, 340 | mainStep) 341 | for x in range(len(params)): 342 | params[x] = params[x][0] 343 | 344 | return(tf.add(i, 1), params, hyperParams, mainStep[0], mainAccept, 345 | leap) 346 | 347 | def condition(i, states, hyperStates, mainStep, mainAccept, 348 | leapfrogVal): 349 | 
return(tf.less(i, sampleNumber)) 350 | 351 | i = tf.constant(0) 352 | i, states, hyperStates, mainStep, mainAccept, \ 353 | leapfrogVal = tf.while_loop(condition, oneStep, 354 | [i, states, hyperStates, mainStep, 355 | mainAccept, leapfrogVal]) 356 | 357 | return(states, mainStep, mainAccept) 358 | 359 | @tf.function(jit_compile=True, experimental_relax_shapes=True) 360 | def stepMCMC(self, states, hyperStates, mainStep, hyperStep, logEpsilonBar, 361 | h, iter_, leapfrogVal, mainAccept=tf.cast([1], tf.float32), 362 | hyperAccept=tf.cast([1], tf.float32), sampleNumber=1): 363 | """ Steps the markov chain for each of the network parameters and the 364 | hyper parameters forward one step 365 | Has no arguments, returns nothing. 366 | """ 367 | 368 | def InnerStepMain(i, states, hyperStates, leapfrog, step_size, epoch): 369 | 370 | def calculateProbs(*argv): 371 | if(len(argv) != len(self.states)): 372 | argv = argv[0] 373 | 374 | prob = 0 375 | indexh = 0 376 | index = 0 377 | for n in range(len(self.layers)): 378 | numHyperTensors = self.layers[n].numHyperTensors 379 | numTensors = self.layers[n].numTensors 380 | if(numHyperTensors > 0): 381 | 382 | prob += self.layers[n].calculateProbs( 383 | hyperStates[indexh:indexh + numHyperTensors], 384 | argv[index:index + numTensors]) 385 | indexh += numHyperTensors 386 | index += numTensors 387 | 388 | prob += tf.reduce_sum(self.makeResponseLikelihood( 389 | argv, predict=self.predict, dtype=self.dtype, 390 | hyperStates=hyperStates, realVals=self.trainY, 391 | sd=hyperStates[-1])) 392 | return(prob) 393 | 394 | kernel = tfp.mcmc.HamiltonianMonteCarlo( 395 | target_log_prob_fn=calculateProbs, 396 | num_leapfrog_steps=leapfrog, 397 | step_size=step_size, 398 | state_gradients_are_stopped=True) 399 | 400 | states, kernel_results = tfp.mcmc.sample_chain( 401 | num_results=1, 402 | num_burnin_steps=0, # start collecting data on first step 403 | current_state=states, # starting parts of chain 404 | parallel_iterations=8, 405 | kernel=kernel, 406 | trace_fn=lambda _, pkr: [pkr.accepted_results.step_size, 407 | pkr.log_accept_ratio, 408 | pkr.accepted_results.target_log_prob]) 409 | 410 | acceptRate = tf.where(kernel_results[1] < 0, 411 | tf.exp(kernel_results[1]), 1) 412 | return(states, [step_size], acceptRate) 413 | 414 | def InnerStepHyper(i, states, hyperStates, leapfrog, step_size, 415 | logEpsilonBar, h, epoch): 416 | 417 | def calculateProbs(*argv): 418 | if(len(argv) != len(self.hyperStates)): 419 | argv = argv[0] 420 | 421 | prob = 0 422 | indexh = 0 423 | index = 0 424 | for n in range(len(self.layers)): 425 | numHyperTensors = self.layers[n].numHyperTensors 426 | numTensors = self.layers[n].numTensors 427 | if(numHyperTensors > 0): 428 | 429 | prob += self.layers[n].calculateHyperProbs( 430 | argv[indexh:indexh + numHyperTensors], 431 | states[index:index + numTensors]) 432 | indexh += numHyperTensors 433 | index += numTensors 434 | 435 | if(self.likelihood.mainProbsInHypers): 436 | prob += tf.reduce_sum(self.makeResponseLikelihood( 437 | states, predict=self.predict, dtype=self.dtype, 438 | hyperStates=argv, realVals=self.trainY, sd=argv[-1])) 439 | 440 | return(prob) 441 | 442 | kernel = tfp.mcmc.HamiltonianMonteCarlo( 443 | target_log_prob_fn=calculateProbs, 444 | num_leapfrog_steps=leapfrog, 445 | step_size=step_size, 446 | state_gradients_are_stopped=True) 447 | 448 | hyperStates, kernel_results = tfp.mcmc.sample_chain( 449 | num_results=1, 450 | num_burnin_steps=0, # start collecting data on first step 451 | current_state=hyperStates, # 
starting parts of chain 452 | parallel_iterations=8, 453 | kernel=kernel, 454 | trace_fn=lambda _, pkr: [pkr.accepted_results.step_size, 455 | pkr.log_accept_ratio, 456 | pkr.accepted_results.target_log_prob]) 457 | m = epoch + 1 458 | 459 | accept = tf.where(kernel_results[1] < 0, 460 | tf.exp(kernel_results[1]), 1) 461 | h = (1-1/(m+self.t0))*h+(1/(m+self.t0))*(self.target-accept) 462 | 463 | logEpsilon = self.mu-h*(m**0.5)/self.gamma 464 | 465 | logEpsilonBar = (1-m**(-self.kappa))*logEpsilonBar 466 | logEpsilonBar += m**(-self.kappa)*logEpsilon 467 | 468 | step_size = tf.where(m < self.burnin * 0.8, 469 | tf.math.exp(logEpsilonBar), step_size) 470 | 471 | return(hyperStates, step_size, logEpsilonBar, h, accept) 472 | 473 | def oneStep(i, params, hyperParams, mainStep, hyperStep, logEpsilonBar, 474 | h, epoch, mainAccept, hyperAccept, leap): 475 | 476 | params, mainStep, mainAccept = InnerStepMain(i, params, 477 | hyperParams, leap, 478 | mainStep, epoch) 479 | for x in range(len(params)): 480 | params[x] = params[x][0] 481 | 482 | hyperParams, hyperStep, logEpsilonBar, h, hyperAccept = \ 483 | InnerStepHyper(i, params, hyperParams, self.hyperLeapfrog, 484 | hyperStep, logEpsilonBar, h, epoch) 485 | for x in range(len(hyperParams)): 486 | hyperParams[x] = hyperParams[x][0] 487 | hyperStep = hyperStep[0] 488 | 489 | return(tf.add(i, 1), params, hyperParams, mainStep[0], hyperStep, 490 | logEpsilonBar, h, epoch, mainAccept, hyperAccept, leap) 491 | 492 | def condition(i, states, hyperStates, mainStep, hyperStep, 493 | logEpsilonBar, h, epoch, mainAccept, hyperAccept, leap): 494 | return(tf.less(i, sampleNumber)) 495 | 496 | i = tf.constant(0) 497 | epoch = tf.cast(iter_, self.dtype) 498 | i, states, hyperStates, mainStep, hyperStep, logEpsilonBar, h, epoch, \ 499 | mainAccept, hyperAccept, leapfrogVal = \ 500 | tf.while_loop(condition, oneStep, [i, states, hyperStates, 501 | mainStep, hyperStep, 502 | logEpsilonBar, h, epoch, 503 | mainAccept, hyperAccept, 504 | leapfrogVal]) 505 | 506 | return(states, hyperStates, mainStep, hyperStep, logEpsilonBar, h, 507 | mainAccept, hyperAccept) 508 | 509 | def train( 510 | self, 511 | epochs, 512 | samplingStep, 513 | likelihood, 514 | metricList=[], 515 | adjustHypers=True, 516 | scaleExp=False, 517 | folderName=None, 518 | networksPerFile=1000, 519 | displaySkip=1): 520 | """Trains the network 521 | Arguements: 522 | * Epochs: Number of training cycles 523 | * samplingStep: Epochs between sampled networks 524 | * likelihood: Object containing the output likelihood for the BNN 525 | * scaleExp: whether the metrics should be scaled via exp 526 | * folderName: name of folder for saved networks 527 | * networksPerFile: number of networks saved in a given file 528 | * returnPredictions: whether to return the prediction from the 529 | network 530 | Returns: 531 | * results: the output of the network when sampled 532 | (if returnPrediction=True) 533 | """ 534 | # Create response likelihood 535 | startSampling = self.burnin 536 | 537 | self.likelihood = likelihood 538 | self.makeResponseLikelihood = self.likelihood.makeResponseLikelihood 539 | self.metricList = metricList 540 | self.adjustHypers = adjustHypers 541 | 542 | for val in self.likelihood.hypers: 543 | self.hyperStates.append(tf.cast(val, self.dtype)) 544 | 545 | # Create the folder and files for the networks 546 | filePath = None 547 | files = [] 548 | if(folderName is not None): 549 | filePath = os.path.join(os.getcwd(), folderName) 550 | if(not os.path.isdir(filePath)): 551 | 
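# Saved networks are written as one text file per weight/bias tensor,
# named "<tensor index>.<file number>.txt", plus "hypers<file number>.txt"
# for the hyper parameters and "architecture.txt" listing the layer names;
# this is the layout that predictor.loadNetworks()/loadArchitecture() read.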
os.mkdir(filePath) 552 | for n in range(len(self.states)): 553 | files.append( 554 | open(filePath + "/" + str(n) + ".0" + ".txt", "wb")) 555 | files.append(open(filePath + "/hypers" + "0" + ".txt", "wb")) 556 | previousLeap = self.leapfrog 557 | with open(filePath + "/architecture.txt", "wb") as f: 558 | for layer in (self.layers): 559 | f.write((layer.name+"\n").encode("utf-8")) 560 | 561 | iter_ = 0 562 | tf.random.set_seed(50) 563 | 564 | self.mainAccept = tf.cast([0], tf.float32) 565 | self.hyperAccept = tf.cast([0], tf.float32) 566 | startTime = time.time() 567 | while(iter_ < epochs): # Main training loop 568 | # 569 | if(self.adjustHypers): 570 | returnVals = self.stepMCMC(self.states, self.hyperStates, 571 | self.step_size, 572 | self.hyper_step_size, 573 | self.logEpsilonBar, self.h, 574 | tf.cast(iter_, self.dtype), 575 | self.leapfrog, 576 | tf.cast(self.mainAccept, 577 | self.dtype), 578 | tf.cast(self.hyperAccept, 579 | self.dtype)) 580 | self.states, self.hyperStates, self.step_size, \ 581 | self.hyper_step_size, self.logEpsilonBar, self.h, \ 582 | self.mainAccept, self.hyperAccept = returnVals 583 | 584 | else: 585 | self.states, self.step_size, self.mainAccept = \ 586 | self.stepMCMCNoHypers(self.states, self.hyperStates, 587 | self.step_size, self.leapfrog, 588 | tf.cast(self.mainAccept, self.dtype)) 589 | 590 | previousLeap = self.leapfrog 591 | iter_ += 1 592 | 593 | if(iter_ % displaySkip == 0): 594 | print() 595 | print("iter:{:>2}".format(iter_)) 596 | print("step size", self.step_size.numpy()) 597 | print("hyper step size", self.hyper_step_size.numpy()) 598 | print("leapfrog", self.leapfrog.numpy()) 599 | print("Main acceptance", self.mainAccept.numpy()[0]) 600 | print("Hyper acceptance", self.hyperAccept.numpy()[0]) 601 | self.metrics(self.predict(train=True), self.trainY, 602 | self.predict(train=False), self.validateY) 603 | step, leap = self.adapt.update(self.states) 604 | self.step_size = step + self.step_size * 0 605 | self.leapfrog = leap + self.leapfrog * 0 606 | 607 | self.step_size = tf.cast(self.step_size, self.dtype) 608 | 609 | # Create new files to record network 610 | indexShift = iter_ - startSampling - 1 611 | indexInterval = networksPerFile * samplingStep 612 | if(iter_ > startSampling and indexShift % indexInterval == 0): 613 | for file in files: 614 | file.close() 615 | temp = [] 616 | for n in range(len(self.states)): 617 | temp.append(open(filePath + "/" + str(n) + "." 
+ 618 | str(int((iter_-startSampling) // 619 | (networksPerFile * 620 | samplingStep))) + 621 | ".txt", "wb")) 622 | temp.append(open(filePath + "/hypers" + 623 | str(int((iter_-startSampling) // 624 | (networksPerFile * samplingStep))) + 625 | ".txt", "wb")) 626 | files = temp 627 | 628 | # Update the summary file 629 | file = open(filePath + "/summary.txt", "wb") 630 | for n in range(len(self.states)): 631 | val = "" 632 | for sizes in self.states[n].shape: 633 | val += str(sizes) + " " 634 | val = val.strip() + "\n" 635 | file.write(val.encode("utf-8")) 636 | numNetworks = (indexShift) // samplingStep 637 | numFiles = numNetworks // networksPerFile 638 | if(numNetworks % networksPerFile != 0): 639 | numFiles += 1 640 | file.write((str(numNetworks) + " " + str(numFiles) + 641 | " " + str(len(self.states))+"\n").encode("utf-8")) 642 | hyperStateCount = 0 643 | for state in self.hyperStates: 644 | hyperStateCount += tf.size(state) 645 | file.write(str(hyperStateCount.numpy()).encode("utf-8")) 646 | file.close() 647 | # Record the sampled network parameters 648 | if(iter_ > startSampling and (iter_) % samplingStep == 0): 649 | if(filePath is not None): 650 | for n in range(len(files)-1): 651 | np.savetxt(files[n], self.states[n]) 652 | tempStates = [] 653 | for state in self.hyperStates: 654 | length = 1 655 | for x in state.shape: 656 | length = length*x 657 | if(length > 1): 658 | splitStates = tf.split(state, length) 659 | for splitState in splitStates: 660 | tempStates.append(splitState) 661 | else: 662 | tempStates.append(state) 663 | np.savetxt(files[-1], tempStates) 664 | if(iter_ % displaySkip == 0): 665 | likelihood.display(self.hyperStates) 666 | print("Time elapsed:", time.time() - startTime) 667 | startTime = time.time() 668 | 669 | for file in files: 670 | file.close() 671 | -------------------------------------------------------------------------------- /tensorBNN/paramAdapter.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import sys 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from multiprocessing import Pool 9 | 10 | 11 | class paramAdapter(object): 12 | """This object stores the variables required to implement an adaptive 13 | step size and number of leapfrog steps as detailed in "Adaptive Hamiltonian 14 | and Riemann Manifold Monte Carlo Samplers" by Wang, Mohamed, and 15 | de Freitas. This method performs Bayesian inference on these parameters 16 | assuming a uniform prior between specified values. Over time, the 17 | probability of a new state being proposed decreases so that the values will 18 | converge to specific values. 19 | 20 | In a slight divergence from the paper, three features are included to 21 | prevent the adapter from settling too early into a non-optimal position, to 22 | compensate for the optimal position changing drastically through training, 23 | and to generally improve the suggested points. First, the adapter will 24 | randomly propose for a certain number of steps at the beginning as set by 25 | the randomSteps keyword argument. Secondly, if the adapter goes through a 26 | set number of iterations specified with the strikes keyword argument and 27 | the SJD (squared jump distance) is 0 every single time then the entire paramAdapter is reset to its 28 | initial condition. It is quite possible that this will happen after the BNN 29 | converges to a minimum and the maximum feasible step size is much smaller. 
30 | Finally, the adapter will scale the leapfrog steps and step size to the 31 | range -1 to 1 in order for the 0 mean Gaussian priors used in the Bayeisan 32 | inference to better fit the data. 33 | 34 | In order to more rapidly search through the grid of possible step sizes 35 | and leapfrog steps this object uses parallel processing so that all 36 | available computing resources are used. 37 | """ 38 | 39 | def __init__(self, e1, L1, el, eu, eNumber, Ll, Lu, lStep, m, k, a=4, 40 | delta=0.1, cores=4, strikes=10, randomSteps=10): 41 | """ Creates a paramAdapter object. 42 | 43 | Arguments: 44 | * e1: starting step size 45 | * L1: starting number of leapfrog steps 46 | * el: lower step size bound 47 | * eu: upper step size bound 48 | * eNumber: number of step sizes in gride 49 | * Ll: lower leapfrog bound 50 | * Lu: upper leapfrog bound 51 | * lStep: leapfrog step size in grid 52 | * m: number of averaging steps 53 | * k: iterations before proposal probability starts decreasing 54 | * a: constant, 4 in paper 55 | * delta: constant, 0.1 in paper 56 | * cores: number of cores to use in processing 57 | * strikes: iterations with no movement before reseting adapter 58 | * randomSteps: averaging cycles at beginning with random values 59 | """ 60 | self.dtype=tf.float32 61 | self.currentE = e1 62 | self.currentL = L1 63 | self.el = tf.cast(el, self.dtype) 64 | self.eu = tf.cast(eu, self.dtype) 65 | self.Ll = tf.cast(Ll, self.dtype) 66 | self.Lu = tf.cast(Lu, self.dtype) 67 | self.eNumber = tf.cast(eNumber, tf.int32) 68 | self.eGrid = tf.linspace(el, eu, num=eNumber) 69 | self.lGrid = tf.cast(np.array(range(Ll, Lu + 1, int(lStep))), self.dtype) 70 | self.lNumber = tf.cast(len(self.lGrid), tf.int32) 71 | self.delta = tf.cast(delta, self.dtype) 72 | kappa = tf.cast(0.2, self.dtype) 73 | self.sigma = tf.linalg.diag( 74 | [1 / ((kappa * (2))**2), 1 / ((kappa * (2))**2)]) 75 | self.previousGamma = [] 76 | 77 | self.allSD = [] 78 | self.k = k 79 | self.K = tf.zeros([0,0], dtype=self.dtype) 80 | self.m = m 81 | self.currentData = [] 82 | self.allData = [] 83 | self.maxR = tf.cast(1e-8, self.dtype) 84 | self.a = tf.cast(a, self.dtype) 85 | self.i = tf.cast(-2, self.dtype) 86 | self.previous_state = None 87 | self.current_state = None 88 | #np.random.seed(10) 89 | 90 | self.cores = cores 91 | self.strikes = 0 92 | self.maxStrikes = 50#strikes 93 | self.randomSteps = randomSteps 94 | 95 | def calck(self, gammaI, gammaJ, el, eu, sigma): 96 | """ Calculates the covariance k between two states 97 | 98 | Arguments: 99 | * gammaI: state 1 100 | * gammaJ: state 2 101 | Returns: 102 | * k: covaraiance between gammaI and gammaJ 103 | """ 104 | gamma1 = tf.transpose([[-1+2*(gammaI[0]-el)/(eu-el), 105 | -1+2*(tf.cast(gammaI[1], self.dtype)-self.Ll)/(self.Lu-self.Ll)]]) 106 | gamma2 = tf.transpose([[-1+2*(gammaJ[0]-el)/(eu-el), 107 | -1+2*(tf.cast(gammaJ[1], self.dtype)-self.Ll)/(self.Lu-self.Ll)]]) 108 | 109 | k = tf.exp(-0.5 * (tf.matmul(tf.transpose(gamma1), 110 | tf.matmul(sigma, gamma2)))) 111 | return(k) 112 | 113 | def calcUCB(self, testGamma, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma): 114 | """ Calculates a varraint of the upper confidence bound for a test 115 | state. 
116 | 117 | Arguments: 118 | * testGamma: the test state 119 | * s: a scaling factor 120 | * inverse: inverse of the covariance matrix 121 | * inverseR: inverse of the covariance matrix time the data 122 | * p: the decay value 123 | * rootBeta: a constant based on the number of variables in the 124 | state 125 | Returns: 126 | * ucb: upper confidence bound 127 | """ 128 | k = []#[None] * self.inverse.shape[0] 129 | for gamma, index in zip(previousGamma, 130 | range(len(previousGamma))): 131 | #k[index] = self.calck(gamma, testGamma) 132 | k.append([self.calck(gamma, testGamma, el, eu, sigma)[0,0]]) 133 | k = tf.cast(k, self.dtype) 134 | mean = tf.matmul(tf.transpose(k), inverseR) * s 135 | variance = tf.matmul(inverse, k) 136 | variance = tf.matmul(tf.transpose(k), variance) 137 | 138 | variance = self.calck(testGamma, testGamma, el, eu, sigma) - variance 139 | 140 | ucb = mean + variance * p * rootbeta 141 | return(ucb, mean, variance) 142 | 143 | def reset(self): 144 | """Resets the adapter""" 145 | tf.print("Reset") 146 | self.previousGamma = [] 147 | 148 | self.allSD = [] 149 | self.K = tf.zeros([0,0]) 150 | self.currentData = [] 151 | self.allData = [] 152 | self.maxR = 1e-8 153 | self.i = -2 154 | self.previous_state = None 155 | self.current_state = None 156 | self.strikes = 0 157 | 158 | @tf.function(jit_compile=True, experimental_relax_shapes=True) 159 | def gridSearch(self, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma): 160 | eCount = tf.constant(0, dtype=tf.int32) 161 | lCount = tf.constant(0, dtype=tf.int32) 162 | cond = lambda eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma: tf.less(lCount, self.lNumber) 163 | e = tf.cast([[el]], self.dtype) 164 | L = tf.cast([[self.Ll]], self.dtype) 165 | ucb = tf.cast([[-1000000000]], self.dtype) 166 | variance = tf.cast([[-1000000000]], self.dtype) 167 | mean=tf.cast([[-1000000000]], self.dtype) 168 | 169 | 170 | def processChunk(eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma): 171 | """Processes a chunk of the e, L combinations. 
172 | 173 | Arguments: 174 | * eList: list of step sizes to check 175 | * lList: list of leapfrog steps to check 176 | 177 | Returns: 178 | * best: a tuple of the form ((best e, best L), ucb) where the e and 179 | L selected are those with the highest ucb, which is also included 180 | """ 181 | newE = self.eGrid[eCount] 182 | newL = self.lGrid[lCount] 183 | newUcb, newMean, newVariance = self.calcUCB([newE, newL], previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma) 184 | e = tf.where(newUcb>ucb,newE, e) 185 | L = tf.where(newUcb>ucb,newL, L) 186 | mean = tf.where(newUcb>ucb,newMean, mean) 187 | variance = tf.where(newUcb>ucb,newVariance, variance) 188 | ucb = tf.where(newUcb>ucb,newUcb, ucb) 189 | lCount = tf.where(eCount==self.eNumber-1, lCount+1, lCount) 190 | eCount = tf.where(eCount==self.eNumber-1, 0, eCount+1) 191 | return(eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma) 192 | 193 | eCount, lCount, e, L, ucb, mean, variance, previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma = tf.while_loop(cond, processChunk, [eCount, lCount, e, L, ucb, mean, variance, 194 | previousGamma, inverseR, s, inverse, p, rootbeta, el, eu, sigma]) 195 | 196 | return(tf.cast(e[0,0], self.dtype), tf.cast(L[0,0], self.dtype)) 197 | 198 | 199 | def update(self, state): 200 | """ Steps the adapter forward by one step 201 | 202 | Arguments: 203 | * state: the newest state proposed by the HMC algorithm 204 | Returns: 205 | * currentE: the new step size 206 | * currentL: the new number of leapfrog steps 207 | """ 208 | if(self.i self.randomSteps): 226 | self.strikes += 1 227 | else: 228 | self.strikes = 0 229 | 230 | # Update E and L if this is not just an averaging step 231 | if(self.i % self.m == 0 and self.i > 0): 232 | u = tf.random.uniform([1,1],minval=0, maxval=1) 233 | self.p = max(self.i / self.m - self.k + 1, 1)**(-0.5) 234 | if(u < self.p+u*0): # Over time the probability of updating will decay 235 | mean = tf.math.reduce_mean(self.currentData) 236 | sd = tf.math.reduce_std(self.currentData) 237 | self.currentData = [] 238 | self.allData.append(mean) 239 | self.allSD.append(sd) 240 | self.maxR = tf.math.reduce_max(self.allData) 241 | # Update the covariance matrix 242 | self.previousGamma.append((self.currentE, self.currentL)) 243 | size = len(self.previousGamma) 244 | newK = tf.ones([size, size]) 245 | if(size > 0): 246 | #newK[:size - 1, :size - 1] = self.K 247 | newK = self.K 248 | newKExtra=[] 249 | for gamma, index in zip(self.previousGamma, range( 250 | len(self.previousGamma))): 251 | k = self.calck(gamma, self.previousGamma[-1], self.el, self.eu, self.sigma) 252 | #newK[-1, index] = k 253 | #newK[index, -1] = k 254 | newKExtra.append(k[0,0]) 255 | newK = tf.concat([newK, [newKExtra[:-1]]],axis=0) 256 | newK = tf.concat([newK, tf.transpose([newKExtra])], axis=1) 257 | self.K = newK 258 | self.s = self.a / self.maxR # update scalling constant 259 | 260 | sigmaNu = tf.math.reduce_mean(self.allSD) # Variance of noise 261 | 262 | # calculate inverse and other values only once 263 | try: # In case the covaraince matrix is singular 264 | self.inverse = tf.linalg.inv( 265 | self.K + (sigmaNu**2) * tf.eye(self.K.shape[0])) 266 | except tf.errors.InvalidArgumentError: 267 | self.inverse = tf.linalg.inv( 268 | self.K + (sigmaNu**2) * tf.eye(self.K.shape[0]) + 269 | 0.1 * tf.eye(self.K.shape[0])) 270 | self.inverseR = tf.matmul(self.inverse, tf.expand_dims(tf.cast(self.allData, tf.float32),1)) 271 | 272 | 273 | 274 | 
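# Exploration weight for the UCB acquisition: rootbeta works out to
# sqrt(2 * log(t**3 * pi**2 / (3 * delta))) with t = i/m + 1, so the bonus
# applied to the predictive variance in calcUCB grows slowly with the
# number of completed averaging windows.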
self.rootbeta = (self.i / self.m + 1)**(3) * math.pi**2 275 | self.rootbeta /= (3 * self.delta) 276 | self.rootbeta = tf.math.log(self.rootbeta)*2 277 | self.rootbeta = self.rootbeta**(0.5) 278 | 279 | # Start parallel searches, take best result found 280 | if(self.i//self.m >= self.randomSteps): 281 | self.currentE, self.currentL = self.gridSearch(self.previousGamma, self.inverseR, self.s, self.inverse, self.p, self.rootbeta, self.el, self.eu, self.sigma) 282 | else: 283 | self.currentE = random.choice(self.eGrid) 284 | self.currentL = random.choice(self.lGrid) 285 | if(size==50): 286 | self.K=self.K[1:,1:] 287 | self.previousGamma=self.previousGamma[1:] 288 | self.allData=self.allData[1:] 289 | self.allSD=self.allSD[1:] 290 | 291 | self.i += 1 292 | return(tf.cast(self.currentE, self.dtype), tf.cast(self.currentL, tf.int32)) 293 | -------------------------------------------------------------------------------- /tensorBNN/predictor.py: -------------------------------------------------------------------------------- 1 | from tensorBNN.activationFunctions import (Exp, Relu, Sigmoid, Tanh, Elu, 2 | Softmax, Leaky_relu, Prelu, 3 | SquarePrelu) 4 | from tensorBNN.layer import DenseLayer, GaussianDenseLayer 5 | from tensorBNN.likelihood import GaussianLikelihood 6 | 7 | from emcee.autocorr import integrated_time, function_1d 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | import math 13 | 14 | 15 | class predictor(object): 16 | def __init__(self, directoryPath, dtype, customLayerDict={}, 17 | likelihood=GaussianLikelihood(sd=0.1)): 18 | """ The constructor here obtains the necesary information to make basic 19 | predictions, and also the basic likelihood function for future 20 | reweighting. 21 | 22 | Arguments: 23 | * directoryPath: Path to folder containing saved networks 24 | * dtype: data type of network 25 | * customLayerDict: Dictionary containing any custom layers with 26 | their names as keys 27 | * likelihood: Likelihood object used in training. If reweighting 28 | is not performed this does not matter. 29 | """ 30 | self.layerDict = {"Exp": Exp, "relu": Relu, "sigmoid": Sigmoid, 31 | "tanh": Tanh, "elu": Elu, "softmax": Softmax, 32 | "leakyrelu": Leaky_relu, "prelu": Prelu, 33 | "squareprelu": SquarePrelu, "dense": DenseLayer, 34 | "denseGaussian": GaussianDenseLayer} 35 | self.directoryPath = directoryPath 36 | self.layerDict.update(customLayerDict) 37 | self.dtype = dtype 38 | self.loadNetworks() 39 | self.loadArchitecture() 40 | self.likelihood = likelihood 41 | self.weightsTrain = [] 42 | 43 | def loadNetworks(self): 44 | """Loads saved networks. 45 | """ 46 | 47 | summary = [] 48 | with open(self.directoryPath + "summary.txt", "r") as file: 49 | for line in iter(file): 50 | summary.append(line.split()) 51 | numNetworks = int(summary[-2][0]) 52 | numFiles = int(summary[-2][1]) 53 | numMatrices = int(summary[-2][2]) 54 | numHypers = int(summary[-1][0]) 55 | 56 | numNetworks //= numFiles 57 | 58 | matrices = [] 59 | vectors = [] 60 | for x in range(numFiles*numNetworks): 61 | vectors.append([]) 62 | 63 | for n in range(numMatrices): 64 | if(len(summary[n]) == 2): 65 | weightsSplitDims = (numNetworks * 66 | numFiles, int(summary[n][0]), 67 | int(summary[n][1])) 68 | else: 69 | weightsSplitDims = (numNetworks * 70 | numFiles, int(summary[n][0]), int(1)) 71 | weights0 = np.zeros(weightsSplitDims) 72 | for m in range(numFiles): 73 | weights = np.loadtxt( 74 | self.directoryPath + 75 | str(n) + 76 | "." 
+ 77 | str(m) + 78 | ".txt", 79 | dtype=np.float32, 80 | ndmin=2) 81 | for k in range(numNetworks): 82 | 83 | netNumber = m * numNetworks + k 84 | index1 = weightsSplitDims[1] * k 85 | index2 = weightsSplitDims[1] * (k + 1) 86 | index3 = weightsSplitDims[2] 87 | weights0[netNumber, :, :] = weights[index1:index2, :index3] 88 | newVector = tf.cast(weights[index1:index2, :index3], 89 | self.dtype).numpy().flatten() 90 | vectors[netNumber].append(newVector) 91 | matrices.append(tf.cast(weights0, self.dtype)) 92 | for x in range(len(vectors)): 93 | vectors[x] = np.concatenate(vectors[x]) 94 | 95 | hypers = [] 96 | if(numHypers > 0): 97 | for m in range(numFiles): 98 | weights = np.loadtxt( 99 | self.directoryPath + "hypers" + str(m) + ".txt", 100 | dtype=np.float32, ndmin=1) 101 | for k in range(numNetworks): 102 | netNumber = m * numNetworks + k 103 | index1 = numHypers * k 104 | index2 = numHypers * (k + 1) 105 | hypers.append(weights[index1:index2]) 106 | 107 | numNetworks *= numFiles 108 | 109 | self.numNetworks = numNetworks 110 | self.numMatrices = numMatrices 111 | self.matrices = matrices 112 | self.hypers = hypers 113 | self.vectors = vectors 114 | 115 | def loadArchitecture(self, architecture=None): 116 | self.layers = [] 117 | if(architecture is None): 118 | with open(self.directoryPath + "architecture.txt", "r") as file: 119 | for line in iter(file): 120 | cleanedLine = line.replace("\n", "") 121 | cleanedLine = self.layerDict[cleanedLine](inputDims=1, 122 | outputDims=1) 123 | self.layers.append(cleanedLine) 124 | else: 125 | with open(architecture, "r") as file: 126 | for line in iter(file): 127 | cleanedLine = line.replace("\n", "") 128 | cleanedLine = self.layerDict[cleanedLine](inputDims=1, 129 | outputDims=1) 130 | self.layers.append(cleanedLine) 131 | 132 | def predict(self, inputMatrix, n=1): 133 | """Make predictions from an ensemble of neural networks. 134 | Arguments: 135 | * inputMatrix: The input data 136 | * n: Predict using every n networks 137 | Returns: 138 | * initialResults: List with all networks used 139 | """ 140 | 141 | inputVal = np.transpose(inputMatrix) 142 | initialResults = [None] * math.ceil(self.numNetworks/n) 143 | for m in range(0, self.numNetworks, n): 144 | current = inputVal 145 | matrixIndex = 0 146 | for layer in self.layers: 147 | numTensors = layer.numTensors 148 | tensorList = [] 149 | for x in range(numTensors): 150 | tensorList.append(self.matrices[matrixIndex+x][m, :, :]) 151 | matrixIndex += numTensors 152 | current = layer.predict(current, tensorList) 153 | initialResults[m//n] = current.numpy() 154 | 155 | return(initialResults) 156 | 157 | def trainProbs(self, trainX, trainY, n, likelihood): 158 | """ Calculate the negative log likelihoods for the training data. 
159 | 160 | Arguments: 161 | * trainX: training input data 162 | * trainY: training output data 163 | * n: Predict using every n networks 164 | """ 165 | weights = [] 166 | if(likelihood is not None): 167 | hyperCountShape = np.array(self.likelihood.hypers).shape 168 | hyperCount = 1 169 | for x in hyperCountShape: 170 | hyperCount *= x 171 | likelihoodHyper = [] 172 | for hyper in self.hypers: 173 | likelihoodHyper.append([hyper[-hyperCount:]]) 174 | weights = self.likelihood.calcultateLogProb(tf.transpose(trainX), 175 | realVals=trainY, 176 | n=n, 177 | hypers=likelihoodHyper, 178 | predict=self.predict, 179 | dtype=self.dtype) 180 | else: 181 | for m in range(0, self.numNetworks, n): 182 | weights.append(tf.cast(0, self.dtype)) 183 | for m in range(0, self.numNetworks, n): 184 | matrixIndex = 0 185 | hyperIndex = 0 186 | current = -weights[m//n] 187 | for layer in self.layers: 188 | numTensors = layer.numTensors 189 | numHyperTensors = layer.numHyperTensors 190 | tensorList = [] 191 | hyperList = [] 192 | for x in range(numTensors): 193 | tensorList.append(self.matrices[matrixIndex+x][m, :, :]) 194 | for x in range(numHyperTensors): 195 | hyperList.append(self.hypers[m][hyperIndex+x]) 196 | hyperIndex += numHyperTensors 197 | matrixIndex += numTensors 198 | current -= tf.cast(layer.calculateHyperProbs(hyperList, 199 | tensorList), 200 | self.dtype).numpy() 201 | weights[m//n] = current 202 | self.weightsTrain = np.array(weights) 203 | 204 | def reweight(self, architecture, trainX=None, trainY=None, n=1, 205 | likelihood=None): 206 | """ Calculate new weights for each network as if they had the new 207 | hyper parameters described in architecture. The weights are calculated 208 | according to p(theta|priors2)/p(theta|priors1). The new priors can be 209 | anything, but the layers must still accept the same size inputs and 210 | number of hyper parameters as the base networks. 211 | 212 | Arguments: 213 | * trainX: training input data 214 | * trainY: training output data 215 | * architecture: new architecture file 216 | * n: Predict using every n networks 217 | 218 | Returns: 219 | * weighting: Numpy array with new weights for the networks. 
220 | """ 221 | 222 | if(len(self.weightsTrain) == 0): 223 | self.trainProbs(trainX, trainY, n, likelihood) 224 | 225 | self.loadArchitecture(architecture=architecture) 226 | 227 | weights = [] 228 | if(likelihood is not None): 229 | 230 | hyperCountShape = np.array(likelihood.hypers).shape 231 | hyperCount = 1 232 | for x in hyperCountShape: 233 | hyperCount *= x 234 | likelihoodHypers = [] 235 | for hyper in self.hypers: 236 | likelihoodHypers.append([hyper[-hyperCount:]]) 237 | weights = likelihood.calcultateLogProb(tf.transpose(trainX), 238 | realVals=trainY, 239 | n=n, 240 | hypers=likelihoodHypers, 241 | predict=self.predict, 242 | dtype=self.dtype) 243 | 244 | else: 245 | for m in range(0, self.numNetworks, n): 246 | weights.append(tf.cast(0, self.dtype)) 247 | 248 | for m in range(0, self.numNetworks, n): 249 | matrixIndex = 0 250 | hyperIndex = 0 251 | current = -weights[m//n] 252 | for layer in self.layers: 253 | numTensors = layer.numTensors 254 | numHyperTensors = layer.numHyperTensors 255 | tensorList = [] 256 | hyperList = [] 257 | for x in range(numTensors): 258 | tensorList.append(self.matrices[matrixIndex+x][m, :, :]) 259 | for x in range(numHyperTensors): 260 | hyperList.append(self.hypers[m][hyperIndex+x]) 261 | hyperIndex += numHyperTensors 262 | matrixIndex += numTensors 263 | current -= tf.cast(layer.calculateHyperProbs(hyperList, 264 | tensorList), 265 | self.dtype).numpy() 266 | weights[m//n] = current 267 | self.weights = np.array(weights) 268 | weighting = np.exp(self.weightsTrain-self.weights) 269 | weighting = weighting/np.sum(weighting) 270 | 271 | self.loadArchitecture() 272 | 273 | return(weighting) 274 | 275 | def autocorrelation(self, inputData, nMax): 276 | predictions = self.predict(inputData, n=1) 277 | output = np.squeeze(np.array(predictions)).T 278 | 279 | valFunc = 0 280 | accepted = 0 281 | 282 | for x in range(len(output)): 283 | temp = (integrated_time(output[x], tol=5, quiet=True)) 284 | if(not math.isnan(temp)): 285 | valFunc += np.array((function_1d(output[x]))) 286 | accepted += 1 287 | 288 | valFunc = valFunc/accepted 289 | if(nMax < len(valFunc)): 290 | valFunc = valFunc[:nMax] 291 | 292 | return(valFunc) 293 | 294 | def autoCorrelationLength(self, inputData, nMax): 295 | predictions = self.predict(inputData, n=1) 296 | output = np.squeeze(np.array(predictions)).T 297 | 298 | val = 0 299 | accepted = 0 300 | 301 | for x in range(len(output)): 302 | temp = (integrated_time(output[x], tol=5, quiet=True)) 303 | if(not math.isnan(temp)): 304 | val += temp 305 | accepted += 1 306 | 307 | val = val/accepted 308 | 309 | if(val[0] > nMax): 310 | print("Correlation time is greater than maximum accepted value.") 311 | 312 | return(val[0]) 313 | 314 | def extractParameters(self): 315 | """ 316 | Returns a list with all the parameter matrices. The first axis in each 317 | matrix corresponds to the network. 318 | """ 319 | return(self.matrices) 320 | 321 | def extractHyperParameters(self): 322 | """ 323 | Returns an array with all the hyper parameters. The first axis 324 | corresponds to the network. 325 | """ 326 | return(np.array(self.hypers)) 327 | 328 | def parameterStatistics(self): 329 | """ 330 | Returns two list, the first with the means of all the parameters, 331 | and the second with standard deviations of the parameters. 
332 | """ 333 | parameterMeans = [] 334 | parameterSds = [] 335 | for matrix in self.matrices: 336 | parameterMeans.append(np.mean(matrix, axis=0)) 337 | parameterSds.append(np.std(matrix, axis=0)) 338 | 339 | return(parameterMeans, parameterSds) 340 | 341 | def hyperStatistics(self): 342 | """ 343 | Returns two arrays, the first with the means of all the hyper 344 | parameters, and the second with standard deviations of the hyper 345 | parameters. 346 | """ 347 | hypers = np.array(self.hypers) 348 | hyperMeans = np.mean(hypers, axis=0) 349 | hyperSds = np.std(hypers, axis=0) 350 | 351 | return(hyperMeans, hyperSds) 352 | --------------------------------------------------------------------------------