├── .gitignore ├── LICENSE ├── README.md ├── autogp ├── __init__.py ├── datasets │ ├── __init__.py │ ├── dataset.py │ └── mnist.py ├── gaussian_process.py ├── kernels │ ├── __init__.py │ ├── arc_cosine.py │ ├── kernel.py │ └── radial_basis.py ├── likelihoods │ ├── __init__.py │ ├── gaussian.py │ ├── likelihood.py │ ├── logistic.py │ ├── regression_network.py │ └── softmax.py ├── losses │ ├── __init__.py │ ├── loss.py │ ├── mean_sq_error.py │ ├── neg_log_like.py │ └── zero_one_loss.py └── util │ ├── __init__.py │ ├── normals.py │ └── util.py ├── example.py ├── experiments ├── cifar10.py ├── data │ ├── get_cifar10_data.sh │ ├── get_mnist8m_data.sh │ ├── get_rectangles_images_data.sh │ └── get_sarcos_data.sh ├── mnist.py ├── mnist8m.py ├── rectangles.py └── sarcos.py ├── run_tests.sh ├── setup.py └── test ├── __init__.py ├── gaussian_process_test.py ├── kernels_test ├── __init__.py └── arc_cosine_test.py ├── likelihoods_test ├── __init__.py └── softmax_test.py └── util_test ├── __init__.py ├── normals_test.py └── util_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | autogp/util/tf_ops/matpackops.so 3 | experiments/data/cifar-10-batches-py 4 | experiments/data/cifar-10-python.tar.gz 5 | experiments/data/MNIST_data 6 | experiments/data/infimnist.tar.gz 7 | experiments/data/infimnist/ 8 | experiments/data/rectangles_images.zip 9 | experiments/data/rectangles_im_test.amat 10 | experiments/data/rectangles_im_train.amat 11 | experiments/data/sarcos_inv.mat 12 | experiments/data/sarcos_inv_test.mat 13 | 14 | .DS_Store 15 | 16 | #tags 17 | .tags 18 | 19 | 20 | *.egg-info 21 | 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | env/ 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib/ 40 | lib64/ 41 | parts/ 42 | sdist/ 43 | var/ 44 | *.egg-info/ 45 | .installed.cfg 46 | *.egg 47 | 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *,cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoGP: Automated Variational Inference for Gaussian Process Models
2 | An implementation of the model described in [AutoGP: Exploring the Capabilities and Limitations of Gaussian Process Models](https://arxiv.org/abs/1610.05392).
3 |
4 | The code was tested on Python 2.7 and 3.6 with [TensorFlow 1.5](https://www.tensorflow.org/get_started/os_setup).
5 |
6 | # Authors
7 | The original code was mainly developed by
8 | [Karl Krauth](https://github.com/Karl-Krauth) with some modifications by
9 | [Edwin Bonilla](https://github.com/ebonilla)
10 | and feedback from [Maurizio Filippone](https://github.com/mauriziofilippone), [Kurt Cutajar](http://www.eurecom.fr/en/people/cutajar-kurt),
11 | and [Dan MacKinlay](http://danmackinlay.name).
12 |
13 |
14 | # Installation
15 | You can download and install AutoGP in development mode using:
16 | ```
17 | git clone git@github.com:ebonilla/AutoGP.git
18 | pip install -e AutoGP
19 | ```
20 | # Usage
21 | The script `example.py` shows a simple example of how to use AutoGP with the default settings. The main components are:
22 |
23 | * Create a Likelihood object
24 | ```
25 | likelihood = autogp.likelihoods.Gaussian()
26 | ```
27 | * Create a Kernel object
28 | ```
29 | kernel = [autogp.kernels.RadialBasis(1)]
30 | ```
31 | * Initialize inducing inputs, e.g. using the training data
32 | ```
33 | inducing_inputs = xtrain
34 | ```
35 | * Create a new GaussianProcess object
36 | ```
37 | model = autogp.GaussianProcess(likelihood, kernel, inducing_inputs)
38 | ```
39 | * Select an optimizer and train the model
40 | ```
41 | optimizer = tf.train.RMSPropOptimizer(0.005)
42 | model.fit(data, optimizer, loo_steps=10, var_steps=20, epochs=30)
43 | ```
44 | Here the model is trained with 10 Leave-One-Out (LOO) optimization steps, 20 variational steps, and a total of 30 global iterations.
45 | * Make predictions on unseen data
46 | ```
47 | ypred, _ = model.predict(xtest)
48 | ```
49 | A complete end-to-end sketch assembling these steps on synthetic data is given below.
50 | # Experiments and Advanced Settings
51 | All the experiments in the current version of the [AutoGP paper](https://arxiv.org/abs/1610.05392)
52 | can be reproduced using the scripts in the `experiments` directory.
53 | The script `experiments/rectangles.py` is a good example of using more advanced settings via the available command-line flags.
54 | The description of these flags can be found under `autogp/util/util.py`. Here we show the commands used in the [AutoGP paper's](https://arxiv.org/abs/1610.05392) experiments:
55 | ```
56 | python experiments/rectangles.py --batch_size=1000 --learning_rate=0.003 --var_steps=50 --loocv_steps=0 --display_step=10 --mc_train=100 --n_inducing=10 --is_ard=1 --lengthscale=10 --num_components=1
57 | python experiments/rectangles.py --batch_size=1000 --learning_rate=0.003 --var_steps=50 --loocv_steps=0 --display_step=10 --mc_train=100 --n_inducing=200 --is_ard=1 --lengthscale=10 --num_components=1
58 | python experiments/rectangles.py --batch_size=1000 --learning_rate=0.003 --var_steps=50 --loocv_steps=0 --display_step=10 --mc_train=100 --n_inducing=1000 --is_ard=1 --lengthscale=10 --num_components=1
59 | python experiments/rectangles.py --batch_size=1000 --learning_rate=0.003 --var_steps=50 --loocv_steps=0 --display_step=10 --mc_train=100 --n_inducing=10 --is_ard=1 --lengthscale=10 --num_components=1 --kernel=arccosine --kernel_depth=3 --kernel_degree=1
60 | python experiments/rectangles.py --batch_size=1000 --learning_rate=0.003 --var_steps=50 --loocv_steps=0 --display_step=10 --mc_train=100 --n_inducing=200 --is_ard=1 --lengthscale=10 --num_components=1 --kernel=arccosine --kernel_depth=3 --kernel_degree=1
61 | python experiments/rectangles.py --batch_size=1000 --learning_rate=0.003 --var_steps=50 --loocv_steps=0 --display_step=10 --mc_train=100 --n_inducing=1000 --is_ard=1 --lengthscale=10 --num_components=1 --kernel=arccosine --kernel_depth=3 --kernel_degree=1
62 | python experiments/rectangles.py --batch_size=1000 --learning_rate=0.003 --var_steps=50 --loocv_steps=50 --display_step=10 --mc_train=100 --n_inducing=200 --is_ard=1 --lengthscale=10 --num_components=1
63 | python experiments/mnist.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=100 --n_inducing=10 --is_ard=1
64 | python experiments/mnist.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=100 --n_inducing=200 --is_ard=1
65 | python experiments/mnist.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=100 --n_inducing=1000 --is_ard=1
66 | python experiments/mnist8m.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=100 --n_inducing=200 --is_ard=1
67 | python experiments/sarcos.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=10 --n_inducing=200 --is_ard=1
68 | python experiments/sarcos.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=100 --n_inducing=200 --is_ard=1
69 | python experiments/sarcos.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=1000 --n_inducing=200 --is_ard=1
70 | python experiments/sarcos.py --batch_size=1000 --learning_rate=0.003 --display_step=10 --mc_train=10000 --n_inducing=200 --is_ard=1
71 | python experiments/cifar10.py --batch_size=1000 --learning_rate=0.003 --n_epochs=10000 --display_step=10 --mc_train=100 --n_inducing=200 --is_ard=1
72 | ```
73 | where the options given are:
74 | * --batch_size: Batch size
75 | * --learning_rate: Learning rate
76 | * --var_steps: Number of variational steps (for optimization of the ELBO objective)
77 | * --loocv_steps: Number of LOO steps (for optimization of the LOO objective)
78 | * --n_epochs: Number of epochs
79 | * --display_step: Number of global iterations between progress reports
80 | * --mc_train: Number of MC samples used to estimate gradients
81 | * --n_inducing: Number of inducing inputs
82 | * --is_ard: Use Automatic Relevance Determination (a different lengthscale for each input dimension)
83 | * --lengthscale: Initial lengthscale for all latent processes
84 | * --num_components: Number of mixture components in the approximate posterior
85 |
86 | # Contact
87 | You can contact the authors of the [AutoGP paper](https://arxiv.org/abs/1610.05392) in the given order, i.e. [Karl Krauth](https://github.com/Karl-Krauth); [Edwin Bonilla](https://github.com/ebonilla); [Maurizio Filippone](https://github.com/mauriziofilippone); and [Kurt Cutajar](http://www.eurecom.fr/en/people/cutajar-kurt).
88 |
89 | # Acknowledgements
90 | The code supporting triangular matrix operations under `autogp/util/tf_ops` was taken from the GPflow repository (Hensman, Matthews et al. GPflow, http://github.com/GPflow/GPflow, 2016).
91 |
--------------------------------------------------------------------------------
/autogp/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .gaussian_process import GaussianProcess
3 | from . import datasets
4 | from . import likelihoods
5 | from . import kernels
6 |
--------------------------------------------------------------------------------
/autogp/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .dataset import DataSet
3 | from .mnist import import_mnist
4 |
--------------------------------------------------------------------------------
/autogp/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class DataSet():
5 |
6 |     def __init__(self, X, Y, shuffle=True):
7 |         self._num_examples = X.shape[0]
8 |         perm = np.arange(self._num_examples)
9 |         if (shuffle):
10 |             np.random.shuffle(perm)
11 |         self._X = X[perm, :]
12 |         self._Y = Y[perm, :]
13 |         self._epochs_completed = 0
14 |         self._index_in_epoch = 0
15 |         self._Din = X.shape[1]
16 |         self._Dout = Y.shape[1]
17 |
18 |     def next_batch(self, batch_size):
19 |         start = self._index_in_epoch
20 |         self._index_in_epoch += batch_size
21 |         if (
22 |             self._index_in_epoch > self._num_examples
23 |         ) and (
24 |                 start != self._num_examples):
25 |             self._index_in_epoch = self._num_examples
26 |         if self._index_in_epoch > self._num_examples:  # Finished epoch
27 |             self._epochs_completed += 1
28 |             perm = np.arange(self._num_examples)
29 |             np.random.shuffle(perm)  # Shuffle the data
30 |             self._X = self._X[perm, :]
31 |             self._Y = self._Y[perm, :]
32 |             start = 0  # Start next epoch
33 |             self._index_in_epoch = batch_size
34 |             assert batch_size <= self._num_examples
35 |         end = self._index_in_epoch
36 |         return self._X[start:end, :], self._Y[start:end, :]
37 |
38 |     @property
39 |     def num_examples(self):
40 |         return self._num_examples
41 |
42 |     @property
43 |     def epochs_completed(self):
44 |         return self._epochs_completed
45 |
46 |     @property
47 |     def Din(self):
48 |         return self._Din
49 |
50 |     @property
51 |     def Dout(self):
52 |         return self._Dout
53 |
54 |     @property
55 |     def X(self):
56 |         return self._X
57 |
58 |     @property
59 |     def Y(self):
60 |         return self._Y
61 |
--------------------------------------------------------------------------------
/autogp/datasets/mnist.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 |
4 | from tensorflow.contrib.learn.python.learn.datasets import base
5 | from tensorflow.contrib.learn.python.learn.datasets.mnist import \
6 |     extract_images,
extract_labels 7 | from tensorflow.python.framework import dtypes 8 | 9 | from .dataset import DataSet 10 | 11 | 12 | def process_mnist(images, dtype=dtypes.float32, reshape=True): 13 | if reshape: 14 | assert images.shape[3] == 1 15 | images = images.reshape(images.shape[0], 16 | images.shape[1] * images.shape[2]) 17 | if dtype == dtypes.float32: 18 | # Convert from [0, 255] -> [0.0, 1.0]. 19 | images = images.astype(np.float32) 20 | images = np.multiply(images, 1.0 / 255.0) 21 | 22 | return images 23 | 24 | 25 | def get_data_info(images): 26 | rows, cols = images.shape 27 | std = np.zeros(cols) 28 | mean = np.zeros(cols) 29 | for col in range(cols): 30 | std[col] = np.std(images[:, col]) 31 | mean[col] = np.mean(images[:, col]) 32 | return mean, std 33 | 34 | 35 | def standardize_data(images, means, stds): 36 | data = images.copy() 37 | rows, cols = data.shape 38 | for col in range(cols): 39 | if stds[col] == 0: 40 | data[:, col] = (data[:, col] - means[col]) 41 | else: 42 | data[:, col] = (data[:, col] - means[col]) / stds[col] 43 | return data 44 | 45 | 46 | def import_mnist(validation_size=0): 47 | """ 48 | This import mnist and saves the data as an object of our DataSet class 49 | :param concat_val: Concatenate training and validation 50 | :return: 51 | """ 52 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 53 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 54 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 55 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 56 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 57 | ONE_HOT = True 58 | TRAIN_DIR = 'experiments/data/MNIST_data' 59 | 60 | local_file = base.maybe_download(TRAIN_IMAGES, TRAIN_DIR, 61 | SOURCE_URL + TRAIN_IMAGES) 62 | with open(local_file) as f: 63 | train_images = extract_images(f) 64 | 65 | local_file = base.maybe_download(TRAIN_LABELS, TRAIN_DIR, 66 | SOURCE_URL + TRAIN_LABELS) 67 | with open(local_file) as f: 68 | train_labels = extract_labels(f, one_hot=ONE_HOT) 69 | 70 | local_file = base.maybe_download(TEST_IMAGES, TRAIN_DIR, 71 | SOURCE_URL + TEST_IMAGES) 72 | with open(local_file) as f: 73 | test_images = extract_images(f) 74 | 75 | local_file = base.maybe_download(TEST_LABELS, TRAIN_DIR, 76 | SOURCE_URL + TEST_LABELS) 77 | with open(local_file) as f: 78 | test_labels = extract_labels(f, one_hot=ONE_HOT) 79 | 80 | validation_images = train_images[:validation_size] 81 | validation_labels = train_labels[:validation_size] 82 | train_images = train_images[validation_size:] 83 | train_labels = train_labels[validation_size:] 84 | 85 | # process images 86 | train_images = process_mnist(train_images) 87 | validation_images = process_mnist(validation_images) 88 | test_images = process_mnist(test_images) 89 | 90 | # standardize data 91 | train_mean, train_std = get_data_info(train_images) 92 | train_images = standardize_data(train_images, train_mean, train_std) 93 | validation_images = standardize_data( 94 | validation_images, train_mean, train_std) 95 | test_images = standardize_data(test_images, train_mean, train_std) 96 | 97 | data = DataSet(train_images, train_labels) 98 | test = DataSet(test_images, test_labels) 99 | val = DataSet(validation_images, validation_labels) 100 | 101 | return data, test, val 102 | -------------------------------------------------------------------------------- /autogp/gaussian_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import absolute_import 3 | 4 | import numpy as np 5 | import tensorflow as 
tf 6 | import tensorflow.contrib 7 | from tensorflow.contrib.distributions import fill_triangular 8 | from . import util 9 | 10 | 11 | class GaussianProcess(object): 12 | """ 13 | The class representing the AutoGP model. 14 | 15 | Parameters 16 | ---------- 17 | likelihood_func : subclass of likelihoods.Likelihood 18 | An object representing the likelihood function p(y|f). 19 | kernel_funcs : list of subclasses of kernels.Kernel 20 | A list of one kernel per latent function. 21 | inducing_inputs : ndarray 22 | An array of initial inducing input locations 23 | Dimensions: num_inducing * input_dim. 24 | num_components : int 25 | The number of mixture of Gaussian components. 26 | diag_post : bool 27 | True if the mixture of Gaussians uses a diagonal covariance, 28 | False otherwise. 29 | num_samples : int 30 | The number of samples to approximate the expected log likelihood 31 | of the posterior. 32 | """ 33 | def __init__(self, 34 | likelihood_func, 35 | kernel_funcs, 36 | inducing_inputs, 37 | num_components=1, 38 | diag_post=False, 39 | num_samples=100): 40 | # Get the actual functions if they were initialized as strings. 41 | self.likelihood = likelihood_func 42 | self.kernels = kernel_funcs 43 | 44 | # Save whether our posterior is diagonal or not. 45 | self.diag_post = diag_post 46 | 47 | # Repeat the inducing inputs for all latent processes 48 | # if we haven't been given individually 49 | # specified inputs per process. 50 | if inducing_inputs.ndim == 2: 51 | inducing_inputs = np.tile( 52 | inducing_inputs[np.newaxis, :, :], 53 | [len(self.kernels), 1, 1] 54 | ) 55 | 56 | # Initialize all model dimension constants. 57 | self.num_components = num_components 58 | self.num_latent = len(self.kernels) 59 | self.num_samples = num_samples 60 | self.num_inducing = inducing_inputs.shape[1] 61 | self.input_dim = inducing_inputs.shape[2] 62 | 63 | # Define all parameters that get optimized directly in raw form. 64 | # Some parameters get 65 | # transformed internally to maintain certain pre-conditions. 66 | self.raw_weights = tf.Variable(tf.zeros([self.num_components])) 67 | self.raw_means = tf.Variable(tf.zeros([ 68 | self.num_components, 69 | self.num_latent, 70 | self.num_inducing])) 71 | if self.diag_post: 72 | self.raw_covars = tf.Variable(tf.ones([ 73 | self.num_components, 74 | self.num_latent, 75 | self.num_inducing])) 76 | else: 77 | init_vec = np.zeros( 78 | [ 79 | self.num_components, 80 | self.num_latent 81 | ] + util.tri_vec_shape(self.num_inducing), 82 | dtype=np.float32 83 | ) 84 | self.raw_covars = tf.Variable(init_vec) 85 | self.raw_inducing_inputs = tf.Variable( 86 | inducing_inputs, dtype=tf.float32) 87 | self.raw_likelihood_params = self.likelihood.get_params() 88 | self.raw_kernel_params = sum([ 89 | k.get_params() for k in self.kernels], [] 90 | ) 91 | 92 | # Define placeholder variables for training and predicting. 93 | self.num_train = tf.placeholder(tf.float32, shape=[], name="num_train") 94 | self.train_inputs = tf.placeholder( 95 | tf.float32, 96 | shape=[None, self.input_dim], 97 | name="train_inputs") 98 | self.train_outputs = tf.placeholder( 99 | tf.float32, shape=[None, None], 100 | name="train_outputs") 101 | self.test_inputs = tf.placeholder( 102 | tf.float32, 103 | shape=[None, self.input_dim], 104 | name="test_inputs") 105 | 106 | # Now build our computational graph. 
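        # _build_graph (defined below) wires three outputs from the same transformed
        # parameters:
        #   nelbo       -- negative evidence lower bound, minimized w.r.t. all
        #                  variational parameters and hyperparameters,
        #   loo_loss    -- leave-one-out predictive objective, used in fit() to update
        #                  only the inducing inputs, kernel and likelihood parameters,
        #   predictions -- posterior-mixture mean and variance at self.test_inputs.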
107 | self.nelbo, self.loo_loss, self.predictions = self._build_graph( 108 | self.raw_weights, 109 | self.raw_means, 110 | self.raw_covars, 111 | self.raw_inducing_inputs, 112 | self.train_inputs, 113 | self.train_outputs, 114 | self.num_train, 115 | self.test_inputs) 116 | 117 | # config = tf.ConfigProto( 118 | # log_device_placement=True, allow_soft_placement=True) 119 | # Do all the tensorflow bookkeeping. 120 | self.session = tf.Session() 121 | self.optimizer = None 122 | self.train_step = None 123 | 124 | def fit(self, data, optimizer, loo_steps=10, var_steps=10, epochs=200, 125 | batch_size=None, display_step=1, test=None, loss=None): 126 | """ 127 | Fit the Gaussian process model to the given data. 128 | 129 | Parameters 130 | ---------- 131 | data : subclass of datasets.DataSet 132 | The train inputs and outputs. 133 | optimizer : TensorFlow optimizer 134 | The optimizer to use in the fitting process. 135 | loo_steps : int 136 | Number of steps to update hyper-parameters using loo objective 137 | var_steps : int 138 | Number of steps to update variational parameters using variational 139 | objective (elbo). 140 | epochs : int 141 | The number of epochs to optimize the model for. 142 | batch_size : int 143 | The number of datapoints to use per mini-batch when training. 144 | If batch_size is None, 145 | then we perform batch gradient descent. 146 | display_step : int 147 | The frequency at which the objective values are printed out. 148 | """ 149 | num_train = data.num_examples 150 | if batch_size is None: 151 | batch_size = num_train 152 | 153 | if self.optimizer != optimizer: 154 | self.optimizer = optimizer 155 | self.loo_train_step = optimizer.minimize( 156 | self.loo_loss, var_list=[ 157 | self.raw_inducing_inputs 158 | ] + 159 | self.raw_kernel_params + 160 | self.raw_likelihood_params 161 | ) 162 | self.train_step = optimizer.minimize(self.nelbo) 163 | self.session.run(tf.global_variables_initializer()) 164 | 165 | start = data.next_batch(batch_size) 166 | 167 | old_epoch = 0 168 | while data.epochs_completed < epochs: 169 | num_epochs = data.epochs_completed + var_steps 170 | while data.epochs_completed < num_epochs: 171 | batch = data.next_batch(batch_size) 172 | self.session.run( 173 | self.train_step, 174 | feed_dict={ 175 | self.train_inputs: batch[0], 176 | self.train_outputs: batch[1], 177 | self.num_train: num_train}) 178 | if (data.epochs_completed % display_step == 0 179 | and data.epochs_completed != old_epoch): 180 | self._print_state(data, test, loss, num_train) 181 | old_epoch = data.epochs_completed 182 | 183 | num_epochs = data.epochs_completed + loo_steps 184 | while data.epochs_completed < num_epochs: 185 | batch = data.next_batch(batch_size) 186 | self.session.run(self.loo_train_step, feed_dict={ 187 | self.train_inputs: batch[0], 188 | self.train_outputs: batch[1], 189 | self.num_train: num_train}) 190 | if ( 191 | data.epochs_completed % display_step == 0 and 192 | data.epochs_completed != old_epoch): 193 | self._print_state(data, test, loss, num_train) 194 | old_epoch = data.epochs_completed 195 | 196 | def predict(self, test_inputs, batch_size=None): 197 | """ 198 | Predict outputs given inputs. 199 | 200 | Parameters 201 | ---------- 202 | test_inputs : ndarray 203 | Points on which we wish to make predictions. 204 | Dimensions: num_test * input_dim. 205 | batch_size : int 206 | The size of the batches we make predictions on. 207 | If batch_size is None, predict on the 208 | entire test set at once. 
209 | 210 | Returns 211 | ------- 212 | ndarray 213 | The predicted mean of the test inputs. 214 | Dimensions: num_test * output_dim. 215 | ndarray 216 | The predicted variance of the test inputs. 217 | Dimensions: num_test * output_dim. 218 | """ 219 | if batch_size is None: 220 | num_batches = 1 221 | else: 222 | num_batches = util.ceil_divide(test_inputs.shape[0], batch_size) 223 | 224 | test_inputs = np.array_split(test_inputs, num_batches) 225 | pred_means = util.init_list(0.0, [num_batches]) 226 | pred_vars = util.init_list(0.0, [num_batches]) 227 | for i in range(num_batches): 228 | pred_means[i], pred_vars[i] = self.session.run( 229 | self.predictions, feed_dict={self.test_inputs: test_inputs[i]}) 230 | 231 | return np.concatenate(pred_means, axis=0), np.concatenate(pred_vars, axis=0) 232 | 233 | def _print_state(self, data, test, loss, num_train): 234 | if num_train <= 100000: 235 | nelbo = self.session.run(self.nelbo, feed_dict={ 236 | self.train_inputs: data.X, 237 | self.train_outputs: data.Y, 238 | self.num_train: num_train}) 239 | loo = self.session.run(self.loo_loss, feed_dict={ 240 | self.train_inputs: data.X, 241 | self.train_outputs: data.Y, 242 | self.num_train: num_train}) 243 | print( 244 | "i=" + repr(data.epochs_completed) + 245 | " nelbo=" + repr(nelbo), end=" ") 246 | print("loo=" + repr(loo)) 247 | 248 | if loss is not None: 249 | ypred = self.predict(test.X)[0] 250 | print( 251 | "i=" + repr(data.epochs_completed) + 252 | " curent " + loss.get_name() + "=" + "%.4f" 253 | % loss.eval(test.Y, ypred)) 254 | 255 | def _build_graph( 256 | self, 257 | raw_weights, 258 | raw_means, 259 | raw_covars, 260 | raw_inducing_inputs, 261 | train_inputs, 262 | train_outputs, 263 | num_train, 264 | test_inputs): 265 | # First transform all raw variables into their internal form. 266 | # Use softmax(raw_weights) to keep all weights normalized. 267 | weights = tf.exp(raw_weights) / tf.reduce_sum(tf.exp(raw_weights)) 268 | 269 | if self.diag_post: 270 | # Use exp(raw_covars) so as to guarantee the diagonal matrix 271 | # remains positive definite. 272 | covars = tf.exp(raw_covars) 273 | else: 274 | # Use vec_to_tri(raw_covars) so as to only optimize 275 | # over the lower triangular portion. 276 | # We note that we will always operate 277 | # over the cholesky space internally. 278 | covars_list = [None] * self.num_components 279 | for i in range(self.num_components): 280 | mat = fill_triangular(raw_covars[i, :, :]) 281 | diag_mat = tf.matrix_diag(tf.matrix_diag_part(mat)) 282 | exp_diag_mat = tf.matrix_diag(tf.exp(tf.matrix_diag_part(mat))) 283 | covars_list[i] = mat - diag_mat + exp_diag_mat 284 | covars = tf.stack(covars_list, 0) 285 | # Both inducing inputs and the posterior means can vary freely so don't change them. 286 | means = raw_means 287 | inducing_inputs = raw_inducing_inputs 288 | 289 | # Build the matrices of covariances between inducing inputs. 290 | kernel_mat = [ 291 | self.kernels[i].kernel(inducing_inputs[i, :, :]) 292 | for i in range(self.num_latent)] 293 | kernel_chol = tf.stack([tf.cholesky(k) for k in kernel_mat], 0) 294 | 295 | # Now build the objective function. 
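        # The objective assembled here is the negative ELBO,
        #     nelbo = -[(batch_size / num_train) * (entropy + cross_ent) + ell],
        # where `entropy` is the entropy of the mixture posterior q(u), `cross_ent`
        # is the expectation E_q[log p(u)] under the GP prior, and `ell` is a Monte
        # Carlo estimate of the expected log likelihood over the mini-batch.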
296 | entropy = self._build_entropy(weights, means, covars) 297 | cross_ent = self._build_cross_ent(weights, means, covars, kernel_chol) 298 | ell = self._build_ell(weights, means, covars, inducing_inputs, 299 | kernel_chol, train_inputs, train_outputs) 300 | batch_size = tf.to_float(tf.shape(train_inputs)[0]) 301 | nelbo = -((batch_size / num_train) * (entropy + cross_ent) + ell) 302 | 303 | # Build the leave one out loss function. 304 | loo_loss = self._build_loo_loss( 305 | weights, means, covars, inducing_inputs, 306 | kernel_chol, train_inputs, train_outputs) 307 | 308 | # Finally, build the prediction function. 309 | predictions = self._build_predict( 310 | weights, means, covars, inducing_inputs, 311 | kernel_chol, test_inputs) 312 | 313 | return nelbo, loo_loss, predictions 314 | 315 | def _build_loo_loss(self, weights, means, covars, inducing_inputs, 316 | kernel_chol, train_inputs, train_outputs): 317 | kern_prods, kern_sums = self._build_interim_vals( 318 | kernel_chol, inducing_inputs, train_inputs) 319 | loss = 0 320 | for i in range(self.num_components): 321 | covar_input = ( 322 | covars[i, :, :] 323 | if self.diag_post 324 | else covars[i, :, :, :] 325 | ) 326 | latent_samples = self._build_samples( 327 | kern_prods, kern_sums, 328 | means[i, :, :], covar_input) 329 | loss += weights[i] * tf.reduce_mean( 330 | 1.0 / (tf.exp( 331 | self.likelihood.log_cond_prob( 332 | train_outputs, latent_samples 333 | ) 334 | ) + 1e-7), 335 | 0 336 | ) 337 | return tf.reduce_sum(tf.log(loss)) 338 | 339 | def _build_predict(self, weights, means, covars, inducing_inputs, 340 | kernel_chol, test_inputs): 341 | kern_prods, kern_sums = self._build_interim_vals(kernel_chol, inducing_inputs, test_inputs) 342 | pred_means = util.init_list(0.0, [self.num_components]) 343 | pred_vars = util.init_list(0.0, [self.num_components]) 344 | for i in range(self.num_components): 345 | covar_input = covars[i, :, :] if self.diag_post else covars[i, :, :, :] 346 | sample_means, sample_vars = self._build_sample_info(kern_prods, kern_sums, 347 | means[i, :, :], covar_input) 348 | pred_means[i], pred_vars[i] = self.likelihood.predict(sample_means, sample_vars) 349 | 350 | pred_means = tf.stack(pred_means, 0) 351 | pred_vars = tf.stack(pred_vars, 0) 352 | 353 | # Compute the mean and variance of the gaussian mixture from their components. 354 | weights = tf.expand_dims(tf.expand_dims(weights, 1), 1) 355 | weighted_means = tf.reduce_sum(weights * pred_means, 0) 356 | weighted_vars = (tf.reduce_sum(weights * (pred_means ** 2 + pred_vars), 0) - 357 | tf.reduce_sum(weights * pred_means, 0) ** 2) 358 | return weighted_means, weighted_vars 359 | 360 | def _build_entropy(self, weights, means, covars): 361 | # First build half a square matrix of normals. This avoids re-computing symmetric normals. 362 | log_normal_probs = util.init_list(0.0, [self.num_components, self.num_components]) 363 | for i in range(self.num_components): 364 | for j in range(i, self.num_components): 365 | for k in range(self.num_latent): 366 | if self.diag_post: 367 | normal = util.DiagNormal(means[i, k, :], covars[i, k, :] + 368 | covars[j, k, :]) 369 | else: 370 | if i == j: 371 | # Compute chol(2S) = sqrt(2)*chol(S). 372 | covars_sum = tf.sqrt(2.0) * covars[i, k, :, :] 373 | else: 374 | # TODO(karl): Can we just stay in cholesky space somehow? 
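                        # covars[i, k] and covars[j, k] are Cholesky factors, so the
                        # covariance of this pairwise term is S_i + S_j = L_i L_i^T +
                        # L_j L_j^T (util.mat_square presumably forms L L^T); a fresh
                        # Cholesky of the sum is needed since Cholesky factors do not add.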
375 | covars_sum = tf.cholesky(util.mat_square(covars[i, k, :, :]) + 376 | util.mat_square(covars[j, k, :, :])) 377 | normal = util.CholNormal(means[i, k, :], covars_sum) 378 | log_normal_probs[i][j] += normal.log_prob(means[j, k, :]) 379 | 380 | # Now compute the entropy. 381 | entropy = 0.0 382 | for i in range(self.num_components): 383 | weighted_log_probs = util.init_list(0.0, [self.num_components]) 384 | for j in range(self.num_components): 385 | if i <= j: 386 | weighted_log_probs[j] = tf.log(weights[j]) + log_normal_probs[i][j] 387 | else: 388 | weighted_log_probs[j] = tf.log(weights[j]) + log_normal_probs[j][i] 389 | 390 | entropy -= weights[i] * util.logsumexp(tf.stack(weighted_log_probs)) 391 | 392 | return entropy 393 | 394 | def _build_cross_ent(self, weights, means, covars, kernel_chol): 395 | cross_ent = 0.0 396 | for i in range(self.num_components): 397 | sum_val = 0.0 398 | for j in range(self.num_latent): 399 | if self.diag_post: 400 | # TODO(karl): this is a bit inefficient since we're not making use of the fact 401 | # that covars is diagonal. A solution most likely involves a custom tf op. 402 | trace = tf.trace(tf.cholesky_solve(kernel_chol[j, :, :], 403 | tf.diag(covars[i, j, :]))) 404 | else: 405 | trace = tf.reduce_sum(util.diag_mul( 406 | tf.cholesky_solve(kernel_chol[j, :, :], covars[i, j, :, :]), 407 | tf.transpose(covars[i, j, :, :]))) 408 | 409 | sum_val += (util.CholNormal(means[i, j, :], kernel_chol[j, :, :]).log_prob(0.0) - 410 | 0.5 * trace) 411 | 412 | cross_ent += weights[i] * sum_val 413 | 414 | return cross_ent 415 | 416 | def _build_ell(self, weights, means, covars, inducing_inputs, 417 | kernel_chol, train_inputs, train_outputs): 418 | kern_prods, kern_sums = self._build_interim_vals(kernel_chol, inducing_inputs, train_inputs) 419 | ell = 0 420 | for i in range(self.num_components): 421 | covar_input = covars[i, :, :] if self.diag_post else covars[i, :, :, :] 422 | latent_samples = self._build_samples(kern_prods, kern_sums, 423 | means[i, :, :], covar_input) 424 | ell += weights[i] * tf.reduce_sum(self.likelihood.log_cond_prob(train_outputs, 425 | latent_samples)) 426 | 427 | return ell / self.num_samples 428 | 429 | def _build_interim_vals(self, kernel_chol, inducing_inputs, train_inputs): 430 | kern_prods = util.init_list(0.0, [self.num_latent]) 431 | kern_sums = util.init_list(0.0, [self.num_latent]) 432 | for i in range(self.num_latent): 433 | ind_train_kern = self.kernels[i].kernel(inducing_inputs[i, :, :], train_inputs) 434 | # Compute A = Kxz.Kzz^(-1) = (Kzz^(-1).Kzx)^T. 435 | kern_prods[i] = tf.transpose(tf.cholesky_solve(kernel_chol[i, :, :], ind_train_kern)) 436 | # We only need the diagonal components. 
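            # kern_sums[i] is diag(Kxx) - diag(A Kzx) with A = kern_prods[i], i.e. the
            # usual sparse-GP reduction in prior variance at the batch inputs after
            # conditioning on the inducing points.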
437 | kern_sums[i] = (self.kernels[i].diag_kernel(train_inputs) - 438 | util.diag_mul(kern_prods[i], ind_train_kern)) 439 | 440 | kern_prods = tf.stack(kern_prods, 0) 441 | kern_sums = tf.stack(kern_sums, 0) 442 | return kern_prods, kern_sums 443 | 444 | def _build_samples(self, kern_prods, kern_sums, means, covars): 445 | sample_means, sample_vars = self._build_sample_info(kern_prods, kern_sums, means, covars) 446 | batch_size = tf.shape(sample_means)[0] 447 | return (sample_means + tf.sqrt(sample_vars) * 448 | tf.random_normal([self.num_samples, batch_size, self.num_latent])) 449 | 450 | def _build_sample_info(self, kern_prods, kern_sums, means, covars): 451 | sample_means = util.init_list(0.0, [self.num_latent]) 452 | sample_vars = util.init_list(0.0, [self.num_latent]) 453 | for i in range(self.num_latent): 454 | if self.diag_post: 455 | quad_form = util.diag_mul(kern_prods[i, :, :] * covars[i, :], 456 | tf.transpose(kern_prods[i, :, :])) 457 | else: 458 | full_covar = tf.matmul(covars[i, :, :], tf.transpose(covars[i, :, :])) 459 | quad_form = util.diag_mul(tf.matmul(kern_prods[i, :, :], full_covar), 460 | tf.transpose(kern_prods[i, :, :])) 461 | sample_means[i] = tf.matmul(kern_prods[i, :, :], tf.expand_dims(means[i, :], 1)) 462 | sample_vars[i] = tf.expand_dims(kern_sums[i, :] + quad_form, 1) 463 | 464 | sample_means = tf.concat( sample_means,1) 465 | sample_vars = tf.concat(sample_vars,1) 466 | return sample_means, sample_vars 467 | -------------------------------------------------------------------------------- /autogp/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .kernel import Kernel 3 | from .radial_basis import RadialBasis 4 | from .arc_cosine import ArcCosine 5 | -------------------------------------------------------------------------------- /autogp/kernels/arc_cosine.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from . 
import kernel 6 | 7 | 8 | class ArcCosine(kernel.Kernel): 9 | def __init__(self, input_dim, degree=0, depth=1, lengthscale=1.0, 10 | std_dev=1.0, white=1e-4, input_scaling=False): 11 | self.degree = degree 12 | self.depth = depth 13 | self.white = white 14 | self.std_dev = tf.Variable([std_dev], dtype=tf.float32) 15 | if input_scaling: 16 | self.lengthscale = tf.Variable(lengthscale * tf.ones([input_dim])) 17 | else: 18 | self.lengthscale = tf.Variable([lengthscale], dtype=tf.float32) 19 | 20 | def kernel(self, points1, points2=None): 21 | if points2 is None: 22 | points2 = points1 23 | white_noise = self.white * tf.eye(tf.shape(points1)[0]) 24 | else: 25 | white_noise = 0.0 26 | 27 | kern = self.recursive_kernel( 28 | points1 / self.lengthscale, 29 | points2 / self.lengthscale, self.depth) 30 | return (self.std_dev ** 2) * kern + white_noise 31 | 32 | def recursive_kernel(self, points1, points2, depth): 33 | if depth == 1: 34 | mag_sqr1 = tf.expand_dims(tf.reduce_sum(points1 ** 2, 1), 1) 35 | mag_sqr2 = tf.expand_dims(tf.reduce_sum(points2 ** 2, 1), 1) 36 | point_prod = tf.matmul(points1, tf.transpose(points2)) 37 | else: 38 | mag_sqr1 = tf.expand_dims( 39 | self.diag_recursive_kernel(points1, depth - 1), 40 | 1) 41 | mag_sqr2 = tf.expand_dims( 42 | self.diag_recursive_kernel(points2, depth - 1), 43 | 1) 44 | point_prod = self.recursive_kernel(points1, points2, depth - 1) 45 | 46 | mag_prod = tf.sqrt(mag_sqr1) * tf.transpose(tf.sqrt(mag_sqr2)) 47 | cos_angles = (2 * point_prod) / ( 48 | tf.sqrt(1 + 2 * mag_sqr1) * tf.transpose( 49 | tf.sqrt(1 + 2 * mag_sqr2) 50 | ) 51 | ) 52 | 53 | return ( 54 | ((mag_prod ** self.degree) / np.pi) * 55 | self.angular_func(cos_angles) 56 | ) 57 | 58 | def diag_kernel(self, points): 59 | return (self.std_dev ** 2) * self.diag_recursive_kernel( 60 | points / self.lengthscale, self.depth 61 | ) + self.white 62 | 63 | # TODO(karl): Add a memoize decorator. 64 | # @util.memoize 65 | def diag_recursive_kernel(self, points, depth): 66 | # TODO(karl): Consider computing this in closed form. 
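        # Diagonal of the recursive arc-cosine kernel (in the spirit of Cho & Saul's
        # multi-layer arc-cosine kernels): the base case uses the squared input norms,
        # and each further layer feeds the previous layer's diagonal through the same
        # degree-dependent angular transform used in recursive_kernel above.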
67 | if depth == 1: 68 | mag_sqr = tf.reduce_sum(points ** 2, 1) 69 | else: 70 | mag_sqr = self.diag_recursive_kernel(points, depth - 1) 71 | 72 | return ( 73 | (mag_sqr ** self.degree) * self.angular_func( 74 | 2 * mag_sqr / (1 + 2 * mag_sqr) 75 | ) / np.pi) 76 | 77 | def angular_func(self, cos_angles): 78 | angles = tf.acos(cos_angles) 79 | sin_angles = tf.sin(angles) 80 | pi_diff = np.pi - angles 81 | if self.degree == 0: 82 | return pi_diff 83 | elif self.degree == 1: 84 | return sin_angles + pi_diff * cos_angles 85 | elif self.degree == 2: 86 | return 3 * sin_angles * cos_angles + pi_diff * ( 87 | 1 + 2 * cos_angles ** 2 88 | ) 89 | else: 90 | assert False 91 | 92 | def get_params(self): 93 | return [self.std_dev, self.lengthscale] 94 | -------------------------------------------------------------------------------- /autogp/kernels/kernel.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class Kernel: 5 | __metaclass__ = abc.ABCMeta 6 | 7 | @abc.abstractmethod 8 | def kernel(self, inputs1, inputs2=None): 9 | pass 10 | 11 | @abc.abstractmethod 12 | def diag_kernel(self, inputs1, inputs2=None): 13 | pass 14 | 15 | @abc.abstractmethod 16 | def get_params(self): 17 | pass 18 | -------------------------------------------------------------------------------- /autogp/kernels/radial_basis.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tensorflow as tf 3 | 4 | from . import kernel 5 | 6 | 7 | class RadialBasis(kernel.Kernel): 8 | MAX_DIST = 1e8 9 | 10 | def __init__(self, input_dim, lengthscale=1.0, std_dev=1.0, 11 | white=0.01, input_scaling=False): 12 | if input_scaling: 13 | self.lengthscale = tf.Variable(lengthscale * tf.ones([input_dim])) 14 | else: 15 | self.lengthscale = tf.Variable([lengthscale], dtype=tf.float32) 16 | 17 | self.std_dev = tf.Variable([std_dev], dtype=tf.float32) 18 | self.input_dim = input_dim 19 | self.white = white 20 | 21 | def kernel(self, points1, points2=None): 22 | if points2 is None: 23 | points2 = points1 24 | white_noise = self.white * tf.eye(tf.shape(points1)[0]) 25 | else: 26 | white_noise = 0.0 27 | 28 | points1 = points1 / self.lengthscale 29 | points2 = points2 / self.lengthscale 30 | magnitude_square1 = tf.expand_dims(tf.reduce_sum(points1 ** 2, 1), 1) 31 | magnitude_square2 = tf.expand_dims(tf.reduce_sum(points2 ** 2, 1), 1) 32 | distances = ( 33 | magnitude_square1 - 2 * tf.matmul( 34 | points1, 35 | tf.transpose(points2) 36 | ) + tf.transpose(magnitude_square2)) 37 | distances = tf.clip_by_value(distances, 0.0, self.MAX_DIST) 38 | 39 | kern = ((self.std_dev ** 2) * tf.exp(-distances / 2.0)) 40 | return kern + white_noise 41 | 42 | def diag_kernel(self, points): 43 | return ( 44 | (self.std_dev ** 2) + self.white 45 | ) * tf.ones([tf.shape(points)[0]]) 46 | 47 | def get_params(self): 48 | return [self.lengthscale, self.std_dev] 49 | -------------------------------------------------------------------------------- /autogp/likelihoods/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .likelihood import Likelihood 3 | from .logistic import Logistic 4 | from .gaussian import Gaussian 5 | from .softmax import Softmax 6 | from .regression_network import RegressionNetwork 7 | -------------------------------------------------------------------------------- /autogp/likelihoods/gaussian.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from . import likelihood 6 | 7 | 8 | class Gaussian(likelihood.Likelihood): 9 | def __init__(self, std_dev=1.0): 10 | # Save the raw standard deviation. Note that this value can be negative. 11 | self.raw_std_dev = tf.Variable(std_dev) 12 | 13 | def log_cond_prob(self, outputs, latent): 14 | var = self.raw_std_dev ** 2 15 | return -0.5 * tf.log(2.0 * np.pi * var) - ((outputs - latent) ** 2) / (2.0 * var) 16 | 17 | def get_params(self): 18 | return [self.raw_std_dev] 19 | 20 | def predict(self, latent_means, latent_vars): 21 | return latent_means, latent_vars + self.raw_std_dev ** 2 22 | 23 | -------------------------------------------------------------------------------- /autogp/likelihoods/likelihood.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class Likelihood: 5 | __metaclass__ = abc.ABCMeta 6 | 7 | @abc.abstractmethod 8 | def log_cond_prob(self, outputs, latent): 9 | raise NotImplementedError("Subclass should implement this.") 10 | 11 | @abc.abstractmethod 12 | def get_params(self): 13 | raise NotImplementedError("Subclass should implement this.") 14 | 15 | @abc.abstractmethod 16 | def predict(self, latent_means, latent_vars): 17 | raise NotImplementedError("Subclass should implement this.") 18 | 19 | -------------------------------------------------------------------------------- /autogp/likelihoods/logistic.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tensorflow as tf 3 | from . import likelihood 4 | 5 | 6 | class Logistic(likelihood.Likelihood): 7 | def __init__(self, num_samples=2000): 8 | self.num_samples = num_samples 9 | 10 | def log_cond_prob(self, outputs, latent): 11 | return latent * (outputs - 1) - tf.log(1 + tf.exp(-latent)) 12 | 13 | def get_params(self): 14 | return [] 15 | 16 | def predict(self, latent_means, latent_vars): 17 | # Generate samples to estimate the expected value 18 | # and variance of outputs. 19 | num_points = tf.shape(latent_means)[0] 20 | latent = ( 21 | latent_means + tf.sqrt(latent_vars) * 22 | tf.random_normal([self.num_samples, num_points, 1]) 23 | ) 24 | # Compute the softmax of all generated latent values 25 | # in a stable fashion. 26 | logistic = 1.0 / (1.0 + tf.exp(-latent)) 27 | 28 | # Estimate the expected value of the softmax 29 | # and the variance through sampling. 30 | pred_means = tf.reduce_mean(logistic, 0) 31 | pred_vars = tf.reduce_sum( 32 | (logistic - pred_means) ** 2, 0 33 | ) / (self.num_samples - 1.0) 34 | 35 | return pred_means, pred_vars 36 | -------------------------------------------------------------------------------- /autogp/likelihoods/regression_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from . 
import likelihood 6 | 7 | 8 | class RegressionNetwork(likelihood.Likelihood): 9 | def __init__(self, output_dim, std_dev, num_samples=5000): 10 | self.output_dim = output_dim 11 | self.num_samples = num_samples 12 | self.log_std_dev = tf.Variable( 13 | np.ones([self.output_dim]) * np.log(std_dev), 14 | dtype=tf.float32 15 | ) 16 | 17 | def log_cond_prob(self, outputs, latent): 18 | weights = latent[:, :, :self.output_dim] 19 | inputs = latent[:, :, self.output_dim:] 20 | prod = weights * inputs 21 | # diff = outputs - prod 22 | covar = tf.exp(self.log_std_dev) 23 | quad_form = tf.reduce_sum(1.0 / covar * (outputs - prod) ** 2, 2) 24 | return -0.5 * ( 25 | self.output_dim * tf.log(2.0 * np.pi) + 26 | tf.reduce_sum(covar) + quad_form) 27 | 28 | def get_params(self): 29 | return [self.log_std_dev] 30 | 31 | def predict(self, latent_means, latent_vars): 32 | # Generate samples to estimate the expected value 33 | # and variance of outputs. 34 | num_points = tf.shape(latent_means)[0] 35 | output_dims = tf.shape(latent_means)[1] 36 | latent = ( 37 | latent_means + tf.sqrt(latent_vars) * 38 | tf.random_normal([self.num_samples, num_points, output_dims])) 39 | weights = latent[:, :, :output_dims - 1] 40 | inputs = latent[:, :, output_dims - 1:] 41 | prod = weights * inputs 42 | return tf.reduce_mean(prod, 0), tf.reduce_mean(prod, 0) 43 | -------------------------------------------------------------------------------- /autogp/likelihoods/softmax.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from autogp import util 6 | from . import likelihood 7 | 8 | 9 | class Softmax(likelihood.Likelihood): 10 | def __init__(self, num_samples=2000): 11 | self.num_samples = num_samples 12 | 13 | def log_cond_prob(self, outputs, latent): 14 | return tf.reduce_sum(outputs * latent, 2) - util.logsumexp(latent, 2) 15 | 16 | def get_params(self): 17 | return [] 18 | 19 | def predict(self, latent_means, latent_vars): 20 | # Generate samples to estimate the expected value and variance of outputs. 21 | num_points = tf.shape(latent_means)[0] 22 | output_dims = tf.shape(latent_means)[1] 23 | latent = (latent_means + tf.sqrt(latent_vars) * 24 | tf.random_normal([self.num_samples, num_points, output_dims])) 25 | # Compute the softmax of all generated latent values in a stable fashion. 26 | softmax = tf.exp(latent - tf.expand_dims(util.logsumexp(latent, 2), 2)) 27 | 28 | # Estimate the expected value of the softmax and the variance through sampling. 
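        # Simple Monte Carlo estimate: pred_means approximates E[softmax(f)] over
        # num_samples draws of f, and pred_vars is the corresponding sample variance
        # (the num_samples - 1 denominator gives the unbiased estimator).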
29 | pred_means = tf.reduce_mean(softmax, 0) 30 | pred_vars = tf.reduce_sum((softmax - pred_means) ** 2, 0) / (self.num_samples - 1.0) 31 | 32 | return pred_means, pred_vars 33 | 34 | -------------------------------------------------------------------------------- /autogp/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .loss import Loss 3 | from .zero_one_loss import ZeroOneLoss 4 | from .mean_sq_error import RootMeanSqError -------------------------------------------------------------------------------- /autogp/losses/loss.py: -------------------------------------------------------------------------------- 1 | class Loss(object): 2 | 3 | def __init__(self, dout): 4 | self.dout = dout 5 | 6 | def eval(self, _ytrue, _ypred): 7 | """ 8 | Subclass should implement log p(Y | F) 9 | :param output: (batch_size x Dout) matrix containing true outputs 10 | :param latent_val: (MC x batch_size x Q) matrix 11 | of latent function values, usually Q=F 12 | :return: 13 | """ 14 | raise NotImplementedError("Subclass should implement this.") 15 | 16 | def get_name(self): 17 | raise NotImplementedError("Subclass should implement this.") 18 | -------------------------------------------------------------------------------- /autogp/losses/mean_sq_error.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import loss 4 | 5 | 6 | class RootMeanSqError(loss.Loss): 7 | def __init__(self, dout): 8 | loss.Loss.__init__(self, dout) 9 | 10 | def eval(self, ytrue, ypred): 11 | error_rate = np.sqrt(np.mean(np.square(ytrue - ypred))) 12 | return error_rate 13 | 14 | def get_name(self): 15 | return "RMSE" 16 | -------------------------------------------------------------------------------- /autogp/losses/neg_log_like.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import loss 3 | 4 | 5 | class NegLogLikelihood(loss.Loss): 6 | 7 | def __init__(self, dout): 8 | loss.Loss.__init__(self, dout) 9 | 10 | def eval(self, ytrue, ypred): 11 | return self.like.log_cond_prob(ytrue, ypred) 12 | 13 | def get_name(self): 14 | return "Negative Log Likelihood" 15 | -------------------------------------------------------------------------------- /autogp/losses/zero_one_loss.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . 
import loss 4 | 5 | 6 | class ZeroOneLoss(loss.Loss): 7 | def __init__(self, dout): 8 | loss.Loss.__init__(self, dout) 9 | 10 | def eval(self, ytrue, ypred): 11 | if ytrue.shape[1] == 1: 12 | error_rate = np.mean(np.round(ypred) != ytrue) 13 | else: 14 | error_rate = np.mean(np.argmax(ypred, 1) != np.argmax(ytrue, 1)) 15 | return error_rate 16 | 17 | def get_name(self): 18 | return "Error Rate" 19 | -------------------------------------------------------------------------------- /autogp/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .normals import CholNormal 3 | from .normals import DiagNormal 4 | from .util import tri_vec_shape 5 | from .util import ceil_divide 6 | from .util import get_flags 7 | from .util import log_cholesky_det 8 | from .util import diag_mul 9 | from .util import init_list 10 | from .util import logsumexp 11 | from .util import mat_square 12 | -------------------------------------------------------------------------------- /autogp/util/normals.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from . import util 6 | 7 | 8 | class Normal(object): 9 | def __init__(self, mean, covar): 10 | self.mean = mean 11 | self.covar = covar 12 | 13 | 14 | class CholNormal(Normal): 15 | def prob(self, val): 16 | return tf.exp(self.log_prob(val)) 17 | 18 | def log_prob(self, val): 19 | dim = tf.to_float(tf.shape(self.mean)[0]) 20 | diff = tf.expand_dims(val - self.mean, 1) 21 | quad_form = tf.reduce_sum(diff * tf.cholesky_solve(self.covar, diff)) 22 | return -0.5 * (dim * tf.log(2.0 * np.pi) + util.log_cholesky_det(self.covar) + 23 | quad_form) 24 | 25 | 26 | class DiagNormal(Normal): 27 | def prob(self, val): 28 | return tf.exp(self.log_prob(val)) 29 | 30 | def log_prob(self, val): 31 | dim = tf.to_float(tf.shape(self.mean)[0]) 32 | quad_form = tf.reduce_sum((val - self.mean) ** 2 / self.covar) 33 | return -0.5 * (dim * tf.log(2.0 * np.pi) + tf.reduce_sum(tf.log(self.covar)) + quad_form) 34 | 35 | -------------------------------------------------------------------------------- /autogp/util/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import copy 3 | import tensorflow as tf 4 | 5 | 6 | def tri_vec_shape(N): 7 | return [N * (N + 1) // 2] 8 | 9 | 10 | def init_list(init, dims): 11 | def empty_list(dims): 12 | if not dims: 13 | return None 14 | else: 15 | return [copy.deepcopy(empty_list(dims[1:])) for i in range(dims[0])] 16 | 17 | def fill_list(dims, l): 18 | if len(dims) == 1: 19 | for i in range(dims[0]): 20 | if callable(init): 21 | l[i] = init() 22 | else: 23 | l[i] = init 24 | else: 25 | for i in range(dims[0]): 26 | fill_list(dims[1:], l[i]) 27 | 28 | l = empty_list(dims) 29 | fill_list(dims, l) 30 | 31 | return l 32 | 33 | 34 | def ceil_divide(dividend, divisor): 35 | return (dividend + divisor - 1) / divisor 36 | 37 | 38 | def log_cholesky_det(chol): 39 | return 2 * tf.reduce_sum(tf.log(tf.diag_part(chol))) 40 | 41 | 42 | def diag_mul(mat1, mat2): 43 | return tf.reduce_sum(mat1 * tf.transpose(mat2), 1) 44 | 45 | 46 | def logsumexp(vals, dim=None): 47 | m = tf.reduce_max(vals, dim) 48 | if dim is None: 49 | return m + tf.log(tf.reduce_sum(tf.exp(vals - m), dim)) 50 | else: 51 | return m + tf.log(tf.reduce_sum(tf.exp(vals - tf.expand_dims(m, dim)), dim)) 52 | 
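# (Illustration only, not part of util.py.) The max-subtraction trick in
# logsumexp above keeps the exponentials in range without changing the result,
# since log sum_i exp(v_i) = m + log sum_i exp(v_i - m) for any m:
#
#     import numpy as np
#     vals = np.array([[1000.0, 999.0], [3.0, 4.0]])
#     m = vals.max(axis=1, keepdims=True)
#     stable = m[:, 0] + np.log(np.exp(vals - m).sum(axis=1))
#     # stable ~= [1000.3133, 4.3133]; a naive log(sum(exp(vals))) would
#     # overflow to inf for the first row.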
53 | def mat_square(mat): 54 | return tf.matmul(mat, tf.transpose(mat)) 55 | 56 | def get_flags(): 57 | flags = tf.app.flags 58 | FLAGS = flags.FLAGS 59 | flags.DEFINE_integer('batch_size', 100, 'Batch size. ' 60 | 'Must divide evenly into the dataset sizes.') 61 | flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.') 62 | flags.DEFINE_integer('n_epochs', 10000, 'Number of passes through the data') 63 | flags.DEFINE_integer('n_inducing', 240, 'Number of inducing points') 64 | flags.DEFINE_integer('display_step', 500, 'Display progress every FLAGS.display_step iterations') 65 | flags.DEFINE_integer('mc_train', 100, 'Number of Monte Carlo samples used to compute stochastic gradients') 66 | flags.DEFINE_integer('mc_test', 100, 'Number of Monte Carlo samples for predictions') 67 | flags.DEFINE_string('optimizer', "adagrad", 'Optimizer') 68 | flags.DEFINE_boolean('is_ard', True, 'Using ARD kernel or isotropic') 69 | flags.DEFINE_float('lengthscale', 10, 'Initial lengthscale') 70 | flags.DEFINE_integer('var_steps', 50, 'Number of times spent optimizing the variational objective.') 71 | flags.DEFINE_integer('loocv_steps', 50, 'Number of times spent optimizing the LOOCV objective.') 72 | flags.DEFINE_float('opt_growth', 0.0, 'Percentage to grow the number of each optimizations.') 73 | flags.DEFINE_integer('num_components', 1, 'Number of mixture components on posterior') 74 | flags.DEFINE_string('kernel', 'rbf', 'kernel') 75 | flags.DEFINE_string('device_name', 'gpu0', 'Device name') 76 | flags.DEFINE_integer('kernel_degree', 0, 'Degree of arccosine kernel') 77 | flags.DEFINE_integer('kernel_depth', 1, 'Depth of arcosine kernel') 78 | return FLAGS 79 | 80 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | import autogp 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | # Generate synthetic data. 7 | N_all = 200 8 | N = 50 9 | inputs = 5 * np.linspace(0, 1, num=N_all)[:, np.newaxis] 10 | outputs = np.sin(inputs) 11 | 12 | # selects training and test 13 | idx = np.arange(N_all) 14 | np.random.shuffle(idx) 15 | xtrain = inputs[idx[:N]] 16 | ytrain = outputs[idx[:N]] 17 | data = autogp.datasets.DataSet(xtrain, ytrain) 18 | xtest = inputs[idx[N:]] 19 | ytest = outputs[idx[N:]] 20 | 21 | # Initialize the Gaussian process. 22 | likelihood = autogp.likelihoods.Gaussian() 23 | kernel = [autogp.kernels.RadialBasis(1)] 24 | inducing_inputs = xtrain 25 | model = autogp.GaussianProcess(likelihood, kernel, inducing_inputs) 26 | 27 | # Train the model. 28 | optimizer = tf.train.RMSPropOptimizer(0.005) 29 | model.fit(data, optimizer, loo_steps=50, var_steps=50, epochs=1000) 30 | 31 | # Predict new inputs. 
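# The script below keeps only the predictive means.  The second value returned
# by predict() is the predictive variance (the likelihood classes above return
# a (mean, variance) pair), so an uncertainty band can be drawn as well.
# A small optional sketch, not part of the original script:
#
#     ypred, yvar = model.predict(xtest)
#     order = np.argsort(xtest[:, 0])
#     plt.fill_between(xtest[order, 0],
#                      (ypred - 2 * np.sqrt(yvar))[order, 0],
#                      (ypred + 2 * np.sqrt(yvar))[order, 0],
#                      alpha=0.3)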
32 | ypred, _ = model.predict(xtest) 33 | plt.plot(xtrain, ytrain, '.', mew=2) 34 | plt.plot(xtest, ytest, 'o', mew=2) 35 | plt.plot(xtest, ypred, 'x', mew=2) 36 | plt.show() 37 | 38 | 39 | -------------------------------------------------------------------------------- /experiments/cifar10.py: -------------------------------------------------------------------------------- 1 | import sklearn.cluster 2 | import numpy as np 3 | import autogp 4 | from autogp import likelihoods 5 | from autogp import kernels 6 | import tensorflow as tf 7 | from autogp import datasets 8 | from autogp import losses 9 | from autogp import util 10 | import os 11 | import subprocess 12 | 13 | DATA_DIR = 'experiments/data/cifar-10-batches-py/' 14 | 15 | def init_z(train_inputs, num_inducing): 16 | # Initialize inducing points using clustering. 17 | mini_batch = sklearn.cluster.MiniBatchKMeans(num_inducing) 18 | cluster_indices = mini_batch.fit_predict(train_inputs) 19 | inducing_locations = mini_batch.cluster_centers_ 20 | return inducing_locations 21 | 22 | 23 | def get_cifar_data(): 24 | print "Getting cifar10 data ..." 25 | os.chdir('experiments/data') 26 | subprocess.call(["./get_cifar10_data.sh"]) 27 | os.chdir("../../") 28 | print "done" 29 | 30 | def load_cifar(): 31 | if os.path.isdir(DATA_DIR) is False: # directory does not exist, download the data 32 | get_cifar_data() 33 | 34 | import cPickle 35 | train_X = np.empty([0, 3072], dtype=np.float32) 36 | train_Y = np.empty([0, 10], dtype=np.float32) 37 | for i in range(1, 6): 38 | f = open(DATA_DIR + "data_batch_" + str(i)) 39 | d = cPickle.load(f) 40 | f.close() 41 | train_X = np.concatenate([train_X, d["data"]]) 42 | train_Y = np.concatenate([train_Y, np.eye(10)[d["labels"]]]) 43 | f = open(DATA_DIR + "test_batch") 44 | d = cPickle.load(f) 45 | f.close() 46 | train_X = train_X / 255.0 47 | test_X = np.array(d["data"], dtype=np.float32) / 255.0 48 | test_Y = np.array(np.eye(10)[d["labels"]], dtype=np.float32) 49 | return train_X, train_Y, test_X, test_Y 50 | 51 | 52 | if __name__ == '__main__': 53 | FLAGS = util.util.get_flags() 54 | BATCH_SIZE = FLAGS.batch_size 55 | LEARNING_RATE = FLAGS.learning_rate 56 | DISPLAY_STEP = FLAGS.display_step 57 | EPOCHS = FLAGS.n_epochs 58 | NUM_SAMPLES = FLAGS.mc_train 59 | NUM_INDUCING = FLAGS.n_inducing 60 | IS_ARD = FLAGS.is_ard 61 | 62 | train_X, train_Y, test_X, test_Y = load_cifar() 63 | data = datasets.DataSet(train_X, train_Y) 64 | test = datasets.DataSet(test_X, test_Y) 65 | 66 | # Setup initial values for the model. 
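# An aside on the label encoding used in load_cifar() above: indexing the
# identity matrix with the integer labels yields one-hot rows, which is the
# target format the Softmax likelihood expects.  For example:
#
#     >>> import numpy as np
#     >>> np.eye(3)[[0, 2, 1]]
#     array([[1., 0., 0.],
#            [0., 0., 1.],
#            [0., 1., 0.]])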
67 | likelihood = likelihoods.Softmax() 68 | kern = [kernels.RadialBasis(data.X.shape[1], lengthscale=10.0, input_scaling = IS_ARD) for i in range(10)] 69 | # kern = [kernels.ArcCosine(X.shape[1], 2, 3, 5.0, 1.0, input_scaling=True) for i in range(10)] #RadialBasis(X.shape[1], input_scaling=True) for i in range(10)] 70 | 71 | Z = init_z(data.X, NUM_INDUCING) 72 | m = autogp.GaussianProcess(likelihood, kern, Z, num_samples=NUM_SAMPLES) 73 | 74 | # setting up loss to be reported during training 75 | error_rate = losses.ZeroOneLoss(data.Dout) 76 | 77 | o = tf.train.RMSPropOptimizer(LEARNING_RATE) 78 | m.fit(data, o, loo_steps=50, var_steps=50, epochs=EPOCHS, batch_size=BATCH_SIZE, display_step=DISPLAY_STEP, test=test, 79 | loss=error_rate) 80 | ypred = m.predict(test.X)[0] 81 | print("Final " + error_rate.get_name() + "=" + "%.4f" % error_rate.eval(test.Y, ypred)) 82 | 83 | 84 | -------------------------------------------------------------------------------- /experiments/data/get_cifar10_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 4 | tar -xvf cifar-10-python.tar.gz 5 | 6 | -------------------------------------------------------------------------------- /experiments/data/get_mnist8m_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -d infimnist ] 4 | then 5 | exit 6 | fi 7 | 8 | wget http://leon.bottou.org/_media/projects/infimnist.tar.gz 9 | tar -xvf infimnist.tar.gz 10 | cd infimnist 11 | make 12 | ./infimnist pat 10000 8109999 > "train-patterns" 13 | ./infimnist lab 10000 8109999 > "train-labels" 14 | ./infimnist pat 0 9999 > "test-patterns" 15 | ./infimnist lab 0 9999 > "test-labels" 16 | gzip "train-patterns" 17 | gzip "train-labels" 18 | gzip "test-patterns" 19 | gzip "test-labels" 20 | cd .. 21 | -------------------------------------------------------------------------------- /experiments/data/get_rectangles_images_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wget http://www.iro.umontreal.ca/~lisa/icml2007data/rectangles_images.zip 4 | unzip rectangles_images.zip 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /experiments/data/get_sarcos_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wget http://www.gaussianprocess.org/gpml/data/sarcos_inv.mat 4 | wget http://www.gaussianprocess.org/gpml/data/sarcos_inv_test.mat 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /experiments/mnist.py: -------------------------------------------------------------------------------- 1 | import sklearn.cluster 2 | import numpy as np 3 | import autogp 4 | from autogp import likelihoods 5 | from autogp import kernels 6 | import tensorflow as tf 7 | from autogp import datasets 8 | from autogp import losses 9 | from autogp import util 10 | 11 | def init_z(train_inputs, num_inducing): 12 | # Initialize inducing points using clustering. 
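# K-means centroids spread the inducing inputs over the regions where the
# training data actually lives.  A cheaper alternative (a sketch, not used in
# these experiments) is to take a random subset of the training inputs:
#
#     def init_z_random(train_inputs, num_inducing, seed=0):
#         rng = np.random.RandomState(seed)
#         idx = rng.choice(train_inputs.shape[0], num_inducing, replace=False)
#         return train_inputs[idx]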
13 | mini_batch = sklearn.cluster.MiniBatchKMeans(num_inducing) 14 | cluster_indices = mini_batch.fit_predict(train_inputs) 15 | inducing_locations = mini_batch.cluster_centers_ 16 | return inducing_locations 17 | 18 | 19 | if __name__ == '__main__': 20 | FLAGS = util.util.get_flags() 21 | BATCH_SIZE = FLAGS.batch_size 22 | LEARNING_RATE = FLAGS.learning_rate 23 | DISPLAY_STEP = FLAGS.display_step 24 | EPOCHS = FLAGS.n_epochs 25 | NUM_SAMPLES = FLAGS.mc_train 26 | NUM_INDUCING = FLAGS.n_inducing 27 | IS_ARD = FLAGS.is_ard 28 | 29 | data, test, _ = datasets.import_mnist() 30 | 31 | 32 | # Setup initial values for the model. 33 | likelihood = likelihoods.Softmax() 34 | kern = [kernels.RadialBasis(data.X.shape[1], lengthscale=10.0, input_scaling = IS_ARD) for i in range(10)] 35 | # kern = [kernels.ArcCosine(X.shape[1], 2, 3, 5.0, 1.0, input_scaling=True) for i in range(10)] #RadialBasis(X.shape[1], input_scaling=True) for i in range(10)] 36 | 37 | Z = init_z(data.X, NUM_INDUCING) 38 | m = autogp.GaussianProcess(likelihood, kern, Z, num_samples=NUM_SAMPLES) 39 | 40 | # setting up loss to be reported during training 41 | error_rate = losses.ZeroOneLoss(data.Dout) 42 | 43 | import time 44 | otime = time.time() 45 | o = tf.train.RMSPropOptimizer(LEARNING_RATE) 46 | start = time.time() 47 | m.fit(data, o, loo_steps=50, var_steps=50, epochs=EPOCHS, batch_size=BATCH_SIZE, display_step=DISPLAY_STEP, test=test, 48 | loss=error_rate) 49 | print time.time() - start 50 | print time.time() - otime 51 | 52 | ypred = m.predict(test.X)[0] 53 | print("Final " + error_rate.get_name() + "=" + "%.4f" % error_rate.eval(test.Y, ypred)) 54 | 55 | 56 | -------------------------------------------------------------------------------- /experiments/mnist8m.py: -------------------------------------------------------------------------------- 1 | import sklearn.cluster 2 | import numpy as np 3 | import autogp 4 | from autogp import likelihoods 5 | from autogp import kernels 6 | import tensorflow as tf 7 | from autogp import datasets 8 | from autogp import losses 9 | from autogp import util 10 | import subprocess 11 | from tensorflow.contrib.learn.python.learn.datasets import base 12 | from tensorflow.contrib.learn.python.learn.datasets.mnist import extract_labels 13 | from tensorflow.python.framework import dtypes 14 | import gzip 15 | import os 16 | 17 | 18 | DATA_DIR = "experiments/data/infimnist/" 19 | TRAIN_INPUTS = DATA_DIR + "train-patterns.gz" 20 | TRAIN_OUTPUTS = DATA_DIR + "train-labels.gz" 21 | TEST_INPUTS = DATA_DIR + "test-patterns.gz" 22 | TEST_OUTPUTS = DATA_DIR + "test-labels.gz" 23 | 24 | def _read32(bytestream): 25 | dt = np.dtype(np.uint32).newbyteorder('>') 26 | return np.frombuffer(bytestream.read(4), dtype=dt)[0] 27 | 28 | def extract_images(f): 29 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth]. 30 | 31 | Args: 32 | f: A file object that can be passed into a gzip reader. 33 | 34 | Returns: 35 | data: A 4D unit8 numpy array [index, y, x, depth]. 36 | 37 | Raises: 38 | ValueError: If the bytestream does not start with 2051. 
39 | 40 | """ 41 | print('Extracting', f.name) 42 | with gzip.GzipFile(fileobj=f) as bytestream: 43 | magic = _read32(bytestream) 44 | if magic != 2051: 45 | raise ValueError('Invalid magic number %d in MNIST image file: %s' % 46 | (magic, f.name)) 47 | num_images = int(_read32(bytestream)) 48 | rows = int(_read32(bytestream)) 49 | cols = int(_read32(bytestream)) 50 | buf = bytestream.read(rows * cols * num_images) 51 | data = np.frombuffer(buf, dtype=np.uint8) 52 | data = data.reshape(num_images, rows, cols, 1) 53 | return data 54 | 55 | def process_mnist(images, dtype = dtypes.float32, reshape=True): 56 | if reshape: 57 | assert images.shape[3] == 1 58 | images = images.reshape(images.shape[0], 59 | images.shape[1] * images.shape[2]) 60 | if dtype == dtypes.float32: 61 | # Convert from [0, 255] -> [0.0, 1.0]. 62 | images = images.astype(np.float32) 63 | images = np.multiply(images, 1.0 / 255.0) 64 | 65 | return images 66 | 67 | 68 | def get_mnist8m_data(): 69 | print "Getting mnist8m data ..." 70 | os.chdir('experiments/data') 71 | subprocess.call(["./get_mnist8m_data.sh"]) 72 | os.chdir("../../") 73 | print "done" 74 | 75 | def import_mnist(): 76 | if os.path.isdir(DATA_DIR) is False: # directory does not exist, download the data 77 | get_mnist8m_data() 78 | 79 | with open(TRAIN_INPUTS) as f: 80 | train_images = extract_images(f) 81 | train_images = process_mnist(train_images) 82 | 83 | with open(TRAIN_OUTPUTS) as f: 84 | train_labels = extract_labels(f, one_hot=True) 85 | 86 | with open(TEST_INPUTS) as f: 87 | test_images = extract_images(f) 88 | test_images = process_mnist(test_images) 89 | 90 | with open(TEST_OUTPUTS) as f: 91 | test_labels = extract_labels(f, one_hot=True) 92 | 93 | return datasets.DataSet(train_images, train_labels), datasets.DataSet(test_images, test_labels) 94 | 95 | 96 | def init_z(train_inputs, num_inducing): 97 | # Initialize inducing points using clustering. 98 | mini_batch = sklearn.cluster.MiniBatchKMeans(num_inducing) 99 | cluster_indices = mini_batch.fit_predict(train_inputs) 100 | inducing_locations = mini_batch.cluster_centers_ 101 | return inducing_locations 102 | 103 | 104 | if __name__ == '__main__': 105 | FLAGS = util.util.get_flags() 106 | BATCH_SIZE = FLAGS.batch_size 107 | LEARNING_RATE = FLAGS.learning_rate 108 | DISPLAY_STEP = FLAGS.display_step 109 | EPOCHS = FLAGS.n_epochs 110 | NUM_SAMPLES = FLAGS.mc_train 111 | NUM_INDUCING = FLAGS.n_inducing 112 | IS_ARD = FLAGS.is_ard 113 | 114 | data, test = import_mnist() 115 | 116 | # Setup initial values for the model. 
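# The readers above parse the IDX format produced by infimnist: a big-endian
# uint32 magic number (2051 for image files, 2049 for label files) followed by
# the array dimensions.  A minimal standalone sketch of reading just the image
# header (illustration only; assumes a gzipped IDX file):
#
#     import gzip
#     import struct
#
#     def read_idx_image_header(path):
#         with gzip.open(path, 'rb') as f:
#             magic, n, rows, cols = struct.unpack('>IIII', f.read(16))
#         assert magic == 2051, 'not an IDX image file'
#         return n, rows, cols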
117 | likelihood = likelihoods.Softmax() 118 | kern = [kernels.RadialBasis(data.X.shape[1], lengthscale=10.0, input_scaling = IS_ARD) for i in range(10)] 119 | # kern = [kernels.ArcCosine(X.shape[1], 2, 3, 5.0, 1.0, input_scaling=True) for i in range(10)] #RadialBasis(X.shape[1], input_scaling=True) for i in range(10)] 120 | 121 | Z = init_z(data.X, NUM_INDUCING) 122 | m = autogp.GaussianProcess(likelihood, kern, Z, num_samples=NUM_SAMPLES) 123 | 124 | # setting up loss to be reported during training 125 | error_rate = losses.ZeroOneLoss(data.Dout) 126 | 127 | o = tf.train.RMSPropOptimizer(LEARNING_RATE) 128 | m.fit(data, o, loo_steps=50, var_steps=50, epochs=EPOCHS, batch_size=BATCH_SIZE, display_step=DISPLAY_STEP, test=test, 129 | loss=error_rate) 130 | 131 | ypred = m.predict(test.X)[0] 132 | print("Final " + error_rate.get_name() + "=" + "%.4f" % error_rate.eval(test.Y, ypred)) 133 | 134 | 135 | -------------------------------------------------------------------------------- /experiments/rectangles.py: -------------------------------------------------------------------------------- 1 | import autogp 2 | from autogp import datasets 3 | from autogp import kernels 4 | from autogp import likelihoods 5 | from autogp import losses 6 | from autogp import util 7 | import numpy as np 8 | import os 9 | import subprocess 10 | import pandas as pd 11 | import sklearn.cluster 12 | import sklearn.preprocessing 13 | import tensorflow as tf 14 | import zipfile 15 | 16 | DATA_DIR = "experiments/data/" 17 | TRAIN_PATH = DATA_DIR + "rectangles_im_train.amat" 18 | TEST_PATH = DATA_DIR + "rectangles_im_test.amat" 19 | 20 | def init_z(train_inputs, num_inducing): 21 | # Initialize inducing points using clustering. 22 | mini_batch = sklearn.cluster.MiniBatchKMeans(num_inducing) 23 | cluster_indices = mini_batch.fit_predict(train_inputs) 24 | inducing_locations = mini_batch.cluster_centers_ 25 | return inducing_locations 26 | 27 | def get_rectangles_images_data(): 28 | print "Getting rectangles images data ..." 29 | os.chdir('experiments/data') 30 | subprocess.call(["./get_rectangles_images_data.sh"]) 31 | os.chdir("../../") 32 | print "done" 33 | 34 | 35 | # Gettign the data 36 | if os.path.exists(TRAIN_PATH) is False: # directory does not exist, download the data 37 | get_rectangles_images_data() 38 | 39 | FLAGS = util.get_flags() 40 | BATCH_SIZE = FLAGS.batch_size 41 | LEARNING_RATE = FLAGS.learning_rate 42 | DISPLAY_STEP = FLAGS.display_step 43 | EPOCHS = FLAGS.n_epochs 44 | NUM_SAMPLES = FLAGS.mc_train 45 | NUM_INDUCING = FLAGS.n_inducing 46 | IS_ARD = FLAGS.is_ard 47 | LENGTHSCALE = FLAGS.lengthscale 48 | VAR_STEPS = FLAGS.var_steps 49 | LOOCV_STEPS = FLAGS.loocv_steps 50 | NUM_COMPONENTS = FLAGS.num_components 51 | DEVICE_NAME = FLAGS.device_name 52 | KERNEL = FLAGS.kernel 53 | DEGREE = FLAGS.kernel_degree 54 | DEPTH = FLAGS.kernel_depth 55 | 56 | # Read in and scale the data. 57 | train_data = pd.read_csv(TRAIN_PATH, sep=r"\s+", header=None) 58 | test_data = pd.read_csv(TEST_PATH, sep=r"\s+", header=None) 59 | train_X = train_data.values[:, :-1] 60 | train_Y = train_data.values[:, -1:] 61 | test_X = test_data.values[:, :-1] 62 | test_Y = test_data.values[:, -1:] 63 | data = datasets.DataSet(train_X, train_Y) 64 | test = datasets.DataSet(test_X, test_Y) 65 | 66 | Z = init_z(data.X, NUM_INDUCING) 67 | likelihood = likelihoods.Logistic() # Setup initial values for the model. 
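# Logistic() models each binary label as a Bernoulli with p = sigmoid(f),
# where f is the latent GP value.  A small NumPy sketch (illustration only) of
# the numerically stable way to evaluate log p(y | f):
#
#     import numpy as np
#
#     def bernoulli_log_prob(y, f):
#         # log sigmoid(f) = -softplus(-f); log(1 - sigmoid(f)) = -softplus(f)
#         return -(y * np.logaddexp(0.0, -f) + (1 - y) * np.logaddexp(0.0, f))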
68 | 69 | if KERNEL == 'arccosine': 70 | kern = [kernels.ArcCosine(data.X.shape[1], degree=DEGREE, depth=DEPTH, lengthscale=LENGTHSCALE, std_dev=1.0, input_scaling=IS_ARD) for i in range(1)] 71 | else: 72 | kern = [kernels.RadialBasis(data.X.shape[1], lengthscale=LENGTHSCALE, input_scaling=IS_ARD) for i in range(1)] 73 | 74 | print("Using Kernel " + KERNEL) 75 | 76 | m = autogp.GaussianProcess(likelihood, kern, Z, num_samples=NUM_SAMPLES, num_components=NUM_COMPONENTS) 77 | error_rate = losses.ZeroOneLoss(data.Dout) 78 | o = tf.train.AdamOptimizer(LEARNING_RATE) 79 | m.fit(data, o, loo_steps=LOOCV_STEPS, var_steps=VAR_STEPS, epochs=EPOCHS, batch_size=BATCH_SIZE, display_step=DISPLAY_STEP, test=test, 80 | loss=error_rate) 81 | 82 | ypred = m.predict(test.X)[0] 83 | print("Final " + error_rate.get_name() + "=" + "%.4f" % error_rate.eval(test.Y, ypred)) 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /experiments/sarcos.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sklearn.cluster 4 | import numpy as np 5 | import autogp 6 | from autogp import likelihoods 7 | from autogp import kernels 8 | import tensorflow as tf 9 | from autogp import datasets 10 | from autogp import losses 11 | from autogp import util 12 | import pandas 13 | import scipy.io as sio 14 | 15 | 16 | DATA_DIR = "experiments/data/" 17 | TRAIN_PATH = DATA_DIR + "sarcos_inv.mat" 18 | TEST_PATH = DATA_DIR + "sarcos_inv_test" 19 | 20 | def init_z(train_inputs, num_inducing): 21 | # Initialize inducing points using clustering. 22 | mini_batch = sklearn.cluster.MiniBatchKMeans(num_inducing) 23 | cluster_indices = mini_batch.fit_predict(train_inputs) 24 | inducing_locations = mini_batch.cluster_centers_ 25 | return inducing_locations 26 | 27 | 28 | def get_sarcos_data(): 29 | print "Getting sarcos data ..." 30 | os.chdir('experiments/data') 31 | subprocess.call(["./get_sarcos_data.sh"]) 32 | os.chdir("../../") 33 | print "done" 34 | 35 | 36 | def sarcos_all_joints_data(): 37 | """ 38 | Loads and returns data of SARCOS dataset for all joints. 39 | 40 | Returns 41 | ------- 42 | data : list 43 | A list of length = 1, where each element is a dictionary which contains ``train_outputs``, 44 | ``train_inputs``, ``test_outputs``, ``test_inputs``, and ``id`` 45 | """ 46 | 47 | train = sio.loadmat(TRAIN_PATH)['sarcos_inv'] 48 | test = sio.loadmat(TEST_PATH)['sarcos_inv_test'] 49 | return{ 50 | 'train_inputs': train[:, :21], 51 | 'train_outputs': train[:, 21:], 52 | 'test_inputs': test[:, :21], 53 | 'test_outputs': test[:, 21:], 54 | 'id': 0 55 | } 56 | 57 | 58 | if __name__ == '__main__': 59 | FLAGS = util.util.get_flags() 60 | BATCH_SIZE = FLAGS.batch_size 61 | LEARNING_RATE = FLAGS.learning_rate 62 | DISPLAY_STEP = FLAGS.display_step 63 | EPOCHS = FLAGS.n_epochs 64 | NUM_SAMPLES = FLAGS.mc_train 65 | NUM_INDUCING = FLAGS.n_inducing 66 | IS_ARD = FLAGS.is_ard 67 | 68 | if os.path.exists(TRAIN_PATH) is False: # directory does not exist, download the data 69 | get_sarcos_data() 70 | 71 | d = sarcos_all_joints_data() 72 | data = datasets.DataSet(d['train_inputs'].astype(np.float32), d['train_outputs'].astype(np.float32)) 73 | test = datasets.DataSet(d['test_inputs'].astype(np.float32), d['test_outputs'].astype(np.float32)) 74 | 75 | # Setup initial values for the model. 
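# The RegressionNetwork likelihood set up below mixes the latent GPs into the
# outputs: with 8 kernels and output_dim=7, the first 7 latent functions act
# as weights and the last one as a shared input, combined elementwise (see
# autogp/likelihoods/regression_network.py).  A tiny shape check of that
# product, illustration only:
#
#     import numpy as np
#     latent = np.zeros((100, 32, 8))            # (samples, points, latents)
#     weights, inputs = latent[:, :, :7], latent[:, :, 7:]
#     print((weights * inputs).shape)            # -> (100, 32, 7)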
76 | likelihood = likelihoods.RegressionNetwork(7, 0.1) 77 | kern = [kernels.RadialBasis(data.X.shape[1], lengthscale=8.0, input_scaling = IS_ARD) for i in range(8)] 78 | # kern = [kernels.ArcCosine(data.X.shape[1], 1, 3, 5.0, 1.0, input_scaling=True) for i in range(10)] 79 | 80 | Z = init_z(data.X, NUM_INDUCING) 81 | m = autogp.GaussianProcess(likelihood, kern, Z, num_samples=NUM_SAMPLES) 82 | 83 | # setting up loss to be reported during training 84 | error_rate = losses.RootMeanSqError(data.Dout) # StandardizedMeanSqError is not available in autogp.losses, so report RMSE 85 | 86 | import time 87 | o = tf.train.RMSPropOptimizer(LEARNING_RATE) 88 | start = time.time() 89 | m.fit(data, o, loo_steps=0, var_steps=50, epochs = EPOCHS, batch_size = BATCH_SIZE, display_step=DISPLAY_STEP, test = test, 90 | loss = error_rate ) 91 | print time.time() - start 92 | 93 | ypred = m.predict(test.X)[0] 94 | print("Final " + error_rate.get_name() + "=" + "%.4f" % error_rate.eval(test.Y, ypred)) 95 | 96 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nosetests test/ 2>&1 >/dev/null | grep -v '^tensorflow: Level' | grep -v '^Level [0-9]:tensorflow' | egrep -v 'E?I tensorflow' | grep -v '^E$' | egrep -v '(begin|end) captured logging' 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | import os 3 | import re 4 | 5 | 6 | def find_packages(path): 7 | ret = [] 8 | for root, dirs, files in os.walk(path): 9 | if '__init__.py' in files: 10 | ret.append(re.sub('^[^A-z0-9_]+', '', root.replace('/', '.'))) 11 | return ret 12 | 13 | 14 | REQUIRED = [ 15 | 'scikit-learn>=0.17.0', 16 | # 'tensorflow>=1.5.0', # waiting for release 17 | ] 18 | 19 | DEV_REQUIRED = [ 20 | 'nose', 21 | ] 22 | 23 | 24 | setup( 25 | name='AutoGP', 26 | version='0.1', 27 | description='Unified tool for automatic Gaussian Process Inference', 28 | author='Karl Krauth and Edwin Bonilla', 29 | author_email='edwinbonilla+autogp@gmail.com', 30 | url='https://github.com/ebonilla/AutoGP', 31 | license='Apache', 32 | packages=find_packages('autogp'), 33 | install_requires=REQUIRED, 34 | dev_requires=DEV_REQUIRED, 35 | cmdclass={ 36 | # 'build_ext': custom_build_ext 37 | }, 38 | classifiers=[ 39 | "Development Status :: 4 - Beta", 40 | "Operating System :: MacOS :: MacOS X", 41 | "Operating System :: POSIX", 42 | "Programming Language :: Python", 43 | "Programming Language :: Python :: 2.7", 44 | "Programming Language :: Python :: 3.5", 45 | "Programming Language :: Python :: 3.6", 46 | "Topic :: Scientific/Engineering :: Information Analysis", 47 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 48 | ], 49 | ) 50 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebonilla/AutoGP/7190be6835b110fd43d610f812aad0caf716d7b4/test/__init__.py -------------------------------------------------------------------------------- /test/gaussian_process_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import scipy.misc 5 | import scipy.stats 6 | import tensorflow as tf 7 | 8 | import autogp 9 | from autogp
import kernels 10 | from autogp import likelihoods 11 | 12 | 13 | SIG_FIGS = 5 14 | RTOL = 10**(-SIG_FIGS) 15 | ATOL = 10**(-SIG_FIGS) 16 | 17 | 18 | class TestGaussianProcess(unittest.TestCase): 19 | @classmethod 20 | def setUpClass(cls): 21 | # We expect the child class to instantiate `cls.model` for us. 22 | cls.session = tf.Session() 23 | 24 | @classmethod 25 | def tearDownClass(cls): 26 | cls.session.close() 27 | 28 | @classmethod 29 | def entropy(cls, weights, means, covars): 30 | entropy = cls.model._build_entropy( 31 | weights=np.array(weights, dtype=np.float32), 32 | means=np.array(means, dtype=np.float32), 33 | covars=np.array(covars, dtype=np.float32)) 34 | return cls.session.run(entropy) 35 | 36 | @classmethod 37 | def cross_ent(cls, weights, means, covars, kernel_chol): 38 | cross_ent = cls.model._build_cross_ent( 39 | weights=np.array(weights, dtype=np.float32), 40 | means=np.array(means, dtype=np.float32), 41 | covars=np.array(covars, dtype=np.float32), 42 | kernel_chol=np.array(kernel_chol, dtype=np.float32)) 43 | return cls.session.run(cross_ent) 44 | 45 | @classmethod 46 | def interim_vals(cls, kernel_chol, inducing_inputs, train_inputs): 47 | kern_prods, kern_sums = cls.model._build_interim_vals( 48 | kernel_chol=np.array(kernel_chol, dtype=np.float32), 49 | inducing_inputs=np.array(inducing_inputs, dtype=np.float32), 50 | train_inputs=np.array(train_inputs, dtype=np.float32)) 51 | return cls.session.run([kern_prods, kern_sums]) 52 | 53 | @classmethod 54 | def sample_info(cls, kern_prods, kern_sums, means, covars): 55 | mean, var = cls.model._build_sample_info( 56 | kern_prods=np.array(kern_prods, dtype=np.float32), 57 | kern_sums=np.array(kern_sums, dtype=np.float32), 58 | means=np.array(means, dtype=np.float32), 59 | covars=np.array(covars, dtype=np.float32)) 60 | return cls.session.run([mean, var]) 61 | 62 | 63 | class TestSimpleFull(TestGaussianProcess): 64 | @classmethod 65 | def setUpClass(cls): 66 | super(TestSimpleFull, cls).setUpClass() 67 | likelihood = likelihoods.Gaussian(1.0) 68 | kernel = [ 69 | kernels.RadialBasis( 70 | input_dim=1, 71 | lengthscale=1.0, 72 | std_dev=1.0, 73 | white=0.0 74 | ) 75 | ] 76 | # In most of our unit tests, we will replace this value 77 | # with something else. 
78 | inducing_inputs = np.array([[1.0]]) 79 | cls.model = autogp.GaussianProcess( 80 | likelihood_func=likelihood, 81 | kernel_funcs=kernel, 82 | inducing_inputs=inducing_inputs, 83 | num_components=1, 84 | diag_post=False, 85 | num_samples=10) 86 | cls.session.run(tf.global_variables_initializer()) 87 | 88 | def test_simple_entropy(self): 89 | entropy = TestSimpleFull.entropy( 90 | weights=[1.0], 91 | means=[[[1.0]]], 92 | covars=[[[[1.0]]]]) 93 | np.testing.assert_approx_equal( 94 | entropy, 95 | 0.5 * (np.log(2 * np.pi) + np.log(2.0)), 96 | SIG_FIGS) 97 | 98 | def test_small_covar_entropy(self): 99 | entropy = TestSimpleFull.entropy( 100 | weights=[1.0], 101 | means=[[[1.0]]], 102 | covars=[[[[1e-10]]]]) 103 | np.testing.assert_approx_equal( 104 | entropy, 105 | 0.5 * (np.log(2 * np.pi) + np.log(2 * 1e-20)), 106 | SIG_FIGS) 107 | 108 | def test_large_covar_entropy(self): 109 | entropy = TestSimpleFull.entropy( 110 | weights=[1.0], 111 | means=[[[1.0]]], 112 | covars=[[[[1e10]]]]) 113 | np.testing.assert_approx_equal( 114 | entropy, 115 | 0.5 * (np.log(2 * np.pi) + np.log(2 * 1e20)), 116 | SIG_FIGS) 117 | 118 | def test_simple_cross_ent(self): 119 | cross_ent = TestSimpleFull.cross_ent( 120 | weights=[1.0], 121 | means=[[[1.0]]], 122 | covars=[[[[1.0]]]], 123 | kernel_chol=[[[1.0]]]) 124 | np.testing.assert_approx_equal( 125 | cross_ent, 126 | -0.5 * (np.log(2 * np.pi) + np.log(1.0) + 2.0), 127 | SIG_FIGS) 128 | 129 | def test_small_cross_ent(self): 130 | cross_ent = TestSimpleFull.cross_ent( 131 | weights=[1.0], 132 | means=[[[1e-10]]], 133 | covars=[[[[1e-10]]]], 134 | kernel_chol=[[[1e-10]]]) 135 | np.testing.assert_approx_equal( 136 | cross_ent, 137 | -0.5 * (np.log(2 * np.pi) + np.log(1e-20) + 2.0), 138 | SIG_FIGS) 139 | 140 | def test_large_cross_ent(self): 141 | cross_ent = TestSimpleFull.cross_ent( 142 | weights=[1.0], 143 | means=[[[1e10]]], 144 | covars=[[[[1e10]]]], 145 | kernel_chol=[[[1e10]]]) 146 | np.testing.assert_approx_equal( 147 | cross_ent, 148 | -0.5 * (np.log(2 * np.pi) + np.log(1e20) + 2.0), 149 | SIG_FIGS) 150 | 151 | def test_simple_interim_vals(self): 152 | kern_prods, kern_sums = TestSimpleFull.interim_vals( 153 | kernel_chol=[[[1.0]]], 154 | inducing_inputs=[[[1.0]]], 155 | train_inputs=[[1.0]]) 156 | np.testing.assert_allclose(kern_prods, 1.0, rtol=RTOL) 157 | np.testing.assert_allclose(kern_sums, 0.0, rtol=RTOL) 158 | 159 | def test_small_interim_vals(self): 160 | kern_prods, kern_sums = TestSimpleFull.interim_vals( 161 | kernel_chol=[[[1e-8]]], 162 | inducing_inputs=[[[1e-8]]], 163 | train_inputs=[[1e-8]]) 164 | np.testing.assert_allclose(kern_prods, 1e16, rtol=RTOL) 165 | np.testing.assert_allclose(kern_sums, 1 - 1e16, rtol=RTOL) 166 | 167 | def test_large_interim_vals(self): 168 | kern_prods, kern_sums = TestSimpleFull.interim_vals( 169 | kernel_chol=[[[1e8]]], 170 | inducing_inputs=[[[1e8]]], 171 | train_inputs=[[1e8]]) 172 | np.testing.assert_allclose(kern_prods, 1e-8, rtol=RTOL, atol=ATOL) 173 | np.testing.assert_allclose(kern_sums, 1 - 1e-8, rtol=RTOL, atol=ATOL) 174 | 175 | def test_multiple_inputs_interim_vals(self): 176 | inducing_distances = np.array( 177 | [ 178 | [1.0, np.exp(-0.5), np.exp(-2.0)], 179 | [np.exp(-0.5), 1.0, np.exp(-0.5)], 180 | [np.exp(-2.0), np.exp(-0.5), 1.0] 181 | ], 182 | dtype=np.float32) 183 | kern_chol = np.linalg.cholesky(inducing_distances)[np.newaxis, :, :] 184 | kern_prods, kern_sums = TestSimpleFull.interim_vals( 185 | kern_chol, 186 | inducing_inputs=[[[1.0], [2.0], [3.0]]], 187 | train_inputs=[[3.0], [4.0]]) 188 | 
train_inducing_distances = np.array( 189 | [ 190 | [np.exp(-2.0), np.exp(-0.5), 1.0], 191 | [np.exp(-4.5), np.exp(-2.0), np.exp(-0.5)] 192 | ], 193 | dtype=np.float32) 194 | 195 | real_kern_prods = np.dot( 196 | train_inducing_distances, 197 | np.linalg.inv(inducing_distances)) 198 | real_kern_sums = np.ones([2]) - np.diag(np.dot( 199 | real_kern_prods, train_inducing_distances.T 200 | )) 201 | 202 | # We need atol here since we are testing for equality to zero 203 | np.testing.assert_allclose( 204 | kern_prods[0], real_kern_prods, 205 | rtol=RTOL, atol=ATOL) 206 | np.testing.assert_allclose( 207 | kern_sums[0], real_kern_sums, rtol=RTOL, atol=ATOL) 208 | 209 | def test_simple_sample_info(self): 210 | mean, var = TestSimpleFull.sample_info( 211 | kern_prods=[[[2.0]]], 212 | kern_sums=[[3.0]], 213 | means=[[4.0]], 214 | covars=[[[5.0]]]) 215 | np.testing.assert_allclose(mean, 8.0, rtol=RTOL) 216 | np.testing.assert_allclose(var, 103.0, rtol=RTOL) 217 | 218 | def test_multi_sample_info(self): 219 | mean, var = TestSimpleFull.sample_info( 220 | kern_prods=[[[1.0, 2.0], [3.0, 4.0]]], 221 | kern_sums=[[5.0, 6.0]], 222 | means=[[7.0, 8.0]], 223 | covars=[[[9.0, 10.0], [11.0, 12.0]]]) 224 | np.testing.assert_allclose(mean, [[23.0], [53.0]], rtol=RTOL) 225 | np.testing.assert_allclose(var, [[2122.0], [11131.0]], rtol=RTOL) 226 | 227 | 228 | class TestSimpleDiag(TestGaussianProcess): 229 | @classmethod 230 | def setUpClass(cls): 231 | super(TestSimpleDiag, cls).setUpClass() 232 | likelihood = likelihoods.Gaussian(1.0) 233 | kernel = [ 234 | kernels.RadialBasis( 235 | input_dim=1, 236 | lengthscale=1.0, 237 | std_dev=1.0, 238 | white=0.0 239 | ) 240 | ] 241 | # In most of our unit test, we will replace this value 242 | # with something else. 243 | inducing_inputs = np.array([[1.0]]) 244 | cls.model = autogp.GaussianProcess( 245 | likelihood_func=likelihood, 246 | kernel_funcs=kernel, 247 | inducing_inputs=inducing_inputs, 248 | num_components=1, 249 | diag_post=True, 250 | num_samples=10) 251 | cls.session.run(tf.global_variables_initializer()) 252 | 253 | def test_simple_entropy(self): 254 | entropy = TestSimpleDiag.entropy(weights=[1.0], 255 | means=[[[1.0]]], 256 | covars=[[[1.0]]]) 257 | np.testing.assert_allclose( 258 | entropy, 259 | 0.5 * (np.log(2 * np.pi) + np.log(2.0)), 260 | SIG_FIGS) 261 | 262 | def test_small_covar_entropy(self): 263 | entropy = TestSimpleDiag.entropy(weights=[1.0], 264 | means=[[[1.0]]], 265 | covars=[[[1e-10]]]) 266 | np.testing.assert_allclose( 267 | entropy, 268 | 0.5 * (np.log(2 * np.pi) + np.log(2 * 1e-10)), 269 | SIG_FIGS) 270 | 271 | def test_large_covar_entropy(self): 272 | entropy = TestSimpleDiag.entropy(weights=[1.0], 273 | means=[[[1.0]]], 274 | covars=[[[1e10]]]) 275 | np.testing.assert_allclose( 276 | entropy, 277 | 0.5 * (np.log(2 * np.pi) + np.log(2 * 1e10)), 278 | SIG_FIGS) 279 | 280 | def test_simple_cross_ent(self): 281 | cross_ent = TestSimpleDiag.cross_ent(weights=[1.0], 282 | means=[[[1.0]]], 283 | covars=[[[1.0]]], 284 | kernel_chol=[[[1.0]]]) 285 | np.testing.assert_approx_equal( 286 | cross_ent, 287 | -0.5 * (np.log(2 * np.pi) + 2.0), 288 | SIG_FIGS) 289 | 290 | def test_small_cross_ent(self): 291 | cross_ent = TestSimpleDiag.cross_ent(weights=[1.0], 292 | means=[[[1e-10]]], 293 | covars=[[[1e-10]]], 294 | kernel_chol=[[[1e-10]]]) 295 | np.testing.assert_approx_equal( 296 | cross_ent, 297 | -0.5 * (np.log(2 * np.pi) + np.log(1e-20) + 1.0 + 1e10), 298 | SIG_FIGS) 299 | 300 | def test_large_cross_ent(self): 301 | cross_ent = 
TestSimpleDiag.cross_ent(weights=[1.0], 302 | means=[[[1e10]]], 303 | covars=[[[1e10]]], 304 | kernel_chol=[[[1e10]]]) 305 | np.testing.assert_approx_equal( 306 | cross_ent, 307 | -0.5 * (np.log(2 * np.pi) + np.log(1e20) + 1.0 + 1e-10), 308 | SIG_FIGS) 309 | 310 | def test_simple_sample_info(self): 311 | mean, var = TestSimpleDiag.sample_info(kern_prods=[[[2.0]]], 312 | kern_sums=[[3.0]], 313 | means=[[4.0]], 314 | covars=[[5.0]]) 315 | np.testing.assert_approx_equal(mean, 8.0, SIG_FIGS) 316 | np.testing.assert_approx_equal(var, 23.0, SIG_FIGS) 317 | 318 | def test_multi_sample_info(self): 319 | mean, var = TestSimpleDiag.sample_info( 320 | kern_prods=[[[1.0, 2.0], [3.0, 4.0]]], 321 | kern_sums=[[5.0, 6.0]], 322 | means=[[7.0, 8.0]], 323 | covars=[[9.0, 10.0]] 324 | ) 325 | np.testing.assert_allclose(mean, [[23.0], [53.0]], rtol=RTOL) 326 | np.testing.assert_allclose(var, [[54.0], [247.0]], rtol=RTOL) 327 | 328 | 329 | class TestMultiFull(TestGaussianProcess): 330 | @classmethod 331 | def setUpClass(cls): 332 | super(TestMultiFull, cls).setUpClass() 333 | likelihood = likelihoods.Softmax() 334 | kernel = [ 335 | kernels.RadialBasis( 336 | input_dim=2, 337 | lengthscale=1.0, 338 | std_dev=1.0, 339 | white=0.0) 340 | for i in range(2) 341 | ] 342 | inducing_locations = np.array([[1.0, 2.0, 3.0, 4.0]]) 343 | cls.model = autogp.GaussianProcess( 344 | likelihood_func=likelihood, 345 | kernel_funcs=kernel, 346 | inducing_inputs=inducing_locations, 347 | num_components=2, 348 | diag_post=False, 349 | num_samples=1) 350 | cls.session.run(tf.global_variables_initializer()) 351 | 352 | def test_entropy(self): 353 | entropy = TestMultiFull.entropy( 354 | weights=[0.7, 0.3], 355 | means=[[[01.0, 02.0], 356 | [03.0, 04.0]], 357 | [[05.0, 06.0], 358 | [07.0, 08.0]]], 359 | covars=[[[[0.1, 0.0], 360 | [0.2, 0.3]], 361 | [[0.4, 0.0], 362 | [0.5, 0.6]]], 363 | [[[0.7, 0.0], 364 | [0.8, 0.9]], 365 | [[1.0, 0.0], 366 | [1.1, 1.2]]]]) 367 | n11_1 = scipy.stats.multivariate_normal.logpdf( 368 | [1.0, 2.0], [1.0, 2.0], 369 | [ 370 | [0.02, 0.04], 371 | [0.02, 0.26] 372 | ] 373 | ) 374 | n11_2 = scipy.stats.multivariate_normal.logpdf( 375 | [3.0, 4.0], [3.0, 4.0], 376 | [ 377 | [0.32, 0.40], 378 | [0.40, 1.22] 379 | ]) 380 | n12_1 = scipy.stats.multivariate_normal.logpdf( 381 | [1.0, 2.0], 382 | [5.0, 6.0], 383 | [ 384 | [0.50, 0.58], 385 | [0.58, 1.58] 386 | ] 387 | ) 388 | n12_2 = scipy.stats.multivariate_normal.logpdf( 389 | [3.0, 4.0], [7.0, 8.0], 390 | [[1.16, 1.30], [1.30, 3.26]]) 391 | n21_1 = scipy.stats.multivariate_normal.logpdf( 392 | [5.0, 6.0], [1.0, 2.0], 393 | [[0.50, 0.58], [0.58, 1.58]]) 394 | n21_2 = scipy.stats.multivariate_normal.logpdf( 395 | [7.0, 8.0], [3.0, 4.0], 396 | [[1.16, 1.30], [1.30, 3.26]]) 397 | n22_1 = scipy.stats.multivariate_normal.logpdf( 398 | [5.0, 6.0], [5.0, 6.0], 399 | [[0.98, 1.12], [1.12, 2.90]]) 400 | n22_2 = scipy.stats.multivariate_normal.logpdf( 401 | [7.0, 8.0], [7.0, 8.0], 402 | [[2.00, 2.20], [2.20, 5.30]]) 403 | true_ent = -( 404 | 0.7 * scipy.misc.logsumexp( 405 | [np.log(0.7) + n11_1 + n11_2, np.log(0.3) + n12_1 + n12_2] 406 | ) + 407 | 0.3 * scipy.misc.logsumexp( 408 | [np.log(0.7) + n21_1 + n21_2, np.log(0.3) + n22_1 + n22_2] 409 | ) 410 | ) 411 | np.testing.assert_approx_equal(entropy, true_ent, SIG_FIGS - 4) 412 | 413 | def test_cross_ent(self): 414 | cross_ent = TestMultiFull.cross_ent( 415 | weights=[0.3, 0.7], 416 | means=[[[01.0, 02.0], 417 | [03.0, 04.0]], 418 | [[05.0, 06.0], 419 | [07.0, 08.0]]], 420 | covars=[[[[01.0, 00.0], 421 | [02.0, 03.0]], 
422 | [[04.0, 00.0], 423 | [05.0, 06.0]]], 424 | [[[07.0, 00.0], 425 | [08.0, 09.0]], 426 | [[10.0, 00.0], 427 | [11.0, 12.0]]]], 428 | kernel_chol=[[[13.0, 0.0], 429 | [14.0, 15.0]], 430 | [[16.0, 0.0], 431 | [17.0, 18.0]]]) 432 | n11 = scipy.stats.multivariate_normal.logpdf( 433 | [0.0, 0.0], 434 | [1.0, 2.0], 435 | [ 436 | [169.0, 182.0], 437 | [182.0, 421.0] 438 | ]) 439 | n12 = scipy.stats.multivariate_normal.logpdf( 440 | [0.0, 0.0], [3.0, 4.0], 441 | [ 442 | [256.0, 272.0], 443 | [272.0, 613.0] 444 | ]) 445 | n21 = scipy.stats.multivariate_normal.logpdf( 446 | [0.0, 0.0], [5.0, 6.0], 447 | [ 448 | [169.0, 182.0], 449 | [182.0, 421.0] 450 | ]) 451 | n22 = scipy.stats.multivariate_normal.logpdf( 452 | [0.0, 0.0], [7.0, 8.0], 453 | [ 454 | [256.0, 272.0], 455 | [272.0, 613.0]]) 456 | ki_1 = scipy.linalg.inv([[169.0, 182.0], 457 | [182.0, 421.0]]) 458 | ki_2 = scipy.linalg.inv([[256.0, 272.0], 459 | [272.0, 613.0]]) 460 | p11 = np.dot(ki_1, [[1.0, 2.0], 461 | [2.0, 13.0]]) 462 | p12 = np.dot(ki_2, [[16.0, 20.0], 463 | [20.0, 61.0]]) 464 | p21 = np.dot(ki_1, [[49.0, 56.0], 465 | [56.0, 145.0]]) 466 | p22 = np.dot(ki_2, [[100.0, 110.0], 467 | [110.0, 265.0]]) 468 | t11 = np.trace(p11) 469 | t12 = np.trace(p12) 470 | t21 = np.trace(p21) 471 | t22 = np.trace(p22) 472 | np.testing.assert_approx_equal( 473 | cross_ent, 474 | ( 475 | 0.3 * (n11 - 0.5 * t11 + n12 - 0.5 * t12) + 476 | 0.7 * (n21 - 0.5 * t21 + n22 - 0.5 * t22) 477 | ), 478 | SIG_FIGS 479 | ) 480 | 481 | def test_interim_vals(self): 482 | kern_prods, kern_sums = TestMultiFull.interim_vals( 483 | kernel_chol=[ 484 | [[1.0, 0.0], [2.0, 3.0]], 485 | [[4.0, 0.0], [5.0, 6.0]] 486 | ], 487 | inducing_inputs=[ 488 | [[7.0, 8.0], [9.0, 10.0]], 489 | [[11.0, 12.0], [13.0, 14.0]] 490 | ], 491 | train_inputs=[ 492 | [15.0, 16.0], [17.0, 18.0] 493 | ] 494 | ) 495 | kxz_1 = np.array([[np.exp(-64.0), np.exp(-36.0)], 496 | [np.exp(-100.0), np.exp(-64.0)]]) 497 | kxz_2 = np.array([[np.exp(-16.0), np.exp(-4.0)], 498 | [np.exp(-36.0), np.exp(-16.0)]]) 499 | kxx = np.array([[1.0, np.exp(-4.0)], 500 | [np.exp(-4.0), 1.0]]) 501 | kzz_inv1 = scipy.linalg.inv(np.array([[1.0, 2.0], 502 | [2.0, 13.0]])) 503 | kzz_inv2 = scipy.linalg.inv(np.array([[16.0, 20.0], 504 | [20.0, 61.0]])) 505 | a_1 = np.dot(kxz_1, kzz_inv1) 506 | a_2 = np.dot(kxz_2, kzz_inv2) 507 | np.testing.assert_allclose( 508 | kern_prods[0], 509 | a_1, 510 | rtol=RTOL) 511 | np.testing.assert_allclose( 512 | kern_prods[1], 513 | a_2, 514 | rtol=RTOL) 515 | np.testing.assert_allclose( 516 | kern_sums[0], 517 | np.diag(kxx - np.dot(a_1, kxz_1.T)), 518 | rtol=RTOL) 519 | np.testing.assert_allclose( 520 | kern_sums[1], 521 | np.diag(kxx - np.dot(a_2, kxz_2.T)), 522 | rtol=RTOL) 523 | 524 | def test_sample_info(self): 525 | mean, var = TestMultiFull.sample_info( 526 | kern_prods=[ 527 | [[1.0, 2.0], [3.0, 4.0]], 528 | [[5.0, 6.0], [7.0, 8.0]] 529 | ], 530 | kern_sums=[[9.0, 10.0], [11.0, 12.0]], 531 | means=[[13.0, 14.0], [15.0, 16.0]], 532 | covars=[ 533 | [[17.0, 0.0], [19.0, 20.0]], 534 | [[21.0, 0.0], [22.0, 23.0]] 535 | ] 536 | ) 537 | true_mean = np.array([[41.0, 171.0], 538 | [95.0, 233.0]]) 539 | true_var = np.array([[4634.0, 75224.0], 540 | [22539.0, 138197.0]]) 541 | np.testing.assert_allclose(mean, true_mean, rtol=RTOL) 542 | np.testing.assert_allclose(var, true_var, rtol=RTOL) 543 | -------------------------------------------------------------------------------- /test/kernels_test/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ebonilla/AutoGP/7190be6835b110fd43d610f812aad0caf716d7b4/test/kernels_test/__init__.py -------------------------------------------------------------------------------- /test/kernels_test/arc_cosine_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from autogp import kernels 7 | from ..gaussian_process_test import TestGaussianProcess 8 | 9 | SIG_FIGS = 5 10 | RTOL = 10**(-SIG_FIGS) 11 | ATOL = 10**(-SIG_FIGS) 12 | 13 | 14 | class TestArcCosine(TestGaussianProcess): 15 | 16 | @classmethod 17 | def setUpClass(cls): 18 | super(TestArcCosine, cls).setUpClass() 19 | 20 | @classmethod 21 | def kernel(cls, points1, points2=None, degree=0, depth=1): 22 | arc_cosine = kernels.ArcCosine(degree, depth, white=0.0) 23 | cls.session.run(tf.global_variables_initializer()) 24 | if points2 is not None: 25 | return cls.session.run( 26 | arc_cosine.kernel( 27 | np.array(points1, dtype=np.float32), 28 | np.array(points2, dtype=np.float32))) 29 | else: 30 | return cls.session.run(arc_cosine.kernel( 31 | np.array(points1, dtype=np.float32))) 32 | 33 | @classmethod 34 | def diag_kernel(cls, points, degree=0, depth=1): 35 | arc_cosine = kernels.ArcCosine(degree, depth, white=0.0) 36 | cls.session.run(tf.global_variables_initializer()) 37 | return cls.session.run(arc_cosine.diag_kernel( 38 | np.array(points, dtype=np.float32))) 39 | 40 | @classmethod 41 | def test_simple_kern(cls): 42 | kern = cls.kernel( 43 | [ 44 | [1.0, 0.0, 0.0], 45 | [0.0, 1.0, 0.0], 46 | [0.0, 0.0, 1.0] 47 | ]) 48 | np.testing.assert_allclose( 49 | kern, 50 | [ 51 | [1.0, 0.5, 0.5], 52 | [0.5, 1.0, 0.5], 53 | [0.5, 0.5, 1.0] 54 | ], 55 | atol=ATOL, 56 | rtol=RTOL) 57 | 58 | @classmethod 59 | def test_parallel_kern(cls): 60 | kern = cls.kernel( 61 | [ 62 | [3.0, 5.0, 2.0], 63 | [-3.0, -5.0, -2.0], 64 | [6.0, 10.0, 4.0] 65 | ]) 66 | np.testing.assert_allclose( 67 | kern, 68 | [ 69 | [1.0, 0.0, 1.0], 70 | [0.0, 1.0, 0.0], 71 | [1.0, 0.0, 1.0] 72 | ], 73 | atol=ATOL, 74 | rtol=RTOL) 75 | -------------------------------------------------------------------------------- /test/likelihoods_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebonilla/AutoGP/7190be6835b110fd43d610f812aad0caf716d7b4/test/likelihoods_test/__init__.py -------------------------------------------------------------------------------- /test/likelihoods_test/softmax_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from autogp import util 7 | from autogp import likelihoods 8 | 9 | 10 | SIG_FIGS = 5 11 | 12 | 13 | class TestSoftmax(unittest.TestCase): 14 | def log_prob(self, outputs, latent): 15 | softmax = likelihoods.Softmax() 16 | return tf.Session().run(softmax.log_cond_prob(np.array(outputs, dtype=np.float32), 17 | np.array(latent, dtype=np.float32))) 18 | 19 | def predict(self, latent_means, latent_vars): 20 | softmax = likelihoods.Softmax() 21 | return tf.Session().run(softmax.predict(np.array(latent_means, dtype=np.float32), 22 | np.array(latent_vars, dtype=np.float32))) 23 | 24 | def test_single_prob(self): 25 | log_prob = self.log_prob([[1.0, 0.0]], [[[5.0, 2.0]]]) 26 | np.testing.assert_allclose(np.exp(log_prob), np.exp(5.0) / (np.exp(5.0) + np.exp(2.0)), 27 | SIG_FIGS) 28 | 29 | def test_extreme_probs(self): 30 | 
log_prob = self.log_prob([[1.0, 0.0], 31 | [0.0, 1.0]], 32 | [[[1e10, -1e10], 33 | [-1e10, 1e10]], 34 | [[-1e10, 1e10], 35 | [1e10, -1e10]]]) 36 | true_probs = np.array([[1.0, 1.0], 37 | [0.0, 0.0]]) 38 | np.testing.assert_allclose( 39 | np.exp(log_prob), 40 | true_probs, 41 | rtol=10**(-SIG_FIGS)) 42 | 43 | def test_multi_probs(self): 44 | log_prob = self.log_prob([[1.0, 0.0, 0.0], 45 | [0.0, 1.0, 0.0], 46 | [0.0, 0.0, 1.0]], 47 | [[[1.0, 2.0, 3.0], 48 | [4.0, 5.0, 6.0], 49 | [7.0, 8.0, 9.0]], 50 | [[10.0, 11.0, 12.0], 51 | [13.0, 14.0, 15.0], 52 | [16.0, 17.0, 18.0]]]) 53 | true_probs = np.array([[np.exp(1.0) / (np.exp(1.0) + np.exp(2.0) + np.exp(3.0)), 54 | np.exp(5.0) / (np.exp(4.0) + np.exp(5.0) + np.exp(6.0)), 55 | np.exp(9.0) / (np.exp(7.0) + np.exp(8.0) + np.exp(9.0))], 56 | [np.exp(10.0) / (np.exp(10.0) + np.exp(11.0) + np.exp(12.0)), 57 | np.exp(14.0) / (np.exp(13.0) + np.exp(14.0) + np.exp(15.0)), 58 | np.exp(18.0) / (np.exp(16.0) + np.exp(17.0) + np.exp(18.0))]]) 59 | np.testing.assert_allclose( 60 | np.exp(log_prob), 61 | true_probs, 62 | rtol=10**(-SIG_FIGS)) 63 | -------------------------------------------------------------------------------- /test/util_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebonilla/AutoGP/7190be6835b110fd43d610f812aad0caf716d7b4/test/util_test/__init__.py -------------------------------------------------------------------------------- /test/util_test/normals_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from autogp import util 7 | 8 | 9 | SIG_FIGS = 5 10 | 11 | 12 | class TestCholNormal(unittest.TestCase): 13 | def chol_normal_log_prob(self, val, mean, covar): 14 | chol_normal = util.CholNormal(np.array(mean, dtype=np.float32), 15 | np.array(covar, dtype=np.float32)) 16 | return tf.Session().run(chol_normal.log_prob(np.array(val, dtype=np.float32))) 17 | 18 | def test_same_mean(self): 19 | log_prob = self.chol_normal_log_prob([1.0], [1.0], [[1.0]]) 20 | self.assertAlmostEqual(log_prob, -0.5 * np.log(2 * np.pi), SIG_FIGS) 21 | 22 | def test_scalar_covar(self): 23 | log_prob = self.chol_normal_log_prob([1.0], [1.0], [[np.sqrt(2.0)]]) 24 | self.assertAlmostEqual(log_prob, -0.5 * (np.log(2 * np.pi) + np.log(2.0)), SIG_FIGS) 25 | 26 | def test_small_scalar_covar(self): 27 | log_prob = self.chol_normal_log_prob([1.0], [1.0], [[1e-10]]) 28 | self.assertAlmostEqual(log_prob, -0.5 * (np.log(2 * np.pi) + np.log(1e-20)), SIG_FIGS) 29 | 30 | def test_large_scalar_covar(self): 31 | log_prob = self.chol_normal_log_prob([1.0], [1.0], [[1e10]]) 32 | self.assertAlmostEqual(log_prob, -0.5 * (np.log(2 * np.pi) + np.log(1e20)), SIG_FIGS) 33 | 34 | def test_multi_covar_same_mean(self): 35 | log_prob = self.chol_normal_log_prob([1.0, 2.0], [1.0, 2.0], [[1.0, 0.0], [2.0, 3.0]]) 36 | self.assertAlmostEqual(log_prob, -0.5 * (2.0 * np.log(2 * np.pi) + np.log(9.0)), SIG_FIGS) 37 | 38 | -------------------------------------------------------------------------------- /test/util_test/util_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from autogp import util 7 | 8 | 9 | class TestInitList(unittest.TestCase): 10 | def test_empty(self): 11 | self.assertEquals(util.init_list(0.0, [0]), []) 12 | 13 | def test_single_element(self): 14 | 
self.assertEquals(util.init_list(1.0, [1]), [1.0]) 15 | 16 | def test_nested_single(self): 17 | self.assertEquals(util.init_list(1.0, [1, 1, 1, 1]), [[[[1.0]]]]) 18 | 19 | def test_single_level(self): 20 | self.assertEquals(util.init_list(2.0, [4]), [2.0, 2.0, 2.0, 2.0]) 21 | 22 | def test_multiple_levels(self): 23 | self.assertEquals(util.init_list(3.25, [3, 2, 1]), 24 | [[[3.25], [3.25]], 25 | [[3.25], [3.25]], 26 | [[3.25], [3.25]]]) 27 | 28 | --------------------------------------------------------------------------------