├── .gitignore ├── LICENSE ├── README.md ├── robustgp ├── __init__.py ├── init_methods │ ├── __init__.py │ ├── init_methods_test.py │ ├── kdpp_mcmc.py │ ├── methods.py │ ├── misc.py │ └── rls.py ├── models.py ├── models_test.py ├── optimizers.py ├── optimizers_test.py └── utilities.py ├── robustgp_experiments ├── __init__.py ├── demo1d.py ├── init_z │ ├── __init__.py │ ├── figures │ │ ├── fixedhyp-Naval_noisy-elbo.pdf │ │ ├── fixedhyp-Naval_noisy-nlpp.pdf │ │ ├── fixedhyp-Naval_noisy-rmse.pdf │ │ ├── fixedhyp-Wilson_elevators-elbo.pdf │ │ ├── fixedhyp-Wilson_elevators-nlpp.pdf │ │ ├── fixedhyp-Wilson_elevators-rmse.pdf │ │ ├── fixedhyp-Wilson_energy-elbo.pdf │ │ ├── fixedhyp-Wilson_energy-nlpp.pdf │ │ ├── fixedhyp-Wilson_energy-rmse.pdf │ │ ├── optall-Naval_noisy-trace.pdf │ │ ├── optall-Wilson_elevators-trace.pdf │ │ ├── optall-Wilson_energy-trace.pdf │ │ ├── opthyp-Naval_noisy-elbo-only.pdf │ │ ├── opthyp-Naval_noisy-elbo.pdf │ │ ├── opthyp-Naval_noisy-nlpp.pdf │ │ ├── opthyp-Naval_noisy-rmse.pdf │ │ ├── opthyp-Wilson_elevators-elbo-only.pdf │ │ ├── opthyp-Wilson_elevators-elbo.pdf │ │ ├── opthyp-Wilson_elevators-nlpp.pdf │ │ ├── opthyp-Wilson_elevators-rmse.pdf │ │ ├── opthyp-Wilson_energy-elbo-only.pdf │ │ ├── opthyp-Wilson_energy-elbo.pdf │ │ ├── opthyp-Wilson_energy-nlpp.pdf │ │ └── opthyp-Wilson_energy-rmse.pdf │ ├── jug-plot-init-inducing-fixedhyp.py │ ├── jug-plot-init-inducing-opt.py │ ├── jug-plot-opt-inducing.py │ ├── jug-plot-search-uci.py │ ├── jug_init_inducing_fixedhyp.py │ ├── jug_init_inducing_opt.py │ ├── jug_opt_inducing.py │ ├── jug_search_uci.py │ ├── utils.py │ └── which-optimiser.py └── utils │ ├── __init__.py │ ├── baselines.py │ ├── data.py │ ├── experiment_processing.py │ ├── experiment_running.py │ ├── plotting.py │ └── storing.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.jugdata 2 | 3 | .vscode 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | # PyCharm 111 | .idea 112 | 113 | *.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RobustGP 2 | Procedures for robust initialisation and optimisation of Variational Sparse Gaussian processes. This code accompanies 3 | Burt et al (2019, 2020) (see citation below), and implements the recommendations. 
4 | 5 | ## The bottom line 6 | In Burt et al (2020), we recommend that Sparse GP Regression (SGPR) (Titsias, 2009) models be trained in the following way: 7 | - Initialise the inducing inputs using the ```ConditionalVariance``` method. 8 | - Alternate between optimising only the hyperparameters and reinitialising the inducing inputs using ```ConditionalVariance```. 9 | See ```FullbatchUciExperiment``` for an example of how to implement this (```training_procedure == 'reinit_Z'```). 10 | 11 | We find that when using ```ConditionalVariance``` we obtain the same performance as gradient-optimised inducing inputs 12 | with a slightly larger number of inducing variables. The benefit is not having to do gradient-based optimisation, which 13 | is often more of a pain than it is worth. 14 | 15 | A few anecdotal suggestions for practitioners: 16 | - We suggest using ```ConditionalVariance``` for initialisation even with non-Gaussian likelihoods, although you may want 17 | to test for yourself whether to use the periodic reinitialisation method or gradient-based inducing input optimisation. 18 | - When you get Cholesky errors, consider reinitialising the inducing inputs with ```ConditionalVariance``` rather than 19 | e.g. raising the jitter. ```ConditionalVariance``` will spread out the inducing inputs under any new hyperparameters that 20 | caused high correlation between the old inducing variables, leading to better conditioning of ```Kuu```. 21 | 22 | ### Example 23 | ```python 24 | M = 1000  # We choose 1000 inducing variables 25 | k = gpflow.kernels.SquaredExponential() 26 | # Initialise hyperparameters here 27 | init_method = robustgp.ConditionalVariance() 28 | Z = init_method.compute_initialisation(X_train, M, k)[0] 29 | model = gpflow.models.SGPR((X_train, Y_train), k, Z) 30 | for _ in range(10): 31 |     # Optimise w.r.t. the hyperparameters here... 32 |     Z = init_method.compute_initialisation(X_train, M, k)[0]  # Reinitialise with the new kernel hyperparameters 33 |     model.inducing_variable.Z = gpflow.Parameter(Z) 34 | ``` 35 | 36 | ## What the code provides 37 | ### Inducing input initialisation 38 | We provide various inducing point initialisation methods, together with some tools for robustly optimising GPflow 39 | models. We really only recommend using ```ConditionalVariance``` for initialising inducing inputs; the others are 40 | included for the experiments in the paper. 41 | 42 | ### Automatic jitter selection 43 | In addition, we provide versions of the GPflow classes ```SGPR``` and ```GPR``` whose objective functions are 44 | robust to Cholesky/inversion errors. This is implemented by automatically increasing the jitter, as is done in e.g. 45 | [GPy](https://sheffieldml.github.io/GPy/). This process is a bit cumbersome in TensorFlow, so we provide the 46 | classes ```RobustSGPR``` and ```RobustGPR```, as well as a customised Scipy optimiser ```RobustScipy```. To see how this 47 | is used, see the class ```FullbatchUciExperiment``` in the ```robustgp_experiments``` directory, or the short sketch at the end of the Code guidelines section below. 48 | 49 | ### Experiments 50 | All the experiments from Burt et al (2020) are included in the ```robustgp_experiments``` directory. 51 | 52 | ## Code guidelines 53 | For using the initialisation code: 54 | - Make sure that [GPflow](https://github.com/GPflow/GPflow) is installed, then run ```python setup.py develop```. 55 | - Tests can be run using ```pytest -x --cov-report html --cov=robustgp```. 
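The following is a minimal sketch of how the robust classes and optimiser described above fit together. `X_train`, `Y_train`, the number of inducing variables `M`, and the `maxiter` setting are placeholders, and hyperparameter initialisation is omitted.

```python
import gpflow
from robustgp import ConditionalVariance
from robustgp.models import RobustSGPR
from robustgp.optimizers import RobustScipy

# Initialise the inducing inputs with ConditionalVariance and build a robust SGPR model.
k = gpflow.kernels.SquaredExponential()
Z = ConditionalVariance().compute_initialisation(X_train, M, k)[0]
model = RobustSGPR((X_train, Y_train), k, Z)

# The compiled loss is used for speed; the robust closure is only evaluated when a
# Cholesky/inversion error occurs, and retries the objective with increased jitter.
loss = model.training_loss_closure(compile=True)
robust_loss = lambda: -model.robust_maximum_log_likelihood_objective()

opt = RobustScipy()
opt.minimize(loss, model.trainable_variables, robust_closure=robust_loss,
             method="l-bfgs-b", options=dict(maxiter=1000))
```

To follow the reinitialisation procedure recommended in "The bottom line", combine this with the loop in the earlier example: optimise only the hyperparameters while keeping the inducing inputs fixed, and recompute `Z` with ```compute_initialisation``` between optimisation runs.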
56 | 57 | For running the experiments: 58 | - We use code from [Bayesian benchmarks](https://github.com/hughsalimbeni/bayesian_benchmarks) to handle dataset 59 | loading. Some assembly is needed to get all the datasets. 60 | - Some scripts are parallelised using `jug`. 61 | - Make sure it's installed using `pip install jug`. 62 | - You can run all the tasks in a script in parallel by running `jug execute jug_script.py` multiple times. 63 | - Jug communicates over the filesystem, so multiple computers can parallelise the same script if they share a networked filesystem. 64 | - Usually, a separate script takes care of the plotting / processing of the results. 65 | 66 | ## Citation 67 | To cite the recommendations in our paper or this accompanying software, please refer to our JMLR paper. 68 | ``` 69 | @article{burt2020gpviconv, 70 | author = {David R. Burt and Carl Edward Rasmussen and Mark van der Wilk}, 71 | title = {Convergence of Sparse Variational Inference in Gaussian Processes Regression}, 72 | journal = {Journal of Machine Learning Research}, 73 | year = {2020}, 74 | volume = {21}, 75 | number = {131}, 76 | pages = {1-63}, 77 | url = {http://jmlr.org/papers/v21/19-1015.html} 78 | } 79 | ``` 80 | 81 | This JMLR paper is an extended version of our ICML paper. 82 | ``` 83 | @InProceedings{burt2019gpviconv, 84 | title = {Rates of Convergence for Sparse Variational {G}aussian Process Regression}, 85 | author = {Burt, David and Rasmussen, Carl Edward and van der Wilk, Mark}, 86 | booktitle = {Proceedings of the 36th International Conference on Machine Learning}, 87 | pages = {862--871}, 88 | year = {2019}, 89 | editor = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan}, 90 | volume = {97}, 91 | series = {Proceedings of Machine Learning Research}, 92 | address = {Long Beach, California, USA}, 93 | month = {09--15 Jun}, 94 | publisher = {PMLR}, 95 | pdf = {http://proceedings.mlr.press/v97/burt19a/burt19a.pdf}, 96 | url = {http://proceedings.mlr.press/v97/burt19a.html}, 97 | } 98 | ``` 99 | -------------------------------------------------------------------------------- /robustgp/__init__.py: -------------------------------------------------------------------------------- 1 | from .init_methods import InducingPointInitializer, FirstSubsample, UniformSubsample, Kmeans, ConditionalVariance, \ 2 | KdppMCMC, RLS 3 | from . import optimizers 4 | from . import models 5 | from . 
import utilities 6 | -------------------------------------------------------------------------------- /robustgp/init_methods/__init__.py: -------------------------------------------------------------------------------- 1 | from .methods import InducingPointInitializer, FirstSubsample, ConditionalVariance, UniformSubsample, Kmeans 2 | from .rls import RLS 3 | from .kdpp_mcmc import KdppMCMC -------------------------------------------------------------------------------- /robustgp/init_methods/init_methods_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from gpflow.kernels import SquaredExponential 5 | from .methods import FirstSubsample, UniformSubsample, Kmeans, ConditionalVariance 6 | 7 | 8 | @pytest.mark.parametrize("init_method", [FirstSubsample(seed=0), UniformSubsample(seed=0), Kmeans(seed=0), 9 | ConditionalVariance(seed=0, sample=True), 10 | ConditionalVariance(seed=0, sample=False)]) 11 | def test_seed_reproducibility(init_method): 12 | k = SquaredExponential() 13 | X = np.random.randn(100, 2) 14 | 15 | Z1, idx1 = init_method(X, 30, k) 16 | Z2, idx2 = init_method(X, 30, k) 17 | 18 | assert np.all(Z1 == Z2), str(init_method) 19 | assert np.all(idx1 == idx2), str(init_method) 20 | 21 | 22 | def test_incremental_ConditionalVariance(): 23 | init_method = ConditionalVariance(sample=True) 24 | 25 | k = SquaredExponential() 26 | X = np.random.randn(100, 2) 27 | 28 | Z1, idx1 = init_method(X, 20, k) 29 | Z2, idx2 = init_method(X, 30, k) 30 | 31 | assert np.all(Z1 == Z2[:20]) 32 | assert np.all(idx1 == idx2[:20]) 33 | -------------------------------------------------------------------------------- /robustgp/init_methods/kdpp_mcmc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import warnings 4 | from typing import Callable, Optional 5 | 6 | from .methods import ConditionalVariance 7 | 8 | 9 | class KdppMCMC(ConditionalVariance): 10 | 11 | def __init__(self, num_steps: Optional[int] = 10000, seed: Optional[int] = 0, **kwargs): 12 | """ 13 | Implements the MCMC approximation to sampling from a k-DPP developed in 14 | @inproceedings{anari2016monte, 15 | title={Monte Carlo Markov chain algorithms for sampling strongly Rayleigh distributions and determinantal point processes}, 16 | author={Anari, Nima and Gharan, Shayan Oveis and Rezaei, Alireza}, 17 | booktitle={Conference on Learning Theory}, 18 | pages={103--115}, 19 | year={2016} 20 | } 21 | and used for initializing inducing point in 22 | @inproceedings{burt2019rates, 23 | title={Rates of Convergence for Sparse Variational Gaussian Process Regression}, 24 | author={Burt, David and Rasmussen, Carl Edward and Van Der Wilk, Mark}, 25 | booktitle={International Conference on Machine Learning}, 26 | pages={862--871}, 27 | year={2019} 28 | } 29 | More information on determinantal point processes and related algorithms can be found at: 30 | https://github.com/guilgautier/DPPy 31 | :param sample: int, number of steps of MCMC to run 32 | :param threshold: float or None, if not None, if tr(Kff-Qff) self.max_data: 95 | uniform = UniformSubsample(0) 96 | training_inputs = uniform(training_inputs, self.max_data) 97 | centroids, _ = scipy.cluster.vq.kmeans(training_inputs, M) 98 | # Some times K-Means returns fewer than K centroids, in this case we sample remaining point from data 99 | if len(centroids) < M: 100 | num_extra_points = M - len(centroids) 101 | indices = 
np.random.choice(N, size=num_extra_points, replace=False) 102 | additional_points = training_inputs[indices] 103 | centroids = np.concatenate([centroids, additional_points], axis=0) 104 | return centroids * training_inputs_stds, None 105 | 106 | 107 | class ConditionalVariance(InducingPointInitializer): 108 | def __init__(self, sample: Optional[bool] = False, threshold: Optional[int] = 0.0, seed: Optional[int] = 0, 109 | **kwargs): 110 | """ 111 | :param sample: bool, if True, sample points into subset to use with weights based on variance, if False choose 112 | point with highest variance at each iteration 113 | :param threshold: float or None, if not None, if tr(Kff-Qff) 0: 50 | candidate_indices = np.where(random_nums >= probs)[0] 51 | additional_indices = np.random.choice(candidate_indices, size=num_additional_indices, 52 | replace=False) 53 | indices = np.append(indices, additional_indices) 54 | indices_to_include = active_indices[indices] 55 | column_weights = np.sqrt(1. / probs[indices]) 56 | else: 57 | probs = probs * M / np.sum(probs) 58 | random_nums = np.random.rand(len(probs)) 59 | indices_to_include = active_indices[random_nums < probs] 60 | column_weights = np.sqrt(1. / probs[random_nums < probs]) 61 | # If we sample too few inducing points, resample 62 | while len(indices_to_include) < M: 63 | random_nums = np.random.rand(len(probs)) # resample if not enough 64 | indices_to_include = active_indices[random_nums < probs] 65 | column_weights = np.sqrt(1. / probs[random_nums < probs]) 66 | probs = np.clip(probs * M / np.sum(np.clip(probs, 0, 1)), 0, 1) 67 | probs *= 1.01 68 | inds = np.random.choice(len(indices_to_include), size=M, replace=False) 69 | indices_to_include, column_weights = indices_to_include[inds], column_weights[inds] 70 | return indices_to_include, column_weights, probs 71 | 72 | 73 | def recursive_rls(training_inputs: np.ndarray, 74 | M: int, 75 | kernel: Callable[[np.ndarray, Optional[np.ndarray], Optional[bool]], np.ndarray], 76 | active_indices: np.ndarray): 77 | num_data = training_inputs.shape[0] 78 | top_level = len(active_indices) == num_data # boolean indicating we are at top level of recursion 79 | c = .25 80 | k = np.minimum(num_data, int(np.ceil(c * M / np.log(M+1)))) 81 | 82 | if len(active_indices) <= M: # Base case of recursion, see l 1,2 in Musco and Musco, alg 3 83 | return active_indices, np.ones_like(active_indices), np.ones_like(active_indices) 84 | s_bar = np.random.randint(0, 2, len(active_indices)).nonzero()[0] # points sampled into Sbar, l4 85 | if len(s_bar) == 0: 86 | active_indices = np.random.choice(active_indices, (1+len(active_indices))//2, replace=False) 87 | return active_indices, np.ones_like(active_indices), np.ones_like(active_indices) 88 | 89 | indices_to_include, column_weights, probs = recursive_rls(training_inputs, M, kernel, 90 | active_indices=active_indices[s_bar]) 91 | Z = training_inputs[indices_to_include] 92 | SKS = kernel(Z) * column_weights[None, :] * column_weights[:, None] # sketched kernel matrix 93 | eigvals = scipy.sparse.linalg.eigsh(SKS.numpy(), k=k, which='LM', return_eigenvectors=False) 94 | 95 | lam = 1 / k * (np.sum(np.diag(SKS)) - np.sum(eigvals)) 96 | lam = np.maximum(1e-12, lam) 97 | 98 | weighted_leverage = approximate_rls(training_inputs, kernel, lam, subset_to_predict=active_indices, 99 | subset_used=indices_to_include, column_weights=column_weights) 100 | indices_to_include, column_weights, probs = get_indices_and_weights(weighted_leverage, active_indices, k, 101 | top_level, M) 102 | 103 | return 
indices_to_include, column_weights, probs -------------------------------------------------------------------------------- /robustgp/models.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from gpflow.base import Parameter 7 | from gpflow.config import default_jitter, default_float 8 | from gpflow.covariances import Kuf, Kuu 9 | from gpflow.kernels import Kernel 10 | from gpflow.mean_functions import MeanFunction 11 | from gpflow.models import GPR, SGPR 12 | from gpflow.models.training_mixins import RegressionData, InputData 13 | from gpflow.utilities import positive, to_default_float 14 | from gpflow.models.model import MeanAndVariance 15 | 16 | 17 | class RobustObjectiveMixin: 18 | def __init__(self, *args, **kwargs): 19 | super().__init__(*args, **kwargs) 20 | self.jitter_variance = Parameter( 21 | max(default_jitter(), 1e-20), transform=positive(0.0), trainable=False, name="jitter" 22 | ) 23 | 24 | def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor: 25 | raise NotImplementedError 26 | 27 | def robust_maximum_log_likelihood_objective(self, restore_jitter=True) -> tf.Tensor: 28 | initial_jitter = self.jitter_variance.numpy() 29 | N_orders = 20 30 | for i in range(N_orders): 31 | self.jitter_variance.assign(10 ** i * initial_jitter) 32 | logjitter = np.log10(self.jitter_variance.numpy()) 33 | if i > 0: 34 | if i == 1: 35 | print( 36 | f"{type(self).__name__}: Failed first computation. " f"Now attempting computation with jitter ", 37 | end="", 38 | ) 39 | print(f"10**{logjitter:.2f} ", end="", flush=True) 40 | try: 41 | val = self._compute_robust_maximum_log_likelihood_objective() 42 | break 43 | except tf.errors.InvalidArgumentError as e_inner: 44 | e_msg = e_inner.message 45 | if (("Cholesky" not in e_msg) and ("not invertible" not in e_msg)) or i == (N_orders - 1): 46 | print(e_msg) 47 | raise e_inner 48 | except AssertionError as e_inner: 49 | e_msg = e_inner.args 50 | if i == (N_orders - 1): 51 | print(e_msg) 52 | raise e_inner 53 | if restore_jitter: 54 | self.jitter_variance.assign(initial_jitter) 55 | if i > 0: 56 | print("") 57 | return val 58 | 59 | 60 | class RobustSGPR(RobustObjectiveMixin, SGPR): 61 | def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor: 62 | """ 63 | Construct a tensorflow function to compute the bound on the marginal 64 | likelihood. For a derivation of the terms in here, see the associated 65 | SGPR notebook. 
66 | """ 67 | X_data, Y_data = self.data 68 | 69 | num_inducing = len(self.inducing_variable) 70 | num_data = to_default_float(tf.shape(Y_data)[0]) 71 | output_dim = to_default_float(tf.shape(Y_data)[1]) 72 | 73 | err = Y_data - self.mean_function(X_data) 74 | Kdiag = self.kernel(X_data, full_cov=False) 75 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 76 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 77 | L = tf.linalg.cholesky(kuu) 78 | sigma = tf.sqrt(self.likelihood.variance) 79 | 80 | # Compute intermediate matrices 81 | A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma 82 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 83 | B = AAT + tf.eye(num_inducing, dtype=default_float()) 84 | LB = tf.linalg.cholesky(B) 85 | Aerr = tf.linalg.matmul(A, err) 86 | c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma 87 | trace_term = 0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance 88 | trace_term -= 0.5 * output_dim * tf.reduce_sum(tf.linalg.diag_part(AAT)) 89 | 90 | # tr(Kff - Qff) should be positive, numerical issues can arise here 91 | assert trace_term > 0.0, f"Trace term negative, should be positive ({trace_term:.4e})." 92 | 93 | # compute log marginal bound 94 | bound = -0.5 * num_data * output_dim * np.log(2 * np.pi) 95 | bound += tf.negative(output_dim) * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB))) 96 | bound -= 0.5 * num_data * output_dim * tf.math.log(self.likelihood.variance) 97 | bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance 98 | bound += 0.5 * tf.reduce_sum(tf.square(c)) 99 | bound -= trace_term 100 | 101 | return bound 102 | 103 | def upper_bound(self) -> tf.Tensor: 104 | """ 105 | Upper bound for the sparse GP regression marginal likelihood. Note that 106 | the same inducing points are used for calculating the upper bound, as are 107 | used for computing the likelihood approximation. This may not lead to the 108 | best upper bound. The upper bound can be tightened by optimising Z, just 109 | like the lower bound. This is especially important in FITC, as FITC is 110 | known to produce poor inducing point locations. An optimisable upper bound 111 | can be found in https://github.com/markvdw/gp_upper. 112 | 113 | The key reference is 114 | 115 | :: 116 | 117 | @misc{titsias_2014, 118 | title={Variational Inference for Gaussian and Determinantal Point Processes}, 119 | url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf}, 120 | publisher={Workshop on Advances in Variational Inference (NIPS 2014)}, 121 | author={Titsias, Michalis K.}, 122 | year={2014}, 123 | month={Dec} 124 | } 125 | 126 | The key quantity, the trace term, can be computed via 127 | 128 | >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel, 129 | ... np.zeros((len(model.inducing_variable), 1))) 130 | 131 | which computes each individual element of the trace term. 
132 | """ 133 | X_data, Y_data = self.data 134 | num_data = to_default_float(tf.shape(Y_data)[0]) 135 | 136 | Kdiag = self.kernel(X_data, full_cov=False) 137 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 138 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 139 | 140 | I = tf.eye(tf.shape(kuu)[0], dtype=default_float()) 141 | 142 | L = tf.linalg.cholesky(kuu) 143 | A = tf.linalg.triangular_solve(L, kuf, lower=True) 144 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 145 | B = I + AAT / self.likelihood.variance 146 | LB = tf.linalg.cholesky(B) 147 | 148 | # Using the Trace bound, from Titsias' presentation 149 | c = tf.maximum(tf.reduce_sum(Kdiag) - tf.reduce_sum(tf.square(A)), 0) 150 | 151 | # Alternative bound on max eigenval: 152 | corrected_noise = self.likelihood.variance + c 153 | 154 | const = -0.5 * num_data * tf.math.log(2 * np.pi * self.likelihood.variance) 155 | logdet = -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB))) 156 | 157 | LC = tf.linalg.cholesky(I + AAT / corrected_noise) 158 | v = tf.linalg.triangular_solve(LC, tf.linalg.matmul(A, Y_data) / corrected_noise, lower=True) 159 | quad = -0.5 * tf.reduce_sum(tf.square(Y_data)) / corrected_noise + 0.5 * tf.reduce_sum(tf.square(v)) 160 | 161 | return const + logdet + quad 162 | 163 | def upper_bound(self) -> tf.Tensor: 164 | """ 165 | Upper bound for the sparse GP regression marginal likelihood. Note that 166 | the same inducing points are used for calculating the upper bound, as are 167 | used for computing the likelihood approximation. This may not lead to the 168 | best upper bound. The upper bound can be tightened by optimising Z, just 169 | like the lower bound. This is especially important in FITC, as FITC is 170 | known to produce poor inducing point locations. An optimisable upper bound 171 | can be found in https://github.com/markvdw/gp_upper. 172 | The key reference is 173 | :: 174 | @misc{titsias_2014, 175 | title={Variational Inference for Gaussian and Determinantal Point Processes}, 176 | url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf}, 177 | publisher={Workshop on Advances in Variational Inference (NIPS 2014)}, 178 | author={Titsias, Michalis K.}, 179 | year={2014}, 180 | month={Dec} 181 | } 182 | The key quantity, the trace term, can be computed via 183 | >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel, 184 | ... np.zeros((len(model.inducing_variable), 1))) 185 | which computes each individual element of the trace term. 
186 | """ 187 | X_data, Y_data = self.data 188 | num_data = to_default_float(tf.shape(Y_data)[0]) 189 | 190 | Kdiag = self.kernel(X_data, full_cov=False) 191 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 192 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 193 | 194 | I = tf.eye(tf.shape(kuu)[0], dtype=default_float()) 195 | 196 | L = tf.linalg.cholesky(kuu) 197 | A = tf.linalg.triangular_solve(L, kuf, lower=True) 198 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 199 | B = I + AAT / self.likelihood.variance 200 | LB = tf.linalg.cholesky(B) 201 | 202 | # Using the Trace bound, from Titsias' presentation 203 | c = tf.maximum(tf.reduce_sum(Kdiag) - tf.reduce_sum(tf.square(A)), 0) 204 | 205 | # Alternative bound on max eigenval: 206 | corrected_noise = self.likelihood.variance + c 207 | 208 | const = -0.5 * num_data * tf.math.log(2 * np.pi * self.likelihood.variance) 209 | logdet = -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB))) 210 | 211 | LC = tf.linalg.cholesky(I + AAT / corrected_noise) 212 | v = tf.linalg.triangular_solve(LC, tf.linalg.matmul(A, Y_data) / corrected_noise, lower=True) 213 | quad = -0.5 * tf.reduce_sum(tf.square(Y_data)) / corrected_noise + 0.5 * tf.reduce_sum(tf.square(v)) 214 | 215 | return const + logdet + quad 216 | 217 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> MeanAndVariance: 218 | """ 219 | Compute the mean and variance of the latent function at some new points 220 | Xnew. For a derivation of the terms in here, see the associated SGPR 221 | notebook. 222 | """ 223 | X_data, Y_data = self.data 224 | num_inducing = len(self.inducing_variable) 225 | err = Y_data - self.mean_function(X_data) 226 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 227 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 228 | Kus = Kuf(self.inducing_variable, self.kernel, Xnew) 229 | sigma = tf.sqrt(self.likelihood.variance) 230 | L = tf.linalg.cholesky(kuu) 231 | A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma 232 | B = tf.linalg.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=default_float()) 233 | LB = tf.linalg.cholesky(B) 234 | Aerr = tf.linalg.matmul(A, err) 235 | c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma 236 | tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True) 237 | tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True) 238 | mean = tf.linalg.matmul(tmp2, c, transpose_a=True) 239 | if full_cov: 240 | var = ( 241 | self.kernel(Xnew) 242 | + tf.linalg.matmul(tmp2, tmp2, transpose_a=True) 243 | - tf.linalg.matmul(tmp1, tmp1, transpose_a=True) 244 | ) 245 | var = tf.tile(var[None, ...], [self.num_latent_gps, 1, 1]) # [P, N, N] 246 | else: 247 | var = ( 248 | self.kernel(Xnew, full_cov=False) 249 | + tf.reduce_sum(tf.square(tmp2), 0) 250 | - tf.reduce_sum(tf.square(tmp1), 0) 251 | ) 252 | var = tf.tile(var[:, None], [1, self.num_latent_gps]) 253 | return mean + self.mean_function(Xnew), var 254 | 255 | 256 | class RobustGPR(RobustObjectiveMixin, GPR): 257 | def __init__( 258 | self, 259 | data: RegressionData, 260 | kernel: Kernel, 261 | mean_function: Optional[MeanFunction] = None, 262 | noise_variance: float = 1.0, 263 | ): 264 | super().__init__(data, kernel, mean_function, noise_variance) 265 | 266 | def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor: 267 | r""" 268 | Computes the log marginal likelihood, with some slack caused by the 269 | jitter. Adding the jitter ensures numerical stability. 
270 | 271 | .. math:: 272 | \log p(Y | \theta). 273 | 274 | """ 275 | X, Y = self.data 276 | num_data = X.shape[0] 277 | output_dim = tf.shape(Y)[1] 278 | 279 | K = self.kernel(X) 280 | k_diag = tf.linalg.diag_part(K) 281 | noiseK_L, L = tf.cond( 282 | self.likelihood.variance > self.jitter_variance, 283 | lambda: ( 284 | tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.likelihood.variance)), 285 | tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.jitter_variance)), 286 | ), 287 | lambda: (tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.jitter_variance)),) * 2, 288 | ) 289 | 290 | err = Y - self.mean_function(X) 291 | sigma = tf.sqrt(self.likelihood.variance) 292 | 293 | # Compute intermediate matrices 294 | A = tf.linalg.triangular_solve(L, K, lower=True) / sigma 295 | 296 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 297 | B = tf.linalg.set_diag(AAT, tf.linalg.diag_part(AAT) + 1) # B = AAT + tf.eye(num_data, dtype=default_float()) 298 | # B = AAT + tf.eye(num_data, dtype=default_float()) 299 | LB = tf.linalg.cholesky(B) 300 | Aerr = tf.linalg.matmul(A, err) 301 | c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma 302 | 303 | # compute log marginal bound 304 | bound = -0.5 * to_default_float(num_data) * to_default_float(output_dim) * np.log(2 * np.pi) 305 | bound -= to_default_float(output_dim) * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(noiseK_L))) 306 | bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance 307 | bound += 0.5 * tf.reduce_sum(tf.square(c)) 308 | 309 | return bound 310 | -------------------------------------------------------------------------------- /robustgp/models_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import tensorflow as tf 4 | 5 | import gpflow 6 | from .models import RobustSGPR, RobustGPR 7 | 8 | np.random.seed(0) 9 | X = np.random.rand(1000, 1) 10 | Y = np.hstack((np.sin(X), np.cos(X))) 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "model", 15 | [ 16 | RobustSGPR((X, Y), gpflow.kernels.SquaredExponential(), X.copy()), 17 | RobustGPR((X, Y), gpflow.kernels.SquaredExponential()), 18 | ], 19 | ) 20 | def test_sgpr_stability(model): 21 | print(gpflow.config.default_jitter()) 22 | 23 | # Setup hyperparmaeters 24 | initial_jitter = 1e-6 25 | model.kernel.variance.assign(2.3) 26 | model.kernel.lengthscales.assign(0.93) 27 | model.likelihood.variance.assign(1e-4) 28 | 29 | # For small jitter the results should be very close 30 | model.jitter_variance.assign(initial_jitter) 31 | nojitter = model.maximum_log_likelihood_objective() 32 | jitter = model.robust_maximum_log_likelihood_objective() 33 | np.testing.assert_allclose(jitter, nojitter) 34 | 35 | # Test that increasing jitter leads to a lower bound 36 | for j in np.logspace(1, 8, 8) * initial_jitter: 37 | model.jitter_variance.assign(initial_jitter * j) 38 | model.jitter_variance.assign(j) 39 | jitter = model.robust_maximum_log_likelihood_objective() 40 | print(nojitter.numpy(), jitter.numpy()) 41 | assert jitter < nojitter 42 | 43 | # Test that adding jitter avoids a CholeskyError 44 | model.kernel.variance.assign(1e14) 45 | model.jitter_variance.assign(initial_jitter) 46 | 47 | with pytest.raises(tf.errors.InvalidArgumentError): 48 | model.maximum_log_likelihood_objective() 49 | 50 | model.robust_maximum_log_likelihood_objective() 51 | np.testing.assert_allclose(model.jitter_variance.numpy(), initial_jitter) 52 | 
-------------------------------------------------------------------------------- /robustgp/optimizers.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from dataclasses import field 3 | import numpy as np 4 | import scipy 5 | import tensorflow as tf 6 | 7 | import gpflow 8 | from gpflow.optimizers.scipy import ( 9 | LossClosure, 10 | Variables, 11 | Tuple, 12 | _compute_loss_and_gradients, 13 | Callable, 14 | StepCallback, 15 | OptimizeResult, 16 | ) 17 | 18 | 19 | class RobustScipy(gpflow.optimizers.Scipy): 20 | 21 | def __init__(self): 22 | super().__init__() 23 | self.f_vals = list() 24 | 25 | def minimize( 26 | self, 27 | closure: LossClosure, 28 | variables: Variables, 29 | method: Optional[str] = "L-BFGS-B", 30 | step_callback: Optional[StepCallback] = None, 31 | compile: bool = True, 32 | robust_closure: Optional[LossClosure] = None, 33 | **scipy_kwargs, 34 | ) -> OptimizeResult: 35 | """ 36 | Minimize is a wrapper around the `scipy.optimize.minimize` function 37 | handling the packing and unpacking of a list of shaped variables on the 38 | TensorFlow side vs. the flat numpy array required on the Scipy side. 39 | 40 | Args: 41 | closure: A closure that re-evaluates the model, returning the loss 42 | to be minimized. 43 | variables: The list (tuple) of variables to be optimized 44 | (typically `model.trainable_variables`) 45 | method: The type of solver to use in SciPy. Defaults to "L-BFGS-B". 46 | step_callback: If not None, a callable that gets called once after 47 | each optimisation step. The callabe is passed the arguments 48 | `step`, `variables`, and `values`. `step` is the optimisation 49 | step counter. `variables` is the list of trainable variables as 50 | above, and `values` is the corresponding list of tensors of 51 | matching shape that contains their value at this optimisation 52 | step. 53 | compile: If True, wraps the evaluation function (the passed `closure` as 54 | well as its gradient computation) inside a `tf.function()`, 55 | which will improve optimization speed in most cases. 56 | 57 | scipy_kwargs: Arguments passed through to `scipy.optimize.minimize` 58 | 59 | Returns: 60 | The optimization result represented as a scipy ``OptimizeResult`` 61 | object. See the Scipy documentation for description of attributes. 
62 | """ 63 | if not callable(closure): 64 | raise TypeError("The 'closure' argument is expected to be a callable object.") # pragma: no cover 65 | variables = tuple(variables) 66 | if not all(isinstance(v, tf.Variable) for v in variables): 67 | raise TypeError( 68 | "The 'variables' argument is expected to only contain tf.Variable instances (use model.trainable_variables, not model.trainable_parameters)" 69 | ) # pragma: no cover 70 | initial_params = self.initial_parameters(variables) 71 | 72 | func = self.eval_func(closure, variables, compile=compile, robust_closure=robust_closure) 73 | if step_callback is not None: 74 | if "callback" in scipy_kwargs: 75 | raise ValueError("Callback passed both via `step_callback` and `callback`") 76 | 77 | callback = self.callback_func(variables, step_callback) 78 | scipy_kwargs.update(dict(callback=callback)) 79 | 80 | return scipy.optimize.minimize(func, initial_params, jac=True, method=method, **scipy_kwargs) 81 | 82 | def eval_func( 83 | self, 84 | closure: LossClosure, 85 | variables: Variables, 86 | compile: bool = True, 87 | robust_closure: Optional[LossClosure] = None, 88 | ) -> Callable[[np.ndarray], Tuple[np.ndarray, np.ndarray]]: 89 | def make_tf_eval(closure: LossClosure): 90 | def eager_tf_eval(x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: 91 | values = self.unpack_tensors(variables, x) 92 | self.assign_tensors(variables, values) 93 | 94 | loss, grads = _compute_loss_and_gradients(closure, variables) 95 | return loss, self.pack_tensors(grads) 96 | 97 | return eager_tf_eval 98 | 99 | fast_tf_eval = make_tf_eval(closure) 100 | robust_tf_eval = make_tf_eval(robust_closure) if robust_closure is not None else None 101 | if compile: 102 | fast_tf_eval = tf.function(fast_tf_eval) # Possibly compiled 103 | 104 | def _eval(x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 105 | try: 106 | loss, grad = fast_tf_eval(tf.convert_to_tensor(x)) 107 | except tf.errors.InvalidArgumentError as e: 108 | e_msg = e.message 109 | if robust_tf_eval is None or (("Cholesky" not in e_msg) and ("not invertible" not in e_msg)): 110 | raise e 111 | print(f"Warning: CholeskyError. 
Attempting to continue.") 112 | loss, grad = robust_tf_eval(tf.convert_to_tensor(x)) 113 | self.f_vals.append(loss.numpy()) 114 | return loss.numpy().astype(np.float64), grad.numpy().astype(np.float64) 115 | 116 | return _eval 117 | -------------------------------------------------------------------------------- /robustgp/optimizers_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import tensorflow as tf 4 | 5 | import gpflow 6 | from .models import RobustSGPR, RobustGPR 7 | from .optimizers import RobustScipy 8 | from .utilities import set_trainable 9 | 10 | np.random.seed(0) 11 | X = np.random.rand(100, 1) 12 | Y = np.hstack((np.sin(X), np.cos(X))) 13 | 14 | original_default_jitter = gpflow.config.default_jitter() 15 | original_default_positive_minimum = gpflow.config.default_positive_minimum() 16 | gpflow.config.set_default_jitter(0.0) 17 | gpflow.config.set_default_positive_minimum(1e-6) 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "model", 22 | [ 23 | RobustSGPR((X, Y), gpflow.kernels.SquaredExponential(), X.copy()), 24 | RobustGPR((X, Y), gpflow.kernels.SquaredExponential()), 25 | ], 26 | ) 27 | def test_optimize_stability(model): 28 | config = gpflow.config.Config(jitter=0.0, positive_minimum=1e-6) 29 | with gpflow.config.as_context(config): 30 | print(gpflow.config.default_jitter()) 31 | model.jitter_variance.assign(1e-14) 32 | print(model.jitter_variance.numpy()) 33 | model.likelihood.variance = gpflow.Parameter(1.0, transform=gpflow.utilities.positive(lower=1e-16)) 34 | set_trainable(model, False) 35 | set_trainable(model.kernel, True) 36 | set_trainable(model.likelihood, True) 37 | 38 | loss_function = model.training_loss_closure(compile=True) 39 | robust_loss_function = lambda: -model.robust_maximum_log_likelihood_objective() 40 | 41 | with pytest.raises(tf.errors.InvalidArgumentError): 42 | opt = gpflow.optimizers.Scipy() 43 | opt.minimize(loss_function, model.trainable_variables, method="l-bfgs-b", options=dict(maxiter=10000)) 44 | 45 | opt = RobustScipy() 46 | opt.minimize( 47 | loss_function, 48 | model.trainable_variables, 49 | robust_closure=robust_loss_function, 50 | method="l-bfgs-b", 51 | options=dict(maxiter=10000), 52 | ) 53 | opt.minimize( 54 | loss_function, 55 | model.trainable_variables, 56 | robust_closure=robust_loss_function, 57 | method="l-bfgs-b", 58 | options=dict(maxiter=10000), 59 | ) 60 | 61 | gpflow.utilities.print_summary(model) 62 | 63 | 64 | gpflow.config.set_default_jitter(original_default_jitter) 65 | gpflow.config.set_default_positive_minimum(original_default_positive_minimum) 66 | -------------------------------------------------------------------------------- /robustgp/utilities.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def set_trainable(model: tf.Module, flag: bool): 5 | """ 6 | Set trainable flag for all `tf.Variable`s and `gpflow.Parameter`s in a module. 
7 | """ 8 | for variable in model.variables: 9 | if "jitter" not in variable.name: 10 | variable._trainable = flag 11 | -------------------------------------------------------------------------------- /robustgp_experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/__init__.py -------------------------------------------------------------------------------- /robustgp_experiments/demo1d.py: -------------------------------------------------------------------------------- 1 | import gpflow 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from robustgp import ConditionalVariance 5 | 6 | X = np.random.rand(150, 1) 7 | Y = 0.8 * np.cos(10 * X) + 1.2 * np.sin(8 * X + 0.3) + np.cos(17 * X) * 1.2 + np.random.randn(*X.shape) * 0.1 8 | 9 | gpr = gpflow.models.GPR((X, Y), gpflow.kernels.SquaredExponential()) 10 | opt = gpflow.optimizers.Scipy() 11 | opt_logs = opt.minimize(gpr.training_loss, gpr.trainable_variables, options=dict(maxiter=100)) 12 | 13 | k = gpflow.kernels.SquaredExponential() 14 | gpflow.utilities.multiple_assign(k, gpflow.utilities.read_values(gpr.kernel)) 15 | 16 | Z_initer = ConditionalVariance() 17 | sp = gpflow.models.SGPR((X, Y), k, Z_initer.compute_initialisation(X, 6, k)[0]) 18 | gpflow.utilities.multiple_assign(sp, gpflow.utilities.read_values(gpr)) 19 | 20 | pX = np.linspace(0, 1, 3000)[:, None] 21 | m, v = sp.predict_f(pX) 22 | ipm, _ = sp.predict_f(sp.inducing_variable.Z.value()) 23 | 24 | fig, (ax1, ax2) = plt.subplots(2, 1) 25 | ax1.plot(X, Y, 'x') 26 | ax1.plot(pX, m) 27 | ax1.plot(sp.inducing_variable.Z.value(), ipm, 'o', color='C3') 28 | deviation = (2 * (v + sp.likelihood.variance.value()) ** 0.5).numpy().flatten() 29 | ax1.fill_between(pX.flatten(), m.numpy().flatten() - deviation, m.numpy().flatten() + deviation, alpha=0.3) 30 | ax1.axvline(pX[np.argmax(v)].item(), color='C2') 31 | ax1.set_ylabel("y") 32 | ax2.plot(pX, v ** 0.5) 33 | ax2.plot(sp.inducing_variable.Z.value(), sp.inducing_variable.Z.value() * 0.0, 'o', color='C3') 34 | ax2.axvline(pX[np.argmax(v)].item(), color='C2') 35 | ax2.set_xlabel("input $x$") 36 | ax2.set_ylabel("$\mathbb{V}\,[p(f(x) | \mathbf{u}]^{0.5}$") 37 | plt.show() 38 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/__init__.py -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-nlpp.pdf -------------------------------------------------------------------------------- 
/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/optall-Naval_noisy-trace.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/optall-Naval_noisy-trace.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/optall-Wilson_elevators-trace.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/optall-Wilson_elevators-trace.pdf -------------------------------------------------------------------------------- 
/robustgp_experiments/init_z/figures/optall-Wilson_energy-trace.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/optall-Wilson_energy-trace.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo-only.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo-only.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo-only.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo-only.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-rmse.pdf -------------------------------------------------------------------------------- 
/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo-only.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo-only.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_energy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_energy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-init-inducing-fixedhyp.py: -------------------------------------------------------------------------------- 1 | # # Inducing point initialisation with fixed hyperparameters 2 | # Assess how well inducing point initialisation works, with the hyperparameters fixed to the ones found by the full GP. 3 | # This simplifies things, since we only need to run optimisation with the full GP (or a GP with many inducing points). 4 | # 5 | # To parallelise things, we use jug. 6 | # 1. Run `jug execute jug_init_inducing_fixedhyp.py` multiple times to do runs in parallel. Workers can sync over a shared 7 | # filesystem. 8 | # 2. Once the above is done, run this script to create the plots.
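The two-step workflow from the header comment above, restated as a short sketch in comment form; the worker count and the `jug status` progress check are illustrative additions rather than part of the original instructions:

# Step 1: start several workers (e.g. one per shell or cluster job), all pointing at the same jugfile
#         and sharing a filesystem so they coordinate through the jug_init_inducing_fixedhyp.jugdata store:
#             jug execute jug_init_inducing_fixedhyp.py
# Step 2: optionally check progress with `jug status jug_init_inducing_fixedhyp.py`; once all tasks are
#         done, run this plotting script directly:
#             python jug-plot-init-inducing-fixedhyp.py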
9 | 10 | import jug.task 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from robustgp_experiments.utils import baselines 15 | 16 | import matplotlib 17 | matplotlib.rcParams['axes.spines.right'] = False 18 | matplotlib.rcParams['axes.spines.top'] = False 19 | font = {'family': 'cmr10', 'size': 24} 20 | matplotlib.rc('font', **font) 21 | matplotlib.rc('text', usetex=True) 22 | color_dict = {"Uniform":'#999999',"Kmeans": '#ff7f00', "Greedy Conditional Variance":'#4daf4a',"gradient":"#377eb8", 23 | "RLS":'#a65628', "M-DPP MCMC": '#984ea3'} 24 | name_dict = {"Kmeans": "K-means", "Uniform":"Uniform", "Greedy Conditional Variance":"Greedy var.", "Sample Conditional Variance":"Sample var.","gradient":"Gradient", "RLS":"RLS", "M-DPP MCMC": "M-DPP MCMC"} 25 | plot_title_dict = {"Wilson_energy":"Energy", "Wilson_elevators":"Elevators","Naval_noisy":"Naval with Noise"} 26 | methods_to_ignore = ["Sample Conditional Variance"] 27 | jug.init("jug_init_inducing_fixedhyp.py", "jug_init_inducing_fixedhyp.jugdata") 28 | from jug_init_inducing_fixedhyp import ( 29 | init_Z_runs, init_Z_task_results, baseline_exps, full_rmses, full_nlpps, baseline_lmls, Ms, dataset_names 30 | ) 31 | # 32 | # 33 | # Evaluation 34 | init_Z_rmses = {} 35 | init_Z_nlpps = {} 36 | init_Z_elbos = {} 37 | init_Z_uppers = {} 38 | init_Z_Ms = {} 39 | mean_baselines = {} 40 | linear_baselines = {} 41 | for dataset in dataset_names: 42 | init_Z_rmses[dataset] = {} 43 | init_Z_nlpps[dataset] = {} 44 | init_Z_elbos[dataset] = {} 45 | init_Z_uppers[dataset] = {} 46 | init_Z_Ms[dataset] = {} 47 | 48 | for init_Z_method in init_Z_runs[dataset].keys(): 49 | init_Z_rmses[dataset][init_Z_method] = dict() 50 | init_Z_nlpps[dataset][init_Z_method] = dict() 51 | init_Z_elbos[dataset][init_Z_method] = dict() 52 | init_Z_uppers[dataset][init_Z_method] = dict() 53 | init_Z_Ms[dataset][init_Z_method] = [] 54 | for stat in ["Means", "Standard dev.", "Sample std.", "Median", "80 pct", "20 pct"]: 55 | for metric in [init_Z_rmses,init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 56 | metric[dataset][init_Z_method][stat] = [] 57 | for M in init_Z_task_results[dataset][init_Z_method].keys(): 58 | init_Z_Ms[dataset][init_Z_method].append(int(M)) 59 | init_Z_rmses[dataset][init_Z_method][M] = [] 60 | init_Z_nlpps[dataset][init_Z_method][M] = [] 61 | init_Z_elbos[dataset][init_Z_method][M] = [] 62 | init_Z_uppers[dataset][init_Z_method][M] = [] 63 | for result in init_Z_task_results[dataset][init_Z_method][M]: 64 | try: 65 | elbo, upper, rmse, nlpp = jug.task.value(result) 66 | except: 67 | continue 68 | init_Z_elbos[dataset][str(init_Z_method)][M].append(elbo) 69 | init_Z_uppers[dataset][str(init_Z_method)][M].append(upper) 70 | init_Z_rmses[dataset][str(init_Z_method)][M].append(rmse) 71 | init_Z_nlpps[dataset][str(init_Z_method)][M].append(nlpp) 72 | for metric in [init_Z_rmses, init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 73 | metric[dataset][init_Z_method]["Means"].append(np.mean(metric[dataset][init_Z_method][M])) 74 | metric[dataset][init_Z_method]["Standard dev."].append(np.std(metric[dataset][init_Z_method][M])) 75 | metric[dataset][init_Z_method]["Sample std."].append(np.std(metric[dataset][init_Z_method][M]) / 76 | np.sqrt((len(metric[dataset][init_Z_method][M])-1))) 77 | metric[dataset][init_Z_method]["Median"].append(np.median(metric[dataset][init_Z_method][M])) 78 | metric[dataset][init_Z_method]["20 pct"].append(np.percentile(metric[dataset][init_Z_method][M],20)) 79 | metric[dataset][init_Z_method]["80 
pct"].append(np.percentile(metric[dataset][init_Z_method][M],80)) 80 | 81 | baseline_exp = baseline_exps[dataset] 82 | mean_baselines[dataset] = baselines.meanpred_baseline(None, baseline_exp.Y_train, None, baseline_exp.Y_test) 83 | linear_baselines[dataset] = baselines.linear_baseline(baseline_exp.X_train, baseline_exp.Y_train, 84 | baseline_exp.X_test, baseline_exp.Y_test) 85 | 86 | # 87 | # Plotting 88 | for dataset in dataset_names: 89 | dataset_plot_settings = dict( 90 | Naval_noisy=dict(xlim=(10,200),elbo_only_ylim=(37300, 37900), elbo_ylim=(36e3, 43500), nlpp_ylim=(-3.6, -2.1), 91 | rmse_ylim=(.0065, .018), include_mean=False, include_linear=False), 92 | Wilson_energy=dict(xlim=(10,200),elbo_only_ylim=(800, 1050), elbo_ylim=(800, 1350), nlpp_ylim=(-1.7, -.6), 93 | rmse_ylim=(.046, .09), include_mean=False, include_linear=False), 94 | Wilson_elevators=dict(xlim=(0,5000),elbo_only_ylim=(-8500, -6000), elbo_ylim=(-8000, 1000), nlpp_ylim=(.375, .45), 95 | rmse_ylim=(.3515, .37), include_mean=False, include_linear=False) 96 | ).get(dataset, dict(elbo_only_y_lim=None, elbo_ylim=None, include_linear=True, nlpp_ylim=None, 97 | rmse_ylim=None)) 98 | l_elbo, l_rmse, l_nlpp = linear_baselines[dataset] 99 | m_elbo, m_rmse, m_nlpp = mean_baselines[dataset] 100 | fig, ax = plt.subplots() 101 | for method in init_Z_runs[dataset].keys(): 102 | if method in methods_to_ignore: 103 | continue 104 | l, = ax.plot(init_Z_Ms[dataset][method],init_Z_elbos[dataset][method]["Median"], label=name_dict[method], 105 | color=color_dict[method]) 106 | ax.plot(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["Median"], label="_nolegend_", 107 | color=l.get_color(), linestyle=(0, (3, 1, 1, 1, 1, 1))) 108 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_elbos[dataset][method]["20 pct"], 109 | init_Z_elbos[dataset][method]["80 pct"], color=l.get_color(), alpha=.2) 110 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["20 pct"], 111 | init_Z_uppers[dataset][method]["80 pct"], color=l.get_color(), alpha=.2, 112 | hatch='/', label='_nolegend_') 113 | ax.axhline(baseline_lmls[dataset], label='Full GP', linestyle="--",color='k') 114 | if dataset_plot_settings["include_linear"]: 115 | ax.axhline(l_elbo, label='Linear', linestyle='-.',color='k') 116 | ax.axhline(m_elbo, label='Mean', linestyle=':',color='k') 117 | #ax.legend(loc="upper left") 118 | ax.set_xlabel("M") 119 | ax.set_ylabel("ELBO") 120 | ax.set_title(plot_title_dict[dataset]) 121 | ax.set_xlim(dataset_plot_settings["xlim"]) 122 | ax.set_ylim(dataset_plot_settings["elbo_ylim"]) 123 | plt.tight_layout() 124 | fig.savefig(f"./figures/fixedhyp-{dataset}-elbo.pdf") 125 | 126 | fig, ax = plt.subplots() 127 | for method in init_Z_runs[dataset].keys(): 128 | if method in methods_to_ignore: 129 | continue 130 | l, = ax.plot(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 131 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["20 pct"], 132 | init_Z_rmses[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 133 | ax.axhline(full_rmses[dataset], label="Full GP", linestyle='--',color='k') 134 | if dataset_plot_settings["include_linear"]: 135 | ax.axhline(l_rmse, label="Linear", linestyle='-.',color='k') 136 | ax.axhline(m_rmse, label="Mean", linestyle=':',color='k') 137 | #ax.legend() 138 | ax.set_xlabel("M") 139 | ax.set_ylabel("RMSE") 140 | #ax.set_title(plot_title_dict[dataset]) 141 | ax.set_xlim(dataset_plot_settings["xlim"]) 142 
| ax.set_ylim(dataset_plot_settings["rmse_ylim"]) 143 | plt.tight_layout() 144 | fig.savefig(f"./figures/fixedhyp-{dataset}-rmse.pdf") 145 | 146 | fig, ax = plt.subplots() 147 | for method in init_Z_runs[dataset].keys(): 148 | if method in methods_to_ignore: 149 | continue 150 | l,=ax.plot(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 151 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["20 pct"], 152 | init_Z_nlpps[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 153 | ax.axhline(full_nlpps[dataset], label="Full GP", linestyle='--', color='k') 154 | if dataset_plot_settings["include_linear"]: 155 | ax.axhline(l_nlpp, label="Linear", linestyle='-.',color='k') 156 | ax.axhline(m_nlpp, label="Mean", linestyle=':',color='k') 157 | #ax.legend() 158 | ax.set_xlabel("M") 159 | ax.set_ylabel("NLPD") 160 | ax.set_xlim(dataset_plot_settings["xlim"]) 161 | ax.set_ylim(dataset_plot_settings["nlpp_ylim"]) 162 | #ax.set_title(plot_title_dict[dataset]) 163 | plt.tight_layout() 164 | fig.savefig(f"./figures/fixedhyp-{dataset}-nlpp.pdf") 165 | 166 | plt.show() 167 | 168 | 169 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-init-inducing-opt.py: -------------------------------------------------------------------------------- 1 | import jug.task 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | from robustgp_experiments.utils import baselines 6 | import matplotlib 7 | matplotlib.rcParams['axes.spines.right'] = False 8 | matplotlib.rcParams['axes.spines.top'] = False 9 | font = {'family': 'cmr10', 'size': 24} 10 | matplotlib.rc('font', **font) 11 | matplotlib.rc('text', usetex=True) 12 | color_dict = {"Uniform":'#999999', 13 | "Kmeans": '#ff7f00', 14 | "Greedy Conditional Variance": '#4daf4a', 15 | "Gradient": "#377eb8", 16 | "RLS": '#a65628', 17 | "M-DPP MCMC": '#984ea3', 18 | "reinit_Z_sF": 'C9'} 19 | name_dict = {"Kmeans": "K-means", "Uniform":"Uniform", 20 | "Greedy Conditional Variance":"Greedy var.", 21 | "Sample Conditional Variance":"Sample var.", 22 | "Gradient":"Gradient", 23 | "reinit_Z_sF":"Greedy var. (reinit.)", 24 | "reinit_Z_sT":"Sample var. 
(reinit.)", 25 | "RLS":"RLS", 26 | "M-DPP MCMC": "M-DPP MCMC"} 27 | plot_title_dict = {"Wilson_energy":"Energy", "Wilson_elevators":"Elevators","Naval_noisy":"Naval with Noise"} 28 | methods_to_ignore = ["Sample Conditional Variance", "reinit_Z_sT"] 29 | jug.init("jug_init_inducing_opt.py", "jug_init_inducing_opt.jugdata") 30 | from jug_init_inducing_opt import ( 31 | init_Z_runs, init_Z_task_results, baseline_exps, full_rmses, full_nlpps, baseline_lmls, Ms, dataset_names 32 | ) 33 | # Evaluation 34 | init_Z_rmses = {} 35 | init_Z_nlpps = {} 36 | init_Z_elbos = {} 37 | init_Z_uppers = {} 38 | init_Z_Ms = {} 39 | mean_baselines = {} 40 | linear_baselines = {} 41 | for dataset in dataset_names: 42 | init_Z_rmses[dataset] = {} 43 | init_Z_nlpps[dataset] = {} 44 | init_Z_elbos[dataset] = {} 45 | init_Z_uppers[dataset] = {} 46 | init_Z_Ms[dataset] = {} 47 | 48 | for init_Z_method in init_Z_runs[dataset].keys(): 49 | init_Z_rmses[dataset][init_Z_method] = dict() 50 | init_Z_nlpps[dataset][init_Z_method] = dict() 51 | init_Z_elbos[dataset][init_Z_method] = dict() 52 | init_Z_uppers[dataset][init_Z_method] = dict() 53 | init_Z_Ms[dataset][init_Z_method] = [] 54 | for stat in ["Means", "Standard dev.", "Sample std.", "Median", "80 pct", "20 pct"]: 55 | for metric in [init_Z_rmses,init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 56 | metric[dataset][init_Z_method][stat] = [] 57 | for M in init_Z_task_results[dataset][init_Z_method].keys(): 58 | init_Z_Ms[dataset][init_Z_method].append(int(M)) 59 | init_Z_rmses[dataset][init_Z_method][M] = [] 60 | init_Z_nlpps[dataset][init_Z_method][M] = [] 61 | init_Z_elbos[dataset][init_Z_method][M] = [] 62 | init_Z_uppers[dataset][init_Z_method][M] = [] 63 | for result in init_Z_task_results[dataset][init_Z_method][M]: 64 | elbo, upper, rmse, nlpp = jug.task.value(result) 65 | init_Z_elbos[dataset][str(init_Z_method)][M].append(elbo) 66 | init_Z_uppers[dataset][str(init_Z_method)][M].append(upper) 67 | init_Z_rmses[dataset][str(init_Z_method)][M].append(rmse) 68 | init_Z_nlpps[dataset][str(init_Z_method)][M].append(nlpp) 69 | for metric in [init_Z_rmses, init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 70 | metric[dataset][init_Z_method]["Means"].append(np.mean(metric[dataset][init_Z_method][M])) 71 | metric[dataset][init_Z_method]["Standard dev."].append(np.std(metric[dataset][init_Z_method][M])) 72 | metric[dataset][init_Z_method]["Sample std."].append(np.std(metric[dataset][init_Z_method][M]) / 73 | np.sqrt((len(metric[dataset][init_Z_method][M])-1))) 74 | metric[dataset][init_Z_method]["Median"].append(np.nanmedian(metric[dataset][init_Z_method][M])) 75 | metric[dataset][init_Z_method]["20 pct"].append(np.nanpercentile(metric[dataset][init_Z_method][M],20)) 76 | metric[dataset][init_Z_method]["80 pct"].append(np.nanpercentile(metric[dataset][init_Z_method][M],80)) 77 | 78 | 79 | baseline_exp = baseline_exps[dataset] 80 | mean_baselines[dataset] = baselines.meanpred_baseline(None, baseline_exp.Y_train, None, baseline_exp.Y_test) 81 | linear_baselines[dataset] = baselines.linear_baseline(baseline_exp.X_train, baseline_exp.Y_train, 82 | baseline_exp.X_test, baseline_exp.Y_test) 83 | # 84 | # Plotting 85 | for dataset in dataset_names: 86 | dataset_plot_settings = dict( 87 | Naval_noisy=dict(xlim=(20,300),elbo_only_ylim=(37300,37900),elbo_ylim=(36e3, 43500),nlpp_ylim=(-3.6,-3.4),rmse_ylim=(.0065,.01),include_mean=False,include_linear=False), 88 | Wilson_energy = dict(xlim=(10,200),elbo_only_ylim=(800,1020),elbo_ylim=(800, 
1350),nlpp_ylim=(-1.7,-.4),rmse_ylim=(.045,.07),include_mean=False,include_linear=False), 89 | Wilson_elevators = dict(xlim=(-10,5000),elbo_only_ylim=(-7250,-6100),elbo_ylim=(-7200,1000),nlpp_ylim=(.375,.46),rmse_ylim=(.35,.38),include_mean=False,include_linear=False) 90 | ).get(dataset, dict(xlim=None,elbo_only_y_lim=None,elbo_ylim=None,include_mean=True,include_linear=True)) 91 | l_elbo, l_rmse, l_nlpp = linear_baselines[dataset] 92 | m_elbo, m_rmse, m_nlpp = mean_baselines[dataset] 93 | fig, ax = plt.subplots() 94 | methods = init_Z_runs[dataset].keys() 95 | for method in methods: 96 | if method in methods_to_ignore: 97 | continue 98 | l, = ax.plot(init_Z_Ms[dataset][method],init_Z_elbos[dataset][method]["Median"], label=name_dict[method], 99 | color=color_dict[method]) 100 | ax.plot(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["Median"], label=f"_nolegend_", 101 | color=l.get_color(), linestyle=(0, (3, 1, 1, 1, 1, 1))) 102 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_elbos[dataset][method]["20 pct"], 103 | init_Z_elbos[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 104 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["20 pct"], 105 | init_Z_uppers[dataset][method]["80 pct"], color=l.get_color(),alpha=.2, 106 | hatch='/', label='_nolegend_') 107 | ax.axhline(baseline_lmls[dataset], label='Full GP', linestyle="--",color='k') 108 | if dataset_plot_settings['include_linear']: 109 | ax.axhline(l_elbo, label='Linear', linestyle='-.',color='k') 110 | if dataset_plot_settings['include_mean']: 111 | ax.axhline(m_elbo, label='Mean', linestyle=':',color='k') 112 | # ax.legend() 113 | ax.set_title(plot_title_dict[dataset]) 114 | ax.set_xlabel("M") 115 | ax.set_ylabel("ELBO") 116 | ax.set_ylim(dataset_plot_settings["elbo_ylim"]) 117 | ax.set_xlim(dataset_plot_settings["xlim"]) 118 | 119 | plt.tight_layout() 120 | fig.savefig(f"./figures/opthyp-{dataset}-elbo.pdf") 121 | fig, ax = plt.subplots() 122 | for method in methods: 123 | if method in methods_to_ignore: 124 | continue 125 | l, = ax.plot(init_Z_Ms[dataset][method],init_Z_elbos[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 126 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_elbos[dataset][method]["20 pct"], 127 | init_Z_elbos[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 128 | ax.axhline(baseline_lmls[dataset], label='Full GP', linestyle="--",color='k') 129 | if dataset_plot_settings['include_linear']: 130 | ax.axhline(l_elbo, label='Linear', linestyle='-.',color='k') 131 | if dataset_plot_settings['include_mean']: 132 | ax.axhline(m_elbo, label='Mean', linestyle=':',color='k') 133 | # ax.legend() 134 | ax.set_xlabel("M") 135 | ax.set_title(plot_title_dict[dataset]) 136 | ax.set_ylabel("ELBO") 137 | ax.set_xlim(dataset_plot_settings["xlim"]) 138 | ax.set_ylim(dataset_plot_settings["elbo_only_ylim"]) 139 | plt.tight_layout() 140 | fig.savefig(f"./figures/opthyp-{dataset}-elbo-only.pdf") 141 | 142 | fig, ax = plt.subplots() 143 | for method in methods: 144 | if method in methods_to_ignore: 145 | continue 146 | l, = ax.plot(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 147 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["20 pct"], 148 | init_Z_rmses[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 149 | ax.axhline(full_rmses[dataset], label="Full GP", linestyle='--',color='k') 150 | if dataset_plot_settings['include_linear']: 151 
| ax.axhline(l_rmse, label='Linear', linestyle='-.',color='k') 152 | if dataset_plot_settings['include_mean']: 153 | ax.axhline(m_rmse, label='Mean', linestyle=':',color='k') 154 | # ax.legend() 155 | # ax.set_title(plot_title_dict[dataset]) 156 | ax.set_xlim(dataset_plot_settings["xlim"]) 157 | ax.set_ylim(dataset_plot_settings["rmse_ylim"]) 158 | ax.set_xlabel("M") 159 | ax.set_ylabel("RMSE") 160 | plt.tight_layout() 161 | fig.savefig(f"./figures/opthyp-{dataset}-rmse.pdf") 162 | 163 | fig, ax = plt.subplots() 164 | for method in methods: 165 | if method in methods_to_ignore: 166 | continue 167 | l,=ax.plot(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 168 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["20 pct"], 169 | init_Z_nlpps[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 170 | ax.axhline(full_nlpps[dataset], label="Full GP", linestyle='--',color='k') 171 | if dataset_plot_settings['include_linear']: 172 | ax.axhline(l_nlpp, label='Linear', linestyle='-.',color='k') 173 | if dataset_plot_settings['include_mean']: 174 | ax.axhline(m_nlpp, label='Mean', linestyle=':',color='k') 175 | # ax.legend() 176 | # ax.set_title(plot_title_dict[dataset]) 177 | ax.set_xlabel("M") 178 | ax.set_ylabel("NLPD") 179 | ax.set_xlim(dataset_plot_settings["xlim"]) 180 | ax.set_ylim(dataset_plot_settings["nlpp_ylim"]) 181 | plt.tight_layout() 182 | fig.savefig(f"./figures/opthyp-{dataset}-nlpp.pdf") 183 | 184 | plt.show() 185 | 186 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-opt-inducing.py: -------------------------------------------------------------------------------- 1 | import jug.task 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import matplotlib 5 | 6 | matplotlib.rcParams["axes.spines.right"] = False 7 | matplotlib.rcParams["axes.spines.top"] = False 8 | font = {"family": "cmr10", "size": 24} 9 | matplotlib.rc("font", **font) 10 | matplotlib.rc("text", usetex=True) 11 | 12 | color_dict = {"Kmeans": "#ff7f00", "Greedy Conditional Variance": "C9", "Gradient": "#377eb8"} 13 | name_dict = { 14 | "Kmeans": "K-means (reinit.)", 15 | "Greedy Conditional Variance": "Greedy var. 
(reinit.)", 16 | "Gradient": "Gradient", 17 | } 18 | plot_title_dict = { 19 | "Wilson_energy": "Energy (M=65)", 20 | "Wilson_elevators": "Elevators (M=1200)", 21 | "Naval_noisy": "Naval with Noise (M=55)", 22 | } 23 | jug.init("jug_opt_inducing.py", "jug_opt_inducing.jugdata") 24 | from jug_opt_inducing import init_Z_runs, init_Z_task_results, baseline_lmls 25 | 26 | hists = {} 27 | for dataset in init_Z_runs.keys(): 28 | hists[dataset] = dict() 29 | for init_Z_method in init_Z_runs[dataset].keys(): 30 | hists[dataset][init_Z_method] = list() 31 | for M, result in init_Z_task_results[dataset][init_Z_method].items(): 32 | outputs = jug.task.value(result) 33 | for output in outputs: 34 | hists[dataset][init_Z_method].append(output[-1]) 35 | 36 | 37 | for dataset in init_Z_runs.keys(): 38 | fig, ax = plt.subplots() 39 | dataset_plot_settings = dict( 40 | Naval_noisy=dict(elbo_ylim=(32e3, 39e3)), 41 | Wilson_energy=dict(elbo_ylim=(700, 1050)), 42 | Wilson_elevators=dict(elbo_ylim=(-7000, -6000)), 43 | ).get(dataset, dict(elbo_ylim=None)) 44 | for init_Z_method in init_Z_runs[dataset].keys(): 45 | losses = hists[dataset][init_Z_method] 46 | max_f_evals = max(map(len, losses)) 47 | elbos = -np.array([elbo + [np.nan] * (max_f_evals - len(elbo)) for elbo in losses]) 48 | best_elbo = np.maximum.accumulate(elbos, axis=1) 49 | # median_elbo = np.nanmedian(best_elbo, axis=0) 50 | (l,) = ax.plot( 51 | np.arange(len(best_elbo[0])), best_elbo[0], label=name_dict[init_Z_method], color=color_dict[init_Z_method] 52 | ) 53 | for i in range(1, len(best_elbo)): 54 | ax.plot(np.arange(len(best_elbo[i])), best_elbo[i], label="_no_legend_", color=l.get_color()) 55 | ax.axhline(baseline_lmls[dataset], label="Full GP", linestyle="--", color="k") 56 | handles, labels = ax.get_legend_handles_labels() 57 | # sort both labels and handles by labels 58 | labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0])) 59 | # ax.legend() 60 | ax.set_xlabel("Number of Fn. 
Evals.") 61 | ax.set_ylabel("ELBO") 62 | ax.set_ylim(dataset_plot_settings["elbo_ylim"]) 63 | ax.set_xlim((-10, 250)) 64 | ax.set_title(plot_title_dict[dataset]) 65 | plt.tight_layout() 66 | fig.savefig(f"./figures/optall-{dataset}-trace.pdf") 67 | plt.show() 68 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-search-uci.py: -------------------------------------------------------------------------------- 1 | import jug.task 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | from robustgp_experiments.utils.baselines import linear_baseline, meanpred_baseline 6 | 7 | jug.init("jug_search_uci.py", "jug_search_uci.jugdata") 8 | from jug_search_uci import ( 9 | dataset_names, sparse_task_results, get_settings, baseline_results, baseline_exps, sparse_exps 10 | ) 11 | 12 | plot_all_datasets = True 13 | plot_normalised = True 14 | 15 | # Can comment this out to run all datasets 16 | if not plot_all_datasets: 17 | dataset_names = ["Wilson_energy"] 18 | # dataset_names = [n for n in dataset_names if n not in 19 | # ["Wilson_pendulum", "Pendulum_noisy", "Wilson_wine"]] 20 | # dataset_names = [n for n in dataset_names if n not in 21 | # ["Wilson_pendulum", "Pendulum_noisy", "Wilson_wine", "kin40k"]] 22 | # dataset_names = ["Naval", "Naval_noisy"] 23 | # dataset_names = ["Wilson_stock", "Wilson_energy", "Wilson_concrete", "Wilson_airfoil"] 24 | 25 | # Get values from tasks 26 | sparse_results_raw = {} 27 | sparse_results_normalised = {} 28 | baseline_lmls = {} 29 | for dataset_name in dataset_names: 30 | if (type(baseline_results[dataset_name]) is float) or not baseline_results[dataset_name].can_load(): 31 | continue 32 | baseline_lmls[dataset_name] = jug.task.value(baseline_results[dataset_name]) 33 | 34 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 35 | sparse_task_values = [jug.task.value(result) for result in sparse_task_results[dataset_name]] 36 | sparse_results_raw[dataset_name] = pd.DataFrame.from_records( 37 | sparse_task_values, columns=['elbo', 'upper', 'rmse', 'nlpp'], index=Ms 38 | ) 39 | const_model_lml = meanpred_baseline(baseline_exps[dataset_name].X_train, baseline_exps[dataset_name].Y_train, 40 | baseline_exps[dataset_name].X_test, baseline_exps[dataset_name].Y_test)[0] 41 | linear_model_lml = linear_baseline(baseline_exps[dataset_name].X_train, baseline_exps[dataset_name].Y_train, 42 | baseline_exps[dataset_name].X_test, baseline_exps[dataset_name].Y_test)[0] 43 | rel_lml = const_model_lml 44 | sparse_results_normalised[dataset_name] = sparse_results_raw[dataset_name].copy() 45 | sparse_results_normalised[dataset_name].elbo -= baseline_lmls[dataset_name] 46 | sparse_results_normalised[dataset_name].elbo /= baseline_lmls[dataset_name] - rel_lml 47 | sparse_results_normalised[dataset_name].upper -= baseline_lmls[dataset_name] 48 | sparse_results_normalised[dataset_name].index /= baseline_exps[dataset_name].X_train.shape[0] 49 | 50 | baseline_exps[dataset_name].load() 51 | print(f"{dataset_name:30} lik variance: {baseline_exps[dataset_name].model.likelihood.variance.numpy():.8f}" 52 | f" lml: {baseline_lmls[dataset_name]} linlml: {linear_model_lml}") 53 | 54 | sparse_results = sparse_results_normalised if plot_normalised else sparse_results_raw 55 | 56 | _, ax = plt.subplots() 57 | for dataset_name in sparse_results.keys(): 58 | # ax.axhline(baseline_lmls[dataset_name]) 59 | # ax.axhline() 60 | l, = 
ax.plot(sparse_results[dataset_name].index, sparse_results[dataset_name].elbo, 61 | label=f"{dataset_name} ({len(sparse_exps[dataset_name][0].X_train)})") 62 | # ax.plot(sparse_results[dataset_name].index, sparse_results[dataset_name].upper, 63 | # color=l.get_color(), linestyle=':') 64 | ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fontsize='x-small', ncol=5) 65 | plt.show() 66 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_init_inducing_fixedhyp.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | import gpflow 5 | import jug 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | import robustgp 10 | from robustgp.utilities import set_trainable 11 | from robustgp_experiments.init_z.utils import print_post_run, uci_train_settings 12 | from robustgp_experiments.utils import FullbatchUciExperiment, LoggerCallback 13 | 14 | # Settings 15 | dataset_names = ["Wilson_energy", "Naval_noisy", "Wilson_elevators"] 16 | num_seeds = 3 # Use 10 to replicate experiments from paper 17 | seeds = np.arange(num_seeds) 18 | all_model_parameters = {} 19 | 20 | # 21 | # 22 | # Setup 23 | gpflow.config.set_default_positive_minimum(1.0e-5) 24 | gpflow.config.set_default_jitter(1e-10) 25 | init_Z_methods = dict() 26 | init_Z_methods["Uniform"] = [robustgp.UniformSubsample(seed=seed) for seed in seeds] 27 | init_Z_methods["Greedy Conditional Variance"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 28 | # init_Z_methods["Sample Conditional Variance"] = [robustgp.ConditionalVariance(sample=True, seed=seed) for seed in seeds] 29 | init_Z_methods["Kmeans"] = [robustgp.Kmeans(seed=seed) for seed in seeds] 30 | # init_Z_methods["M-DPP MCMC"] = [robustgp.KdppMCMC(seed=seed) for seed in seeds] 31 | # init_Z_methods["RLS"] = [robustgp.RLS(seed=seed) for seed in seeds] 32 | 33 | experiment_name = "init-inducing" 34 | 35 | 36 | uci_train_settings.update( 37 | dict( 38 | Naval_noisy=( 39 | [10, 20, 30, 40, 45, 47, 50, 55, 60, 65, 70, 75, 80, 85, 90, 100, 130, 150, 180, 200, 250, 300, 400, 500], 40 | {}, 41 | ), # Very sparse solution exists 42 | ) 43 | ) 44 | 45 | 46 | def get_settings(dataset_name): 47 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 48 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 49 | common_run_settings = dict( 50 | storage_path=experiment_storage_path, dataset_name=dataset_name, max_lengthscale=1001.0, max_variance=1001.0 51 | ) 52 | return experiment_storage_path, Ms, common_run_settings, dataset_custom_settings 53 | 54 | 55 | # 56 | # Experiment classes 57 | @dataclass 58 | class FullbatchUciInducingOptExperiment(FullbatchUciExperiment): 59 | optimise_objective: Optional[str] = None # None | lower | upper 60 | 61 | def run_optimisation(self): 62 | print(f"Running {str(self)}") 63 | 64 | model = self.model 65 | set_trainable(model, False) 66 | set_trainable(model.inducing_variable, True) 67 | 68 | if self.optimise_objective is None: 69 | return 70 | elif self.optimise_objective == "upper": 71 | loss_function = tf.function(model.upper_bound) 72 | elif self.optimise_objective == "lower": 73 | loss_function = self.model.training_loss_closure(compile=True) 74 | else: 75 | raise NotImplementedError 76 | hist = LoggerCallback(model, loss_function) 77 | 78 | def run_optimisation(): 79 | if self.optimizer == "l-bfgs-b" or self.optimizer == "bfgs": 80 | try: 
81 | opt = gpflow.optimizers.Scipy() 82 | opt.minimize( 83 | loss_function, 84 | self.model.trainable_variables, 85 | method=self.optimizer, 86 | options=dict(maxiter=1000, disp=False), 87 | step_callback=hist, 88 | ) 89 | print("") 90 | except KeyboardInterrupt: 91 | pass # TODO: Come up with something better than just pass... 92 | else: 93 | raise NotImplementedError(f"I don't know {self.optimizer}") 94 | 95 | run_optimisation() 96 | 97 | # Store results 98 | self.trained_parameters = gpflow.utilities.read_values(model) 99 | self.train_objective_hist = (hist.n_iters, hist.log_likelihoods) 100 | 101 | 102 | def compute_model_stats(exp): 103 | rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 104 | nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 105 | elbo = exp.model.elbo().numpy() 106 | upper = exp.model.upper_bound().numpy() 107 | return elbo, upper, rmse, nlpp 108 | 109 | 110 | @jug.TaskGenerator 111 | def run_baseline(baseline_exp): 112 | baseline_exp.cached_run() 113 | if baseline_exp.model_class == "SGPR": 114 | baseline_lml = baseline_exp.model.elbo().numpy() 115 | else: 116 | baseline_lml = baseline_exp.model.log_marginal_likelihood().numpy() 117 | model_parameters = gpflow.utilities.read_values(baseline_exp.model) 118 | if ".inducing_variable.Z" in model_parameters: 119 | model_parameters.pop(".inducing_variable.Z") 120 | full_rmse = ( 121 | np.mean((baseline_exp.model.predict_f(baseline_exp.X_test)[0].numpy() - baseline_exp.Y_test) ** 2.0) ** 0.5 122 | ) 123 | full_nlpp = -np.mean(baseline_exp.model.predict_log_density((baseline_exp.X_test, baseline_exp.Y_test))) 124 | return model_parameters, full_rmse, full_nlpp, baseline_lml 125 | 126 | 127 | # Baseline exp 128 | baseline_exps = {} 129 | full_rmses = {} 130 | full_nlpps = {} 131 | baseline_lmls = {} 132 | for dataset_name in dataset_names: 133 | # The baseline is GPR, except for Naval, where we use an SGPR with 1000 inducing variables. 
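# (The dict(Naval_noisy=...).get(dataset_name, ...) construction below returns the Naval-specific SGPR
#  override when it applies, and otherwise falls back to the plain exact-GPR baseline settings.)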
134 | baseline_custom_settings = dict( 135 | Naval_noisy={ 136 | "model_class": "SGPR", 137 | "M": 1000, 138 | "training_procedure": "reinit_Z", 139 | "init_Z_method": robustgp.ConditionalVariance(sample=False), 140 | "max_lengthscale": 1000.0, 141 | "max_variance": 1000.0, 142 | } 143 | ).get(dataset_name, dict(model_class="GPR", max_lengthscale=1000.0, max_variance=1000.0)) 144 | experiment_storage_path, _, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 145 | baseline_exps[dataset_name] = FullbatchUciExperiment( 146 | **{**common_run_settings, **dataset_custom_settings, **baseline_custom_settings} 147 | ) 148 | ( 149 | all_model_parameters[dataset_name], 150 | full_rmses[dataset_name], 151 | full_nlpps[dataset_name], 152 | baseline_lmls[dataset_name], 153 | ) = jug.bvalue(run_baseline(baseline_exps[dataset_name])) 154 | 155 | 156 | @jug.TaskGenerator 157 | def run_sparse_init(exp): 158 | print(exp) 159 | exp.setup_model() 160 | exp.init_params() 161 | print_post_run(exp) 162 | elbo, upper, rmse, nlpp = compute_model_stats(exp) 163 | return elbo, upper, rmse, nlpp 164 | 165 | 166 | # Sparse experiments 167 | init_Z_runs = {} 168 | init_Z_task_results = {} 169 | for dataset_name in dataset_names: 170 | init_Z_runs[dataset_name] = dict() 171 | init_Z_task_results[dataset_name] = dict() 172 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 173 | for method_name, init_Z_method in init_Z_methods.items(): 174 | init_Z_runs[dataset_name][method_name] = dict() 175 | init_Z_task_results[dataset_name][method_name] = dict() 176 | for M in Ms: 177 | init_Z_runs[dataset_name][method_name][str(M)] = [] 178 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 179 | settings_for_runs = [ 180 | {"model_class": "SGPR", "M": M, "init_Z_method": seeded_init_Z_method, **dataset_custom_settings} 181 | for M in Ms 182 | for seeded_init_Z_method in init_Z_method 183 | ] 184 | for run_settings in settings_for_runs: 185 | M = str(run_settings["M"]) 186 | # TODO: A better approach would be to put the bvalue at this point, so other jobs could run before the last 187 | # full GP finishes. 
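# Note on the TODO above: when this file is run through `jug execute`, jug.bvalue(task) follows jug's
# documented barrier-plus-value behaviour -- if the baseline task has not finished yet, processing of the
# jugfile stops at that call so the baseline can be computed first, and on a later pass the cached result
# is loaded and the sparse experiments below are constructed with the trained baseline hyperparameters.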
188 | exp = FullbatchUciExperiment( 189 | **{**common_run_settings, **run_settings}, initial_parameters=all_model_parameters[dataset_name] 190 | ) 191 | result = run_sparse_init(exp) 192 | init_Z_runs[dataset_name][method_name][M].append(exp) 193 | init_Z_task_results[dataset_name][method_name][M].append(result) 194 | 195 | 196 | # (Sparse) Bound optimisation 197 | @jug.TaskGenerator 198 | def run_sparse_opt(exp): 199 | print(exp) 200 | exp.cached_run() 201 | print_post_run(exp) 202 | elbo, upper, rmse, nlpp = compute_model_stats(exp) 203 | return elbo, upper, rmse, nlpp 204 | 205 | 206 | upper_runs = dict() 207 | for dataset_name in dataset_names: 208 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 209 | settings_for_runs = [ 210 | { 211 | "model_class": "SGPR", 212 | "M": M, 213 | "init_Z_method": robustgp.ConditionalVariance(sample=True, seed=seed), 214 | **dataset_custom_settings, 215 | } 216 | for M in Ms 217 | for seed in seeds 218 | ] 219 | init_Z_runs[dataset_name]["gradient"] = dict() 220 | init_Z_task_results[dataset_name]["gradient"] = dict() 221 | upper_runs[dataset_name] = dict() 222 | for M in Ms: 223 | init_Z_runs[dataset_name]["gradient"][str(M)] = [] 224 | init_Z_task_results[dataset_name]["gradient"][str(M)] = [] 225 | upper_runs[dataset_name][str(M)] = [] 226 | # for optimise_objective in ["upper", "lower"]: # Optimising the upper bound makes hardly any difference 227 | for optimise_objective in ["lower"]: 228 | for run_settings in settings_for_runs: 229 | exp = FullbatchUciInducingOptExperiment( 230 | **{**common_run_settings, **run_settings}, 231 | initial_parameters=all_model_parameters[dataset_name], 232 | optimise_objective=optimise_objective, 233 | ) 234 | M = str(run_settings["M"]) 235 | result = run_sparse_opt(exp) 236 | if optimise_objective == "lower": 237 | init_Z_runs[dataset_name]["gradient"][str(M)].append(exp) 238 | init_Z_task_results[dataset_name]["gradient"][str(M)].append(result) 239 | elif optimise_objective == "upper": 240 | upper_runs[dataset_name][str(M)].append(exp) 241 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_init_inducing_opt.py: -------------------------------------------------------------------------------- 1 | # # Inducing point initialisation while optimising hypers 2 | # Assess how well inducing point initialisation works, in conjunction with optimising the hyperparameters. 3 | # Here, we compare: 4 | # - Fixed Z, initialised with the baseline kernel hyperparameters 5 | # - EM "reinit" Z, initialised with the baseline kernel hyperparameters 6 | # In all cases, the Z are initialised using default kernel parameters, not the ones from the baseline. This is to ensure 7 | # that we do not use information when initialising Z that isn't accessable when running a new dataset. 8 | # 9 | # Local optima are a bit annoying when "cold" initialising. They make the plot appear non-smooth. 
So we initialise at 10 | 11 | 12 | import gpflow 13 | import jug 14 | import numpy as np 15 | 16 | import robustgp 17 | from robustgp_experiments.init_z.utils import uci_train_settings, print_post_run 18 | from robustgp_experiments.utils import FullbatchUciExperiment 19 | 20 | # 21 | # 22 | # Settings 23 | dataset_names = ["Wilson_energy", "Naval_noisy", "Wilson_elevators"] 24 | # init_from_baseline = True 25 | init_from_baseline = False 26 | 27 | uci_train_settings.update( 28 | dict( 29 | Naval_noisy=( 30 | [10, 20, 30, 40, 45, 47, 50, 55, 60, 65, 70, 75, 80, 85, 90, 100, 130, 150, 180, 200, 250, 300, 400, 500], 31 | {}, 32 | ), # Very sparse solution exists 33 | ) 34 | ) 35 | # 36 | # 37 | # Setup 38 | gpflow.config.set_default_positive_minimum(1.0e-5) 39 | gpflow.config.set_default_jitter(1e-10) 40 | 41 | num_seeds = 3 # Use 10 to replicate the paper 42 | seeds = np.arange(num_seeds) 43 | 44 | init_Z_methods = dict() 45 | init_Z_methods["Uniform"] = [robustgp.UniformSubsample(seed=seed) for seed in seeds] 46 | init_Z_methods["Greedy Conditional Variance"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 47 | # init_Z_methods["Sample Conditional Variance"] = [robustgp.ConditionalVariance(sample=True, seed=seed) for seed in seeds] 48 | init_Z_methods["Kmeans"] = [robustgp.Kmeans(seed=seed) for seed in seeds] 49 | # init_Z_methods["M-DPP MCMC"] = [robustgp.KdppMCMC(seed=seed) for seed in seeds] 50 | # init_Z_methods["RLS"] = [robustgp.RLS(seed=seed) for seed in seeds] 51 | 52 | experiment_name = "init-inducing-opt" 53 | 54 | 55 | def compute_model_stats(exp): 56 | elbo = exp.model.robust_maximum_log_likelihood_objective(restore_jitter=False).numpy() 57 | upper = exp.model.upper_bound().numpy() 58 | rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 59 | nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 60 | return elbo, upper, rmse, nlpp 61 | 62 | 63 | @jug.TaskGenerator 64 | def run_baseline(baseline_exp): 65 | baseline_exp.cached_run() 66 | baseline_lml = baseline_exp.model.robust_maximum_log_likelihood_objective().numpy() 67 | model_parameters = gpflow.utilities.read_values(baseline_exp.model) if init_from_baseline else {} 68 | if ".inducing_variable.Z" in model_parameters: 69 | model_parameters.pop(".inducing_variable.Z") 70 | full_rmse = ( 71 | np.mean((baseline_exp.model.predict_f(baseline_exp.X_test)[0].numpy() - baseline_exp.Y_test) ** 2.0) ** 0.5 72 | ) 73 | full_nlpp = -np.mean(baseline_exp.model.predict_log_density((baseline_exp.X_test, baseline_exp.Y_test))) 74 | 75 | return model_parameters, full_rmse, full_nlpp, baseline_lml 76 | 77 | 78 | baseline_exps = dict() 79 | all_model_parameters = dict() 80 | full_rmses = dict() 81 | full_nlpps = dict() 82 | baseline_lmls = dict() 83 | 84 | for dataset_name in dataset_names: 85 | # Baseline runs 86 | print("Baseline exp...", dataset_name) 87 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 88 | common_run_settings = dict( 89 | storage_path=experiment_storage_path, 90 | dataset_name=dataset_name, 91 | max_lengthscale=1001.0, 92 | max_variance=1001.0, 93 | training_procedure="fixed_Z", 94 | ) 95 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 96 | 97 | baseline_custom_settings = dict( 98 | Naval_noisy={ 99 | "model_class": "SGPR", 100 | "M": 1000, 101 | "training_procedure": "reinit_Z", 102 | "init_Z_method": robustgp.ConditionalVariance(sample=False), 103 | "max_lengthscale": 1000.0, 104 | "max_variance": 1000.0, 105 | } 
106 | ).get( 107 | dataset_name, dict(model_class="GPR", training_procedure="joint", max_lengthscale=1000.0, max_variance=1000.0) 108 | ) 109 | baseline_exps[dataset_name] = FullbatchUciExperiment( 110 | **{**common_run_settings, **dataset_custom_settings, **baseline_custom_settings} 111 | ) 112 | ( 113 | all_model_parameters[dataset_name], 114 | full_rmses[dataset_name], 115 | full_nlpps[dataset_name], 116 | baseline_lmls[dataset_name], 117 | ) = jug.bvalue(run_baseline(baseline_exps[dataset_name])) 118 | 119 | 120 | # Bound optimisation 121 | @jug.TaskGenerator 122 | def run_sparse_opt(exp): 123 | print(exp) 124 | exp.cached_run() 125 | print_post_run(exp) 126 | elbo, upper, rmse, nlpp = compute_model_stats(exp) 127 | return elbo, upper, rmse, nlpp 128 | 129 | 130 | # Sparse runs 131 | init_Z_runs = dict() 132 | init_Z_task_results = dict() 133 | for dataset_name in dataset_names: 134 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 135 | common_run_settings = dict( 136 | storage_path=experiment_storage_path, 137 | dataset_name=dataset_name, 138 | max_lengthscale=1001.0, 139 | max_variance=1001.0, 140 | training_procedure="fixed_Z", 141 | ) 142 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 143 | 144 | init_Z_runs[dataset_name] = {} 145 | init_Z_task_results[dataset_name] = {} 146 | for method_name, init_Z_method in init_Z_methods.items(): 147 | settings_for_runs = [ 148 | { 149 | "model_class": "SGPR", 150 | "M": M, 151 | "init_Z_method": seeded_init_Z_method, 152 | "base_filename": "opthyp-fixed_Z", 153 | "initial_parameters": all_model_parameters[dataset_name], 154 | **dataset_custom_settings, 155 | } 156 | for M in Ms 157 | for seeded_init_Z_method in init_Z_method 158 | ] 159 | init_Z_runs[dataset_name][method_name] = dict() 160 | init_Z_task_results[dataset_name][method_name] = dict() 161 | for M in Ms: 162 | init_Z_runs[dataset_name][method_name][str(M)] = [] 163 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 164 | for run_settings in settings_for_runs: 165 | M = str(run_settings["M"]) 166 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings}) 167 | result = run_sparse_opt(exp) 168 | init_Z_runs[dataset_name][method_name][M].append(exp) 169 | init_Z_task_results[dataset_name][method_name][M].append(result) 170 | 171 | # Optimisation of Z 172 | method_names = ["reinit_Z_sF", "reinit_Z_sT"] 173 | 174 | for dataset_name in dataset_names: 175 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 176 | common_run_settings = dict( 177 | storage_path=experiment_storage_path, dataset_name=dataset_name, max_lengthscale=1001.0, max_variance=1001.0 178 | ) 179 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 180 | settings_for_runs = [ 181 | { 182 | "model_class": "SGPR", 183 | "M": M, 184 | "training_procedure": "reinit_Z", 185 | "base_filename": "opthyp-reinit_Z", 186 | "initial_parameters": all_model_parameters[dataset_name], 187 | **dataset_custom_settings, 188 | } 189 | for M in Ms 190 | ] 191 | 192 | for method_name in method_names: 193 | init_Z_runs[dataset_name][method_name] = dict() 194 | init_Z_task_results[dataset_name][method_name] = dict() 195 | for M in Ms: 196 | init_Z_runs[dataset_name][method_name][str(M)] = [] 197 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 198 | 199 | for seed in seeds: 200 | for method_name, init_Z_method in zip( 201 | method_names, 202 | [ 203 | robustgp.ConditionalVariance(seed=seed, sample=False), 204 | 
robustgp.ConditionalVariance(seed=seed, sample=True), 205 | ], 206 | ): 207 | for run_settings in settings_for_runs: 208 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings, "init_Z_method": init_Z_method}) 209 | result = run_sparse_opt(exp) 210 | M = str(run_settings["M"]) 211 | init_Z_runs[dataset_name][method_name][M].append(exp) 212 | init_Z_task_results[dataset_name][method_name][M].append(result) 213 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_opt_inducing.py: -------------------------------------------------------------------------------- 1 | # # Inducing point initialisation while optimising hypers 2 | # Assess how well inducing point initialisation works, in conjunction with optimising the hyperparameters. 3 | # Here, we compare: 4 | # - Fixed Z, initialised with the baseline kernel hyperparameters 5 | # - EM "reinit" Z, initialised with the baseline kernel hyperparameters 6 | # In all cases, the Z are initialised using default kernel parameters, not the ones from the baseline. This is to ensure 7 | # that we do not use information when initialising Z that isn't accessable when running a new dataset. 8 | # 9 | # Local optima are a bit annoying when "cold" initialising. They make the plot appear non-smooth. So we initialise at 10 | 11 | 12 | import gpflow 13 | import jug 14 | import numpy as np 15 | 16 | import robustgp 17 | from robustgp_experiments.init_z.utils import uci_train_settings, print_post_run 18 | from robustgp_experiments.utils import FullbatchUciExperiment 19 | 20 | # Settings 21 | dataset_names = ["Wilson_energy", "Naval_noisy", "Wilson_elevators"] 22 | init_from_baseline = False 23 | 24 | uci_train_settings.update(dict(Naval_noisy=([55], {}), Wilson_energy=([65], {}), Wilson_elevators=([1200], {}))) 25 | 26 | # Setup 27 | gpflow.config.set_default_positive_minimum(1.0e-5) 28 | gpflow.config.set_default_jitter(1e-10) 29 | 30 | num_seeds = 3 # For the experiments in the paper we used 10 31 | seeds = np.arange(num_seeds) 32 | 33 | init_Z_methods = dict() 34 | init_Z_methods["Kmeans"] = [robustgp.Kmeans(seed=seed) for seed in seeds] 35 | init_Z_methods["Greedy Conditional Variance"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 36 | init_Z_methods["Gradient"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 37 | experiment_name = "opt-inducing" 38 | 39 | 40 | def compute_model_stats(exp): 41 | elbo = exp.model.robust_maximum_log_likelihood_objective(restore_jitter=False).numpy() 42 | rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 43 | nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 44 | upper = exp.model.upper_bound().numpy() 45 | hist = exp.train_objective_hist 46 | return elbo, upper, rmse, nlpp, hist 47 | 48 | 49 | @jug.TaskGenerator 50 | def run_baseline(baseline_exp): 51 | baseline_exp.cached_run() 52 | baseline_lml = baseline_exp.model.robust_maximum_log_likelihood_objective().numpy() 53 | model_parameters = gpflow.utilities.read_values(baseline_exp.model) if init_from_baseline else {} 54 | if ".inducing_variable.Z" in model_parameters: 55 | model_parameters.pop(".inducing_variable.Z") 56 | full_rmse = ( 57 | np.mean((baseline_exp.model.predict_f(baseline_exp.X_test)[0].numpy() - baseline_exp.Y_test) ** 2.0) ** 0.5 58 | ) 59 | full_nlpp = -np.mean(baseline_exp.model.predict_log_density((baseline_exp.X_test, baseline_exp.Y_test))) 60 | return model_parameters, full_rmse, full_nlpp, 
baseline_lml 61 | 62 | 63 | baseline_exps = dict() 64 | baseline_tasks = [] 65 | for dataset_name in dataset_names: 66 | # Baseline runs 67 | print("Baseline exp...", dataset_name) 68 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 69 | common_run_settings = dict(storage_path=experiment_storage_path, dataset_name=dataset_name) 70 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 71 | 72 | baseline_custom_settings = dict( 73 | Naval_noisy={ 74 | "model_class": "SGPR", 75 | "M": 1000, 76 | "training_procedure": "reinit_Z", 77 | "init_Z_method": robustgp.ConditionalVariance(sample=False), 78 | "max_lengthscale": 1000.0, 79 | "max_variance": 1000.0, 80 | } 81 | ).get( 82 | dataset_name, dict(model_class="GPR", training_procedure="joint", max_lengthscale=1000.0, max_variance=1000.0) 83 | ) 84 | baseline_exps[dataset_name] = FullbatchUciExperiment( 85 | **{**common_run_settings, **dataset_custom_settings, **baseline_custom_settings} 86 | ) 87 | 88 | baseline_tasks.append(run_baseline(baseline_exps[dataset_name])) 89 | 90 | 91 | # Bound optimisation 92 | @jug.TaskGenerator 93 | def run_sparse_opt(exp): 94 | print(exp) 95 | exp.cached_run() 96 | print_post_run(exp) 97 | elbo, upper, rmse, nlpp, hist = compute_model_stats(exp) 98 | return elbo, upper, rmse, nlpp, hist 99 | 100 | 101 | # Sparse runs 102 | init_Z_runs = dict() 103 | init_Z_task_results = dict() 104 | for dataset_name in dataset_names: 105 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 106 | common_run_settings = dict( 107 | storage_path=experiment_storage_path, 108 | dataset_name=dataset_name, 109 | max_lengthscale=1001.0, 110 | max_variance=1001.0, 111 | training_procedure="reinit_Z", 112 | ) 113 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 114 | 115 | init_Z_runs[dataset_name] = {} 116 | init_Z_task_results[dataset_name] = {} 117 | for method_name, init_Z_method in init_Z_methods.items(): 118 | training_procedure = "reinit_Z" 119 | settings_for_runs = [ 120 | { 121 | "model_class": "SGPR", 122 | "M": M, 123 | "init_Z_method": seeded_init_Z_method, 124 | "base_filename": "opthyp-fixed_Z", 125 | "initial_parameters": {}, 126 | "training_procedure": training_procedure, 127 | **dataset_custom_settings, 128 | } 129 | for M in Ms 130 | for seeded_init_Z_method in init_Z_method 131 | ] 132 | init_Z_runs[dataset_name][method_name] = dict() 133 | init_Z_task_results[dataset_name][method_name] = dict() 134 | for M in Ms: 135 | init_Z_runs[dataset_name][method_name][str(M)] = [] 136 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 137 | for run_settings in settings_for_runs: 138 | M = str(run_settings["M"]) 139 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings}) 140 | result = run_sparse_opt(exp) 141 | init_Z_runs[dataset_name][method_name][M].append(exp) 142 | init_Z_task_results[dataset_name][method_name][M].append(result) 143 | 144 | # Optimisation of Z 145 | method_name = "Gradient" 146 | for dataset_name in dataset_names: 147 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 148 | common_run_settings = dict( 149 | storage_path=experiment_storage_path, dataset_name=dataset_name, max_lengthscale=1001.0, max_variance=1001.0 150 | ) 151 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 152 | settings_for_runs = [ 153 | { 154 | "model_class": "SGPR", 155 | "M": M, 156 | "training_procedure": "joint", 157 | "base_filename": "opthyp-reinit_Z", 158 | "initial_parameters": {}, 159 | 
**dataset_custom_settings, 160 | } 161 | for M in Ms 162 | ] 163 | 164 | init_Z_runs[dataset_name][method_name] = dict() 165 | init_Z_task_results[dataset_name][method_name] = dict() 166 | for M in Ms: 167 | init_Z_runs[dataset_name][method_name][str(M)] = [] 168 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 169 | 170 | for seed in seeds: 171 | init_Z_method = robustgp.ConditionalVariance(seed=seed, sample=False) 172 | for run_settings in settings_for_runs: 173 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings, "init_Z_method": init_Z_method}) 174 | result = run_sparse_opt(exp) 175 | M = str(run_settings["M"]) 176 | init_Z_runs[dataset_name][method_name][M].append(exp) 177 | init_Z_task_results[dataset_name][method_name][M].append(result) 178 | 179 | 180 | all_model_parameters = dict() 181 | full_rmses = dict() 182 | full_nlpps = dict() 183 | baseline_lmls = dict() 184 | # Put trained values in variables 185 | for dataset_name in dataset_names: 186 | ( 187 | all_model_parameters[dataset_name], 188 | full_rmses[dataset_name], 189 | full_nlpps[dataset_name], 190 | baseline_lmls[dataset_name], 191 | ) = jug.bvalue(run_baseline(baseline_exps[dataset_name])) 192 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_search_uci.py: -------------------------------------------------------------------------------- 1 | # # Search-UCI 2 | # Search UCI datasets for datasets for which the sparse approximation effectively converges. I.e. marglik bound doesn't 3 | # increase much as we add more inducing points. 4 | 5 | import jug 6 | import numpy as np 7 | 8 | import gpflow 9 | import robustgp as inducing_init 10 | from robustgp_experiments.init_z.utils import uci_train_settings, good_datasets, print_post_run 11 | from robustgp_experiments.utils import FullbatchUciExperiment 12 | 13 | gpflow.config.set_default_jitter(1e-8) 14 | gpflow.config.set_default_positive_minimum(1.0e-5) 15 | 16 | MAXITER = 1000 17 | 18 | experiment_name = "search-uci" 19 | dataset_names = ["Wilson_energy", "Wilson_autompg", "Wilson_concrete", "Wilson_airfoil", "Wilson_servo", 20 | "Wilson_concreteslump"] 21 | # dataset_names = ["Wilson_stock", "Wilson_energy", "Pendulum_noisy", "Wilson_pendulum", "Wilson_concrete", 22 | # "Wilson_airfoil", "Wilson_wine", "Naval_noisy", "Naval", "Wilson_gas", "Wilson_skillcraft", 23 | # "Wilson_sml", "Wilson_parkinsons", "Parkinsons_noisy", "Power", "Wilson_pol", "Wilson_elevators", 24 | # "Wilson_bike", "Wilson_kin40k", "Wilson_protein", "Wilson_tamielectric"] 25 | # dataset_names = good_datasets 26 | 27 | Z_init_method = inducing_init.ConditionalVariance(sample=True) 28 | 29 | baseline_exps = {} 30 | baseline_results = {} 31 | sparse_exps = {} 32 | sparse_task_results = {} 33 | 34 | 35 | def get_settings(dataset_name): 36 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 37 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 38 | common_run_settings = dict(storage_path=experiment_storage_path, dataset_name=dataset_name, 39 | training_procedure="reinit_Z") 40 | return experiment_storage_path, Ms, common_run_settings, dataset_custom_settings 41 | 42 | 43 | @jug.TaskGenerator 44 | def full_cached_run(exp): 45 | exp.cached_run() 46 | print_post_run(exp) 47 | return exp.model.robust_maximum_log_likelihood_objective() 48 | 49 | 50 | @jug.TaskGenerator 51 | def sparse_cached_run(exp): 52 | exp.cached_run() 53 | print_post_run(exp) 54 | # lml = 
exp.model.log_marginal_likelihood().numpy() if exp.model_class == "GPR" else exp.model.elbo().numpy() 55 | lml = exp.model.robust_maximum_log_likelihood_objective(restore_jitter=False).numpy() 56 | upper = exp.model.upper_bound().numpy() 57 | # rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 58 | # nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 59 | return lml, upper, None, None 60 | 61 | 62 | for dataset_name in dataset_names: 63 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 64 | 65 | # 66 | # 67 | # Baseline runs 68 | gpr_exp = FullbatchUciExperiment(**{**common_run_settings, **dataset_custom_settings, "model_class": "GPR", 69 | "training_procedure": "joint"}) 70 | gpr_exp.load_data() 71 | if len(gpr_exp.X_train) <= 30000: 72 | print("Baseline run...") 73 | result = full_cached_run(gpr_exp) 74 | else: 75 | print(f"{dataset_name}: Skipping baseline run... N={len(gpr_exp.X_train)}.") 76 | result = np.nan 77 | 78 | baseline_exps[dataset_name] = gpr_exp 79 | baseline_results[dataset_name] = result 80 | 81 | # 82 | # 83 | # Sparse runs -- We're trying an "optimal" training procedure 84 | greedy_init_settings_list = [ 85 | {"model_class": "SGPR", "M": M, "init_Z_method": Z_init_method, **dataset_custom_settings} 86 | for M in Ms] 87 | sparse_exps[dataset_name] = [] 88 | sparse_task_results[dataset_name] = [] 89 | for run_settings in greedy_init_settings_list: 90 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings}) 91 | result = sparse_cached_run(exp) 92 | sparse_exps[dataset_name].append(exp) 93 | sparse_task_results[dataset_name].append(result) 94 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def print_post_run(run): 5 | print("") 6 | try: 7 | print(f"ELBO: {run.model.robust_maximum_log_likelihood_objective().numpy()}") 8 | std_ratio = (run.model.kernel.variance.numpy() / run.model.likelihood.variance.numpy()) ** 0.5 9 | print(f"(kernel.variance / likelihood.variance)**0.5: {std_ratio}") 10 | print(run.model.kernel.lengthscales.numpy()) 11 | except AttributeError: 12 | pass 13 | except tf.errors.InvalidArgumentError as e: 14 | print("") 15 | print("Probably a CholeskyError:") 16 | print(e.message) 17 | print("Ignoring...") 18 | print("") 19 | print("") 20 | 21 | 22 | uci_train_settings = dict( 23 | Wilson_tamielectric=([100, 200, 500, 1000, 2000], {}), 24 | Wilson_protein=([100, 200, 500, 1000, 2000], {}), 25 | Wilson_kin40k=([(1000, 2000, 5000, 10000, 15000), {}]), 26 | Wilson_bike=([100, 200, 500, 1000, 2000, 5000], {}), 27 | Wilson_elevators=([100, 200, 500, 1000, 2000, 5000], {}), 28 | Wilson_pol=([100, 200, 500, 1000, 2000, 5000], {}), 29 | Power=([100, 200, 500, 1000, 2000, 5000], {}), # Step function in it? 
30 | # Kin8mn=([100, 200, 500, 1000, 2000], {}), # Can't download 31 | Parkinsons_noisy=([100, 150, 170, 200, 500], {}), 32 | Wilson_parkinsons=([100, 150, 170, 200, 500, 1000], {}), 33 | Wilson_sml=([100, 200, 500, 1000, 2000, 3000, 3500], {}), # Mostly linear, but with benefit of nonlinear 34 | # Didn't get SE+Lin working, probably local optimum 35 | # Wilson_skillcraft=([10, 20, 50, 100, 200, 500], {"kernel_name": "SquaredExponentialLinear"}), 36 | Wilson_skillcraft=([10, 20, 50, 100, 200, 500, 1000], {}), # Mostly linear, but with benefit of nonlinear 37 | Wilson_gas=([100, 200, 500, 1000, 1300, 1500], {}), 38 | Naval=([10, 20, 50, 100, 200], {}), # Very sparse solution exists 39 | Naval_noisy=([10, 20, 50, 100, 200, 500], {}), # Very sparse solution exists 40 | Wilson_wine=([100, 200, 500, 1000, 1300, 1350], {}), # Suddenly catches good hypers with large M 41 | Wilson_airfoil=([100, 200, 500, 800, 1000, 1250, 1300, 1340], {}), # Good 42 | Wilson_solar=([100, 200, 300], 43 | {"kernel_name": "SquaredExponentialLinear", "max_lengthscale": 10.0}), # Mostly linear 44 | # Good, better performance with Linear kernel added 45 | # Wilson_concrete=([100, 200, 500, 600, 700, 800, 900], 46 | # {"kernel_name": "SquaredExponentialLinear", "optimizer": "bfgs", "max_lengthscale": 10.0}), 47 | Wilson_concrete=([100, 200, 500, 600, 700, 800, 900], {}), 48 | Wilson_pendulum=([10, 100, 200, 500, 567], {}), # Not sparse, due to very low noise 49 | Pendulum_noisy=([10, 100, 200, 500, 567], {}), # Not sparse, due to very low noise 50 | Wilson_forest=([10, 100, 200, 400], {"kernel_name": "SquaredExponentialLinear"}), # Bad 51 | Wilson_energy=([10, 30, 40, 50, 60, 70, 80, 90, 100, 200, 500], {}), # Good 52 | Wilson_stock=([10, 50, 100, 200, 400, 450], {"kernel_name": "SquaredExponentialLinear"}), # Mostly linear 53 | Wilson_housing=([100, 200, 300, 400], {}), # Bad 54 | Wilson_yacht=([10, 20, 50, 100, 200, 250], {}), 55 | Wilson_autompg=([10, 20, 50, 100, 200, 250], {}), 56 | Wilson_servo=([10, 20, 30, 40, 50, 70, 100, 110, 120, 130, 140], {}), 57 | Wilson_breastcancer=([10, 50, 100, 150], {}), 58 | Wilson_autos=([10, 20, 50, 100], {}), 59 | Wilson_concreteslump=([10, 20, 50, 60, 70], {}) 60 | ) 61 | 62 | bad_datasets = ["Wilson_housing", "Wilson_forest"] 63 | good_datasets = [k for k in uci_train_settings.keys() if k not in bad_datasets] 64 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/which-optimiser.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.2' 9 | # jupytext_version: 1.2.4 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% 17 | # ../create_report.sh jpt-test.py 18 | 19 | import gpflow 20 | 21 | from inducing_experiments.utils import ExperimentRecord 22 | 23 | gpflow.config.set_default_positive_minimum(1e-5) 24 | 25 | # %% [markdown] 26 | # # Kin40k 27 | 28 | # %% {"tags": ["parameters"]} 29 | MAXITER = 6000 30 | 31 | experiment_name = "which-optimiser" 32 | dataset_name = "Wilson_elevators" 33 | 34 | # %% 35 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 36 | 37 | basic_run_settings_list = [ 38 | # {"model_class": "GPR"}, 39 | {"model_class": "SGPR", "M": 100, "fixed_Z": True}, 40 | {"model_class": "SGPR", "M": 100, "fixed_Z": True, 
"optimizer": "bfgs"}, 41 | {"model_class": "SGPR", "M": 200, "fixed_Z": True}, 42 | # {"model_class": "SGPR", "M": 200, "fixed_Z": True, "optimizer": "bfgs"}, # CholeskyError 43 | {"model_class": "SGPR", "M": 200, "fixed_Z": True, "optimizer": "bfgs", "lengthscale_transform": "constrained"}, 44 | ] 45 | 46 | common_params = {"storage_path": experiment_storage_path, "dataset_name": dataset_name} 47 | baseline_runs = [ExperimentRecord(storage_path=experiment_storage_path, dataset_name=dataset_name, **basic_run_settings) 48 | for basic_run_settings in basic_run_settings_list] 49 | 50 | [r.cached_run(MAXITER) for r in baseline_runs] 51 | # [plt.plot(*r.train_objective_hist) for r in baseline_runs] 52 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data 2 | from .experiment_running import ( 3 | LoggerCallback, 4 | create_loss_function, 5 | FullbatchUciExperiment, 6 | GaussianProcessUciExperiment, 7 | UciExperiment, 8 | Experiment, 9 | ) 10 | from .plotting import plot_1d_model 11 | from .storing import get_next_filename, store_pickle, load_existing_runs, find_run 12 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/baselines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import stats 3 | from sklearn.linear_model import LinearRegression 4 | 5 | 6 | def meanpred_baseline(_, Y_train, __, Y_test): 7 | pf = np.mean(Y_train) 8 | pv = np.var(Y_train) 9 | elbo = np.sum(stats.norm.logpdf(Y_train, pf, pv ** 0.5)) 10 | rmse = np.mean((Y_test - pf) ** 2.0) ** 0.5 11 | nlpp = -np.mean(stats.norm.logpdf(Y_test, pf, pv ** 0.5)) 12 | return elbo, rmse, nlpp 13 | 14 | 15 | def linear_baseline(X_train, Y_train, X_test, Y_test): 16 | reg = LinearRegression().fit(X_train, Y_train) 17 | residuals = reg.predict(X_train) - Y_train 18 | pred_var = np.var(residuals) 19 | 20 | elbo = np.sum(stats.norm.logpdf(residuals, scale=pred_var ** 0.5)) 21 | 22 | residuals_test = reg.predict(X_test) - Y_test 23 | rmse = np.mean(residuals_test ** 2.0) ** 0.5 24 | nlpp = -np.mean(stats.norm.logpdf(residuals_test, scale=pred_var ** 0.5)) 25 | 26 | return elbo, rmse, nlpp 27 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/data.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from shutil import copyfile, rmtree 6 | 7 | from observations.util import maybe_download_and_extract 8 | 9 | from bayesian_benchmarks.data import * 10 | 11 | 12 | def snelson1d(path="./.data/"): 13 | """Load Edward Snelson's 1d regression data set [@snelson2006fitc]. 14 | It contains 200 examples of a few oscillations of an example function. It has 15 | seen extensive use as a toy dataset for illustrating qualitative behaviour of 16 | Gaussian process approximations. 17 | 18 | Args: 19 | path: str. 20 | Path to directory which either stores file or otherwise file will be 21 | downloaded and extracted there. Filenames are `snelson_train_*`. 22 | 23 | Returns: 24 | Tuple of two np.darray `inputs` and `outputs` with 200 rows and 1 column. 
25 | """ 26 | path = os.path.expanduser(path) 27 | inputs_path = os.path.join(path, "snelson_train_inputs") 28 | outputs_path = os.path.join(path, "snelson_train_outputs") 29 | 30 | # Contains all source as well. We just need the data. 31 | url = "http://www.gatsby.ucl.ac.uk/~snelson/SPGP_dist.zip" 32 | 33 | if not (os.path.exists(inputs_path) and os.path.exists(outputs_path)): 34 | maybe_download_and_extract(path, url) 35 | 36 | # Copy the required data 37 | copyfile(os.path.join(path, "SPGP_dist", "train_inputs"), inputs_path) 38 | copyfile(os.path.join(path, "SPGP_dist", "train_outputs"), outputs_path) 39 | 40 | # Clean up everything else 41 | rmtree(os.path.join(path, "SPGP_dist")) 42 | os.remove(os.path.join(path, "SPGP_dist.zip")) 43 | 44 | X = np.loadtxt(os.path.join(inputs_path))[:, None] 45 | Y = np.loadtxt(os.path.join(outputs_path))[:, None] 46 | 47 | return (X, Y), (X, Y) 48 | 49 | 50 | class Naval_noisy(Naval): 51 | def read_data(self): 52 | X, Y = super().read_data() 53 | Y = Y + np.random.randn(*Y.shape) * 0.0001 54 | return X, Y 55 | 56 | 57 | class Pendulum_noisy(Wilson_pendulum): 58 | def read_data(self): 59 | X, Y = super().read_data() 60 | Y = Y + np.random.randn(*Y.shape) * 0.1 61 | return X, Y 62 | 63 | 64 | class Parkinsons_noisy(Wilson_parkinsons): 65 | def read_data(self): 66 | X, Y = super().read_data() 67 | Y = Y + np.random.randn(*Y.shape) * 1e-1 68 | return X, Y 69 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/experiment_processing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from gpflow.config import default_jitter 4 | from gpflow.covariances.dispatch import Kuf, Kuu 5 | 6 | 7 | def residual_variances(model): 8 | X_data, Y_data = model.data 9 | 10 | Kdiag = model.kernel(X_data, full_cov=False) 11 | kuu = Kuu(model.inducing_variable, model.kernel, jitter=default_jitter()) 12 | kuf = Kuf(model.inducing_variable, model.kernel, X_data) 13 | 14 | L = tf.linalg.cholesky(kuu) 15 | A = tf.linalg.triangular_solve(L, kuf, lower=True) 16 | 17 | c = Kdiag - tf.reduce_sum(tf.square(A), 0) 18 | 19 | return c.numpy() 20 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/experiment_running.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from dataclasses import dataclass, field, _MISSING_TYPE 3 | from functools import reduce 4 | from glob import glob 5 | from typing import Optional 6 | 7 | import gpflow 8 | import json_tricks 9 | import numpy as np 10 | import tensorflow as tf 11 | import tensorflow_probability as tfp 12 | 13 | from robustgp import InducingPointInitializer, FirstSubsample 14 | from robustgp.models import RobustGPR, RobustSGPR 15 | from robustgp.optimizers import RobustScipy 16 | from robustgp.utilities import set_trainable 17 | from . 
import data 18 | from .storing import get_next_filename 19 | 20 | 21 | def create_loss_function(model, data): 22 | @tf.function(autograph=False) 23 | def loss(): 24 | return -model.log_likelihood(*data) 25 | 26 | return loss 27 | 28 | 29 | class LoggerCallback: 30 | def __init__(self, model, loss_function, holdout_interval=10): 31 | self.model = model 32 | self.loss_function = loss_function 33 | self.holdout_interval = holdout_interval 34 | self.log_likelihoods = [] 35 | self.n_iters = [] 36 | self.counter = 0 37 | 38 | def __call__(self, step, variables=None, values=None): 39 | # step will reset to zero between calls to minimize(), whereas counter will keep increasing 40 | if (self.counter <= 10) or (self.counter % self.holdout_interval) == 0: 41 | if variables is not None: 42 | # Using Scipy and need to update the parameters 43 | for var, val in zip(variables, values): 44 | var.assign(val) 45 | 46 | self.n_iters.append(self.counter + 1) 47 | loss = self.loss_function().numpy() 48 | self.log_likelihoods.append(loss) 49 | print(f"{self.counter} - objective function: {self.log_likelihoods[-1]:.4f}", end="\r") 50 | 51 | self.counter += 1 52 | 53 | 54 | # def run_tf_optimizer(model, optimizer, data, iterations, callback=None): 55 | # logf = [] 56 | # 57 | # @tf.function(autograph=False) 58 | # def optimization_step(): 59 | # with tf.GradientTape(watch_accessed_variables=False) as tape: 60 | # tape.watch(model.trainable_variables) 61 | # objective = model.elbo(*data) 62 | # grads = tape.gradient(objective, model.trainable_variables) 63 | # optimizer.apply_gradients(zip(grads, model.trainable_variables)) 64 | # return -objective 65 | # 66 | # for step in range(iterations): 67 | # elbo = optimization_step() 68 | # print(f"{step}\t{elbo:.4f}", end="\r") 69 | # if callback is not None: 70 | # callback(step) 71 | # print("") 72 | # 73 | # return logf 74 | 75 | 76 | def normalize(X, X_mean, X_std): 77 | return (X - X_mean) / X_std 78 | 79 | 80 | @dataclass 81 | class Experiment: 82 | storage_path: str 83 | base_filename: Optional[str] = "data" 84 | 85 | # Populated during object life 86 | model = None 87 | trained_parameters = None 88 | 89 | _X_train = None 90 | _Y_train = None 91 | _X_test = None 92 | _Y_test = None 93 | 94 | def load_data(self): 95 | raise NotImplementedError 96 | 97 | def setup_model(self): 98 | """ 99 | Set up the model here to the point where existing parameters can be loaded into it. Do not 100 | initialise the parameters, as this can be time consuming. 101 | :return: 102 | """ 103 | raise NotImplementedError 104 | 105 | def init_params(self): 106 | """ 107 | Do the time consuming parameter initialisation here. 
108 | :return: 109 | """ 110 | raise NotImplementedError 111 | 112 | def run_optimisation(self): 113 | raise NotImplementedError 114 | 115 | def run(self): 116 | self.setup_model() 117 | self.init_params() 118 | self.run_optimisation() 119 | 120 | def cached_run(self): 121 | try: 122 | self.load() 123 | print("Skipping...") 124 | except FileNotFoundError: 125 | self.run() 126 | self.save() 127 | 128 | @property 129 | def X_train(self): 130 | if self._X_train is None: 131 | self.load_data() 132 | return self._X_train 133 | 134 | @property 135 | def Y_train(self): 136 | if self._Y_train is None: 137 | self.load_data() 138 | return self._Y_train 139 | 140 | @property 141 | def X_test(self): 142 | if self._X_test is None: 143 | self.load_data() 144 | return self._X_test 145 | 146 | @property 147 | def Y_test(self): 148 | if self._Y_test is None: 149 | self.load_data() 150 | return self._Y_test 151 | 152 | @property 153 | def store_variables(self): 154 | return [ 155 | k for k in list(self.__dict__.keys()) if k[0] != "_" and k not in ["storage_path", "base_filename", "model"] 156 | ] 157 | 158 | @property 159 | def load_match_variables(self): 160 | return [k for k in self.store_variables if k not in ["trained_parameters"]] 161 | 162 | def save(self): 163 | store_dict = {k: v for k, v in self.__dict__.items() if k in self.store_variables} 164 | filename = get_next_filename(self.storage_path, self.base_filename, extension="json") 165 | json_tricks.dump(store_dict, filename) 166 | print(f"Stored results in {filename} at {datetime.datetime.now()}") 167 | 168 | def load(self, filename=None): 169 | def field_equal(a, b): 170 | if type(a) is dict: 171 | if a == {} and type(b) is _MISSING_TYPE: 172 | return True 173 | try: 174 | equality = True 175 | for k in reduce(set.union, map(set, map(dict.keys, [a, b]))): 176 | if type(a[k]) is np.ndarray: 177 | if not np.all(a[k] == b[k]): 178 | return False 179 | else: 180 | if a[k] != b[k]: 181 | return False 182 | except (TypeError, KeyError): 183 | equality = False 184 | 185 | return equality 186 | else: 187 | return a == b 188 | 189 | if filename is None: 190 | # Find run with similar run parameters 191 | existing_runs = [] 192 | for fn in glob(f"{self.storage_path}/{self.base_filename}*"): 193 | existing_runs.append((json_tricks.load(fn), fn)) 194 | 195 | matching_runs = [ 196 | (dict, fn) 197 | for dict, fn in existing_runs 198 | if all( 199 | [ 200 | field_equal(self.__dict__[k], (dict[k] if k in dict else self.__dataclass_fields__[k].default)) 201 | for k in self.load_match_variables 202 | ] 203 | ) 204 | ] 205 | else: 206 | matching_runs = [(json_tricks.load(filename), filename)] 207 | 208 | if len(matching_runs) == 1: 209 | print(f"Loading from `{matching_runs[0][1]}`...") 210 | for k, v in matching_runs[0][0].items(): 211 | setattr(self, k, v) 212 | gpflow.config.set_default_positive_minimum(1e-7) 213 | self.setup_model() 214 | gpflow.utilities.multiple_assign(self.model, self.trained_parameters) 215 | elif len(matching_runs) == 0: 216 | raise FileNotFoundError("No matching run found.") 217 | else: 218 | raise AssertionError("Only one run of an experiment should be present.") 219 | 220 | 221 | @dataclass 222 | class UciExperiment(Experiment): 223 | dataset_name: Optional[str] = "Wilson_elevators" 224 | 225 | def load_data(self): 226 | loaded_data = getattr(data, self.dataset_name)() 227 | if type(loaded_data) == tuple: 228 | self._X_train, self._Y_train = loaded_data[0] 229 | elif isinstance(loaded_data, data.Dataset): 230 | # Here, we always 
normalise on training. This is different to before. 231 | X_mean, X_std = np.average(loaded_data.X_train, 0)[None, :], 1e-6 + np.std(loaded_data.X_train, 0)[None, :] 232 | self._X_train = normalize(loaded_data.X_train, X_mean, X_std) 233 | self._X_test = normalize(loaded_data.X_test, X_mean, X_std) 234 | 235 | Y_mean, Y_std = np.average(loaded_data.Y_train, 0)[None, :], 1e-6 + np.std(loaded_data.Y_train, 0)[None, :] 236 | self._Y_train = normalize(loaded_data.Y_train, Y_mean, Y_std) 237 | self._Y_test = normalize(loaded_data.Y_test, Y_mean, Y_std) 238 | else: 239 | raise NotImplementedError 240 | 241 | 242 | @dataclass 243 | class GaussianProcessUciExperiment(UciExperiment): 244 | model_class: Optional[str] = "SGPR" 245 | M: Optional[int] = None 246 | kernel_name: Optional[str] = "SquaredExponential" 247 | init_Z_method: Optional[InducingPointInitializer] = FirstSubsample() 248 | max_lengthscale: Optional[float] = 1000.0 249 | max_variance: Optional[float] = 1000.0 250 | 251 | training_procedure: Optional[str] = "joint" # joint | reinit 252 | initial_parameters: Optional[dict] = field(default_factory=dict) 253 | 254 | # Populated during object life 255 | train_objective_hist = None 256 | 257 | def setup_model(self): 258 | kernel = self.setup_kernel() 259 | if self.model_class == "SGPR": 260 | inducing_variable = self.setup_inducing_variable() 261 | model = RobustSGPR((self.X_train, self.Y_train), kernel, inducing_variable=inducing_variable) 262 | elif self.model_class == "GPR": 263 | assert self.M is None 264 | model = RobustGPR((self.X_train, self.Y_train), kernel) 265 | else: 266 | raise NotImplementedError 267 | print(f"Jitter variance: {np.log10(model.jitter_variance.numpy()):.1f}") 268 | model.likelihood.variance = gpflow.Parameter(1.0, transform=gpflow.utilities.positive()) 269 | self.model = model 270 | 271 | def setup_kernel(self): 272 | if self.kernel_name == "SquaredExponential": 273 | kernel = gpflow.kernels.SquaredExponential(lengthscales=np.ones(self.X_train.shape[1])) 274 | elif self.kernel_name == "SquaredExponentialLinear": 275 | kernel = ( 276 | gpflow.kernels.SquaredExponential( 277 | lengthscales=np.ones(self.X_train.shape[1])) + gpflow.kernels.Linear() 278 | ) 279 | else: 280 | # try: 281 | kernel = getattr(gpflow.kernels, self.kernel_name)(lengthscales=np.ones(self.X_train.shape[1])) 282 | # except: 283 | # raise NotImplementedError(f"Kernel `{self.kernel_name}` is unknown.") 284 | 285 | return kernel 286 | 287 | def setup_inducing_variable(self): 288 | return gpflow.inducing_variables.InducingPoints(np.zeros((self.M, self.X_train.shape[1]))) 289 | 290 | def init_inducing_variable(self): 291 | if self.M > len(self.X_train): 292 | raise ValueError("Cannot have M > len(X).") 293 | 294 | Z, _ = self.init_Z_method(self.X_train, self.M, self.model.kernel) 295 | 296 | try: 297 | self.model.inducing_variable.Z.assign(Z) 298 | except Exception as e: 299 | print(type(e)) 300 | print(e) 301 | self.model.inducing_variable.Z = gpflow.Parameter(Z) 302 | 303 | def init_params(self): 304 | self.model.likelihood.variance.assign(0.01) 305 | gpflow.utilities.multiple_assign(self.model, self.initial_parameters) 306 | 307 | constrained_transform = tfp.bijectors.Sigmoid( 308 | gpflow.utilities.to_default_float(gpflow.config.default_positive_minimum()), 309 | gpflow.utilities.to_default_float(self.max_lengthscale), 310 | ) 311 | 312 | var_constrained_transform = tfp.bijectors.Sigmoid( 313 | gpflow.utilities.to_default_float(gpflow.config.default_positive_minimum()), 314 | 
gpflow.utilities.to_default_float(self.max_variance), 315 | ) 316 | 317 | if self.kernel_name == "SquaredExponential": 318 | new_len = gpflow.Parameter(self.model.kernel.lengthscales.numpy(), transform=constrained_transform) 319 | new_var = gpflow.Parameter(self.model.kernel.variance.numpy(), transform=var_constrained_transform) 320 | self.model.kernel.lengthscales = new_len 321 | self.model.kernel.variance = new_var 322 | elif self.kernel_name == "SquaredExponentialLinear": 323 | new_len = gpflow.Parameter(self.model.kernel.kernels[0].lengthscales.numpy(), 324 | transform=constrained_transform) 325 | self.model.kernel.kernels[0].lengthscales = new_len 326 | new_var_se = gpflow.Parameter(self.model.kernel.kernels[0].variance.numpy(), transform=var_constrained_transform) 327 | new_var_lin = gpflow.Parameter(self.model.kernel.kernels[1].variance.numpy(), transform=var_constrained_transform) 328 | self.model.kernel.kernels[0].variance = new_var_se 329 | self.model.kernel.kernels[1].variance = new_var_lin 330 | 331 | # TODO: Check if "inducing_variable" is in one of the keys in `self.initial_parameters`, to make things work 332 | # with non `InducingPoints` like inducing variables. 333 | if self.model_class != "GPR" and ".inducing_variable.Z" not in self.initial_parameters: 334 | # Kernel parameters should be initialised before inducing variables are. If inducing variables are set in 335 | # the initial parameters, we shouldn't run this. 336 | self.init_inducing_variable() 337 | 338 | 339 | @dataclass 340 | class FullbatchUciExperiment(GaussianProcessUciExperiment): 341 | optimizer: Optional[str] = "l-bfgs-b" 342 | training_procedure: Optional[str] = "joint" # joint | fixed_Z | reinit_Z 343 | 344 | def run_optimisation(self): 345 | print(f"Running {str(self)}") 346 | 347 | model = self.model 348 | loss_function = self.model.training_loss_closure(compile=True) 349 | robust_loss_function = lambda: -self.model.robust_maximum_log_likelihood_objective() 350 | # loss_function = tf.function(lambda jitter=None: -self.model.elbo(jitter)) 351 | hist = LoggerCallback(model, robust_loss_function) 352 | if self.optimizer == "l-bfgs-b" or self.optimizer == "bfgs": 353 | opt = RobustScipy() 354 | else: 355 | raise NotImplementedError(f"I don't know {self.optimizer}") 356 | def run_optimisation(): 357 | try: 358 | opt.minimize( 359 | loss_function, 360 | self.model.trainable_variables, 361 | robust_closure=robust_loss_function, 362 | method=self.optimizer, 363 | options=dict(maxiter=1000, disp=True), 364 | step_callback=hist, 365 | ) 366 | print("") 367 | except KeyboardInterrupt as e: 368 | if input("Optimisation aborted. Do you want to re-raise the KeyboardInterrupt? 
(y/n) ") == "y": 369 | raise e 370 | 371 | if self.training_procedure == "joint": 372 | run_optimisation() 373 | elif self.training_procedure == "fixed_Z": 374 | set_trainable(self.model.inducing_variable, False) 375 | run_optimisation() 376 | run_optimisation() 377 | elif self.training_procedure == "reinit_Z": 378 | set_trainable(self.model.inducing_variable, False) 379 | for i in range(20): 380 | reinit = True 381 | try: 382 | run_optimisation() 383 | except tf.errors.InvalidArgumentError as e: 384 | if e.message[1:9] != "Cholesky": 385 | raise e 386 | self.init_inducing_variable() 387 | print(self.model.elbo().numpy()) # Check whether Cholesky fails 388 | reinit = False 389 | 390 | if reinit: 391 | old_Z = self.model.inducing_variable.Z.numpy().copy() 392 | old_elbo = self.model.robust_maximum_log_likelihood_objective() 393 | self.init_inducing_variable() 394 | if self.model.robust_maximum_log_likelihood_objective() <= old_elbo: 395 | # Restore old Z, and finish optimisation 396 | self.model.inducing_variable.Z.assign(old_Z) 397 | print("Stopped reinit_Z procedure because new ELBO was smaller than old ELBO.") 398 | break 399 | else: 400 | raise NotImplementedError 401 | 402 | # Store results 403 | self.trained_parameters = gpflow.utilities.read_values(model) 404 | self.train_objective_hist = opt.f_vals #(hist.n_iters, hist.log_likelihoods) 405 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/plotting.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def plot_1d_model(m, *, data=None): 6 | D = m.inducing_variable.Z.numpy().shape[1] 7 | if data is not None: 8 | X, Y = data[0], data[1] 9 | plt.plot(X, Y, 'x') 10 | 11 | data_inducingpts = np.vstack((X if data else np.zeros((0, D)), m.inducing_variable.Z.numpy())) 12 | pX = np.linspace(np.min(data_inducingpts) - 1.0, np.max(data_inducingpts) + 1.0, 300)[:, None] 13 | pY, pYv = m.predict_y(pX) 14 | 15 | line, = plt.plot(pX, pY, lw=1.5) 16 | col = line.get_color() 17 | plt.plot(pX, pY + 2 * pYv ** 0.5, col, lw=1.5) 18 | plt.plot(pX, pY - 2 * pYv ** 0.5, col, lw=1.5) 19 | plt.plot(m.inducing_variable.Z.numpy(), np.zeros(m.inducing_variable.Z.numpy().shape), 'k|', mew=2) 20 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/storing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import re 4 | from glob import glob 5 | 6 | 7 | def get_next_filename(path, base_filename="data", extension="pkl"): 8 | if not os.path.exists(path): 9 | os.makedirs(path) 10 | largest_existing_number = max([int(re.findall(r'\d+', fn)[-1]) for fn in glob(f"{path}/{base_filename}*")] + [0]) 11 | path = f"{path}/{base_filename}{largest_existing_number + 1}" 12 | if extension is not None: 13 | path = f"{path}.{extension}" 14 | return path 15 | 16 | 17 | def store_pickle(data, base_path, base_filename="data"): 18 | with open(get_next_filename(base_path, base_filename), 'wb') as outfile: 19 | pickle.dump(data, outfile) 20 | 21 | 22 | def load_existing_runs(path, base_filename="data"): 23 | existing_runs = [] 24 | for fn in glob(f"{path}/{base_filename}*"): 25 | with open(fn, 'rb') as fp: 26 | existing_runs.append((pickle.load(fp), fn)) 27 | return existing_runs 28 | 29 | 30 | def weak_dictionary_compare(source_dict, target_dict): 31 | """ 32 | Returns `True` if all values that are 
present in `source_dict`, are the 33 | same as the values of the same key in `target_dict`. 34 | """ 35 | # Minimal body implementing the check described in the docstring above; does not special-case array-valued entries. 36 | return all(key in target_dict and target_dict[key] == value for key, value in source_dict.items()) 37 | 38 | def find_run(existing_runs, run_details): 39 | return [run for run in existing_runs if run["run_details"] == run_details] 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | requirements = [ 4 | 'numpy>=1.18.1', 5 | 'scipy>=1.4.1', 6 | 'matplotlib>=3.1.3', 7 | 'json_tricks', 8 | 'jug' 9 | ] 10 | 11 | setup( 12 | name='robustgp', 13 | version='0.0.2', 14 | packages=find_packages(), 15 | install_requires=requirements, 16 | ) 17 | --------------------------------------------------------------------------------