├── .gitignore ├── LICENSE ├── README.md ├── robustgp ├── __init__.py ├── init_methods │ ├── __init__.py │ ├── init_methods_test.py │ ├── kdpp_mcmc.py │ ├── methods.py │ ├── misc.py │ └── rls.py ├── models.py ├── models_test.py ├── optimizers.py ├── optimizers_test.py └── utilities.py ├── robustgp_experiments ├── __init__.py ├── demo1d.py ├── init_z │ ├── __init__.py │ ├── figures │ │ ├── fixedhyp-Naval_noisy-elbo.pdf │ │ ├── fixedhyp-Naval_noisy-nlpp.pdf │ │ ├── fixedhyp-Naval_noisy-rmse.pdf │ │ ├── fixedhyp-Wilson_elevators-elbo.pdf │ │ ├── fixedhyp-Wilson_elevators-nlpp.pdf │ │ ├── fixedhyp-Wilson_elevators-rmse.pdf │ │ ├── fixedhyp-Wilson_energy-elbo.pdf │ │ ├── fixedhyp-Wilson_energy-nlpp.pdf │ │ ├── fixedhyp-Wilson_energy-rmse.pdf │ │ ├── optall-Naval_noisy-trace.pdf │ │ ├── optall-Wilson_elevators-trace.pdf │ │ ├── optall-Wilson_energy-trace.pdf │ │ ├── opthyp-Naval_noisy-elbo-only.pdf │ │ ├── opthyp-Naval_noisy-elbo.pdf │ │ ├── opthyp-Naval_noisy-nlpp.pdf │ │ ├── opthyp-Naval_noisy-rmse.pdf │ │ ├── opthyp-Wilson_elevators-elbo-only.pdf │ │ ├── opthyp-Wilson_elevators-elbo.pdf │ │ ├── opthyp-Wilson_elevators-nlpp.pdf │ │ ├── opthyp-Wilson_elevators-rmse.pdf │ │ ├── opthyp-Wilson_energy-elbo-only.pdf │ │ ├── opthyp-Wilson_energy-elbo.pdf │ │ ├── opthyp-Wilson_energy-nlpp.pdf │ │ └── opthyp-Wilson_energy-rmse.pdf │ ├── jug-plot-init-inducing-fixedhyp.py │ ├── jug-plot-init-inducing-opt.py │ ├── jug-plot-opt-inducing.py │ ├── jug-plot-search-uci.py │ ├── jug_init_inducing_fixedhyp.py │ ├── jug_init_inducing_opt.py │ ├── jug_opt_inducing.py │ ├── jug_search_uci.py │ ├── utils.py │ └── which-optimiser.py └── utils │ ├── __init__.py │ ├── baselines.py │ ├── data.py │ ├── experiment_processing.py │ ├── experiment_running.py │ ├── plotting.py │ └── storing.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.jugdata 2 | 3 | .vscode 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | # PyCharm 111 | .idea 112 | 113 | *.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RobustGP 2 | Procedures for robust initialisation and optimisation of Variational Sparse Gaussian processes. This code accompanies 3 | Burt et al (2019, 2020) (see citation below), and implements the recommendations. 
4 | 5 | ## The bottom line 6 | In Burt et al (2020), we recommend that Sparse GP Regression (SGPR) (Titsias, 2009) models be trained in the following way: 7 | - Initialise the inducing inputs using the ```ConditionalVariance``` method. 8 | - Alternate between optimising only the hyperparameters and reinitialising the inducing inputs using ```ConditionalVariance```. 9 | See ```FullbatchUciExperiment``` for an example of how to implement this (```training_procedure == 'reinit_Z'```). 10 | 11 | We find that when using ```ConditionalVariance``` we obtain the same performance as gradient-optimised inducing inputs 12 | with a slightly larger number of inducing variables. The benefit is not having to do gradient-based optimisation, which 13 | is often more of a pain than it is worth. 14 | 15 | A few anecdotal suggestions for practitioners: 16 | - We suggest using ```ConditionalVariance``` for initialisation even with non-Gaussian likelihoods, although you may want 17 | to test for yourself whether to use the periodic reinitialisation method or gradient-based inducing input optimisation. 18 | - When you get Cholesky errors, consider reinitialising the inducing inputs with ```ConditionalVariance``` rather than 19 | e.g. raising the jitter. ```ConditionalVariance``` will spread out the inducing inputs under any new hyperparameters that 20 | caused high correlation between the old inducing variables, leading to better conditioning of ```Kuu```. 21 | 22 | ### Example 23 | ```python 24 | M = 1000  # We choose 1000 inducing variables 25 | k = gpflow.kernels.SquaredExponential() 26 | # Initialise hyperparameters here 27 | init_method = robustgp.ConditionalVariance() 28 | Z = init_method.compute_initialisation(X_train, M, k)[0] 29 | model = gpflow.models.SGPR((X_train, Y_train), k, Z) 30 | for _ in range(10): 31 |     # Optimise w.r.t. the hyperparameters here... 32 |     Z = init_method.compute_initialisation(X_train, M, k)[0]  # Reinitialise with the new kernel hyperparameters 33 |     model.inducing_variable.Z = gpflow.Parameter(Z) 34 | ``` 35 | 36 | ## What the code provides 37 | ### Inducing input initialisation 38 | We provide various inducing point initialisation methods, together with some tools for robustly optimising GPflow 39 | models. We really only recommend using ```ConditionalVariance``` for initialising inducing inputs; the others are 40 | included for the experiments in the paper. 41 | 42 | ### Automatic jitter selection 43 | In addition, we provide versions of the GPflow classes ```SGPR``` and ```GPR``` whose objective functions are 44 | robust to Cholesky/inversion errors. This is implemented by automatically increasing the jitter, as is done in e.g. 45 | [GPy](https://sheffieldml.github.io/GPy/). This process is a bit cumbersome in TensorFlow, so we provide the 46 | classes ```RobustSGPR``` and ```RobustGPR```, as well as a customised Scipy optimiser ```RobustScipy```. To see how this 47 | is used, see the class ```FullbatchUciExperiment``` in the ```robustgp_experiments``` directory, or the short sketch at the end of the Code guidelines section below. 48 | 49 | ### Experiments 50 | All the experiments from Burt et al (2020) are included in the ```robustgp_experiments``` directory. 51 | 52 | ## Code guidelines 53 | For using the initialisation code: 54 | - Make sure that [GPflow](https://github.com/GPflow/GPflow) is installed, then run ```python setup.py develop```. 55 | - Tests can be run using ```pytest -x --cov-report html --cov=robustgp```. 
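The following is a minimal sketch of how the robust classes and optimiser described above fit together. `X_train`, `Y_train`, the number of inducing variables `M`, and the `maxiter` setting are placeholders, and hyperparameter initialisation is omitted.

```python
import gpflow
from robustgp import ConditionalVariance
from robustgp.models import RobustSGPR
from robustgp.optimizers import RobustScipy

# Initialise the inducing inputs with ConditionalVariance and build a robust SGPR model.
k = gpflow.kernels.SquaredExponential()
Z = ConditionalVariance().compute_initialisation(X_train, M, k)[0]
model = RobustSGPR((X_train, Y_train), k, Z)

# The compiled loss is used for speed; the robust closure is only evaluated when a
# Cholesky/inversion error occurs, and retries the objective with increased jitter.
loss = model.training_loss_closure(compile=True)
robust_loss = lambda: -model.robust_maximum_log_likelihood_objective()

opt = RobustScipy()
opt.minimize(loss, model.trainable_variables, robust_closure=robust_loss,
             method="l-bfgs-b", options=dict(maxiter=1000))
```

To follow the reinitialisation procedure recommended in "The bottom line", combine this with the loop in the earlier example: optimise only the hyperparameters while keeping the inducing inputs fixed, and recompute `Z` with ```compute_initialisation``` between optimisation runs.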
56 | 57 | For running the experiments: 58 | - We use code from [Bayesian benchmarks](https://github.com/hughsalimbeni/bayesian_benchmarks) to handle dataset 59 | loading. Some assembly is needed to get all the datasets. 60 | - Some scripts are parallelised using `jug`. 61 | - Make sure it's installed using `pip install jug`. 62 | - You can run all the tasks in a script in parallel by running `jug execute jug_script.py` multiple times. 63 | - Jug communicates over the filesystem, so multiple computers can parallelise the same script if they share a networked filesystem. 64 | - Usually, a separate script takes care of the plotting / processing of the results. 65 | 66 | ## Citation 67 | To cite the recommendations in our paper or this accompanying software, please refer to our JMLR paper. 68 | ``` 69 | @article{burt2020gpviconv, 70 | author = {David R. Burt and Carl Edward Rasmussen and Mark van der Wilk}, 71 | title = {Convergence of Sparse Variational Inference in Gaussian Processes Regression}, 72 | journal = {Journal of Machine Learning Research}, 73 | year = {2020}, 74 | volume = {21}, 75 | number = {131}, 76 | pages = {1-63}, 77 | url = {http://jmlr.org/papers/v21/19-1015.html} 78 | } 79 | ``` 80 | 81 | This JMLR paper is an extended version of our ICML paper. 82 | ``` 83 | @InProceedings{burt2019gpviconv, 84 | title = {Rates of Convergence for Sparse Variational {G}aussian Process Regression}, 85 | author = {Burt, David and Rasmussen, Carl Edward and van der Wilk, Mark}, 86 | booktitle = {Proceedings of the 36th International Conference on Machine Learning}, 87 | pages = {862--871}, 88 | year = {2019}, 89 | editor = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan}, 90 | volume = {97}, 91 | series = {Proceedings of Machine Learning Research}, 92 | address = {Long Beach, California, USA}, 93 | month = {09--15 Jun}, 94 | publisher = {PMLR}, 95 | pdf = {http://proceedings.mlr.press/v97/burt19a/burt19a.pdf}, 96 | url = {http://proceedings.mlr.press/v97/burt19a.html}, 97 | } 98 | ``` 99 | -------------------------------------------------------------------------------- /robustgp/__init__.py: -------------------------------------------------------------------------------- 1 | from .init_methods import InducingPointInitializer, FirstSubsample, UniformSubsample, Kmeans, ConditionalVariance, \ 2 | KdppMCMC, RLS 3 | from . import optimizers 4 | from . import models 5 | from . 
import utilities 6 | -------------------------------------------------------------------------------- /robustgp/init_methods/__init__.py: -------------------------------------------------------------------------------- 1 | from .methods import InducingPointInitializer, FirstSubsample, ConditionalVariance, UniformSubsample, Kmeans 2 | from .rls import RLS 3 | from .kdpp_mcmc import KdppMCMC -------------------------------------------------------------------------------- /robustgp/init_methods/init_methods_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from gpflow.kernels import SquaredExponential 5 | from .methods import FirstSubsample, UniformSubsample, Kmeans, ConditionalVariance 6 | 7 | 8 | @pytest.mark.parametrize("init_method", [FirstSubsample(seed=0), UniformSubsample(seed=0), Kmeans(seed=0), 9 | ConditionalVariance(seed=0, sample=True), 10 | ConditionalVariance(seed=0, sample=False)]) 11 | def test_seed_reproducibility(init_method): 12 | k = SquaredExponential() 13 | X = np.random.randn(100, 2) 14 | 15 | Z1, idx1 = init_method(X, 30, k) 16 | Z2, idx2 = init_method(X, 30, k) 17 | 18 | assert np.all(Z1 == Z2), str(init_method) 19 | assert np.all(idx1 == idx2), str(init_method) 20 | 21 | 22 | def test_incremental_ConditionalVariance(): 23 | init_method = ConditionalVariance(sample=True) 24 | 25 | k = SquaredExponential() 26 | X = np.random.randn(100, 2) 27 | 28 | Z1, idx1 = init_method(X, 20, k) 29 | Z2, idx2 = init_method(X, 30, k) 30 | 31 | assert np.all(Z1 == Z2[:20]) 32 | assert np.all(idx1 == idx2[:20]) 33 | -------------------------------------------------------------------------------- /robustgp/init_methods/kdpp_mcmc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import warnings 4 | from typing import Callable, Optional 5 | 6 | from .methods import ConditionalVariance 7 | 8 | 9 | class KdppMCMC(ConditionalVariance): 10 | 11 | def __init__(self, num_steps: Optional[int] = 10000, seed: Optional[int] = 0, **kwargs): 12 | """ 13 | Implements the MCMC approximation to sampling from a k-DPP developed in 14 | @inproceedings{anari2016monte, 15 | title={Monte Carlo Markov chain algorithms for sampling strongly Rayleigh distributions and determinantal point processes}, 16 | author={Anari, Nima and Gharan, Shayan Oveis and Rezaei, Alireza}, 17 | booktitle={Conference on Learning Theory}, 18 | pages={103--115}, 19 | year={2016} 20 | } 21 | and used for initializing inducing point in 22 | @inproceedings{burt2019rates, 23 | title={Rates of Convergence for Sparse Variational Gaussian Process Regression}, 24 | author={Burt, David and Rasmussen, Carl Edward and Van Der Wilk, Mark}, 25 | booktitle={International Conference on Machine Learning}, 26 | pages={862--871}, 27 | year={2019} 28 | } 29 | More information on determinantal point processes and related algorithms can be found at: 30 | https://github.com/guilgautier/DPPy 31 | :param sample: int, number of steps of MCMC to run 32 | :param threshold: float or None, if not None, if tr(Kff-Qff) self.max_data: 95 | uniform = UniformSubsample(0) 96 | training_inputs = uniform(training_inputs, self.max_data) 97 | centroids, _ = scipy.cluster.vq.kmeans(training_inputs, M) 98 | # Some times K-Means returns fewer than K centroids, in this case we sample remaining point from data 99 | if len(centroids) < M: 100 | num_extra_points = M - len(centroids) 101 | indices = 
np.random.choice(N, size=num_extra_points, replace=False) 102 | additional_points = training_inputs[indices] 103 | centroids = np.concatenate([centroids, additional_points], axis=0) 104 | return centroids * training_inputs_stds, None 105 | 106 | 107 | class ConditionalVariance(InducingPointInitializer): 108 | def __init__(self, sample: Optional[bool] = False, threshold: Optional[int] = 0.0, seed: Optional[int] = 0, 109 | **kwargs): 110 | """ 111 | :param sample: bool, if True, sample points into subset to use with weights based on variance, if False choose 112 | point with highest variance at each iteration 113 | :param threshold: float or None, if not None, if tr(Kff-Qff) 0: 50 | candidate_indices = np.where(random_nums >= probs)[0] 51 | additional_indices = np.random.choice(candidate_indices, size=num_additional_indices, 52 | replace=False) 53 | indices = np.append(indices, additional_indices) 54 | indices_to_include = active_indices[indices] 55 | column_weights = np.sqrt(1. / probs[indices]) 56 | else: 57 | probs = probs * M / np.sum(probs) 58 | random_nums = np.random.rand(len(probs)) 59 | indices_to_include = active_indices[random_nums < probs] 60 | column_weights = np.sqrt(1. / probs[random_nums < probs]) 61 | # If we sample too few inducing points, resample 62 | while len(indices_to_include) < M: 63 | random_nums = np.random.rand(len(probs)) # resample if not enough 64 | indices_to_include = active_indices[random_nums < probs] 65 | column_weights = np.sqrt(1. / probs[random_nums < probs]) 66 | probs = np.clip(probs * M / np.sum(np.clip(probs, 0, 1)), 0, 1) 67 | probs *= 1.01 68 | inds = np.random.choice(len(indices_to_include), size=M, replace=False) 69 | indices_to_include, column_weights = indices_to_include[inds], column_weights[inds] 70 | return indices_to_include, column_weights, probs 71 | 72 | 73 | def recursive_rls(training_inputs: np.ndarray, 74 | M: int, 75 | kernel: Callable[[np.ndarray, Optional[np.ndarray], Optional[bool]], np.ndarray], 76 | active_indices: np.ndarray): 77 | num_data = training_inputs.shape[0] 78 | top_level = len(active_indices) == num_data # boolean indicating we are at top level of recursion 79 | c = .25 80 | k = np.minimum(num_data, int(np.ceil(c * M / np.log(M+1)))) 81 | 82 | if len(active_indices) <= M: # Base case of recursion, see l 1,2 in Musco and Musco, alg 3 83 | return active_indices, np.ones_like(active_indices), np.ones_like(active_indices) 84 | s_bar = np.random.randint(0, 2, len(active_indices)).nonzero()[0] # points sampled into Sbar, l4 85 | if len(s_bar) == 0: 86 | active_indices = np.random.choice(active_indices, (1+len(active_indices))//2, replace=False) 87 | return active_indices, np.ones_like(active_indices), np.ones_like(active_indices) 88 | 89 | indices_to_include, column_weights, probs = recursive_rls(training_inputs, M, kernel, 90 | active_indices=active_indices[s_bar]) 91 | Z = training_inputs[indices_to_include] 92 | SKS = kernel(Z) * column_weights[None, :] * column_weights[:, None] # sketched kernel matrix 93 | eigvals = scipy.sparse.linalg.eigsh(SKS.numpy(), k=k, which='LM', return_eigenvectors=False) 94 | 95 | lam = 1 / k * (np.sum(np.diag(SKS)) - np.sum(eigvals)) 96 | lam = np.maximum(1e-12, lam) 97 | 98 | weighted_leverage = approximate_rls(training_inputs, kernel, lam, subset_to_predict=active_indices, 99 | subset_used=indices_to_include, column_weights=column_weights) 100 | indices_to_include, column_weights, probs = get_indices_and_weights(weighted_leverage, active_indices, k, 101 | top_level, M) 102 | 103 | return 
indices_to_include, column_weights, probs -------------------------------------------------------------------------------- /robustgp/models.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from gpflow.base import Parameter 7 | from gpflow.config import default_jitter, default_float 8 | from gpflow.covariances import Kuf, Kuu 9 | from gpflow.kernels import Kernel 10 | from gpflow.mean_functions import MeanFunction 11 | from gpflow.models import GPR, SGPR 12 | from gpflow.models.training_mixins import RegressionData, InputData 13 | from gpflow.utilities import positive, to_default_float 14 | from gpflow.models.model import MeanAndVariance 15 | 16 | 17 | class RobustObjectiveMixin: 18 | def __init__(self, *args, **kwargs): 19 | super().__init__(*args, **kwargs) 20 | self.jitter_variance = Parameter( 21 | max(default_jitter(), 1e-20), transform=positive(0.0), trainable=False, name="jitter" 22 | ) 23 | 24 | def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor: 25 | raise NotImplementedError 26 | 27 | def robust_maximum_log_likelihood_objective(self, restore_jitter=True) -> tf.Tensor: 28 | initial_jitter = self.jitter_variance.numpy() 29 | N_orders = 20 30 | for i in range(N_orders): 31 | self.jitter_variance.assign(10 ** i * initial_jitter) 32 | logjitter = np.log10(self.jitter_variance.numpy()) 33 | if i > 0: 34 | if i == 1: 35 | print( 36 | f"{type(self).__name__}: Failed first computation. " f"Now attempting computation with jitter ", 37 | end="", 38 | ) 39 | print(f"10**{logjitter:.2f} ", end="", flush=True) 40 | try: 41 | val = self._compute_robust_maximum_log_likelihood_objective() 42 | break 43 | except tf.errors.InvalidArgumentError as e_inner: 44 | e_msg = e_inner.message 45 | if (("Cholesky" not in e_msg) and ("not invertible" not in e_msg)) or i == (N_orders - 1): 46 | print(e_msg) 47 | raise e_inner 48 | except AssertionError as e_inner: 49 | e_msg = e_inner.args 50 | if i == (N_orders - 1): 51 | print(e_msg) 52 | raise e_inner 53 | if restore_jitter: 54 | self.jitter_variance.assign(initial_jitter) 55 | if i > 0: 56 | print("") 57 | return val 58 | 59 | 60 | class RobustSGPR(RobustObjectiveMixin, SGPR): 61 | def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor: 62 | """ 63 | Construct a tensorflow function to compute the bound on the marginal 64 | likelihood. For a derivation of the terms in here, see the associated 65 | SGPR notebook. 
66 | """ 67 | X_data, Y_data = self.data 68 | 69 | num_inducing = len(self.inducing_variable) 70 | num_data = to_default_float(tf.shape(Y_data)[0]) 71 | output_dim = to_default_float(tf.shape(Y_data)[1]) 72 | 73 | err = Y_data - self.mean_function(X_data) 74 | Kdiag = self.kernel(X_data, full_cov=False) 75 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 76 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 77 | L = tf.linalg.cholesky(kuu) 78 | sigma = tf.sqrt(self.likelihood.variance) 79 | 80 | # Compute intermediate matrices 81 | A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma 82 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 83 | B = AAT + tf.eye(num_inducing, dtype=default_float()) 84 | LB = tf.linalg.cholesky(B) 85 | Aerr = tf.linalg.matmul(A, err) 86 | c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma 87 | trace_term = 0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance 88 | trace_term -= 0.5 * output_dim * tf.reduce_sum(tf.linalg.diag_part(AAT)) 89 | 90 | # tr(Kff - Qff) should be positive, numerical issues can arise here 91 | assert trace_term > 0.0, f"Trace term negative, should be positive ({trace_term:.4e})." 92 | 93 | # compute log marginal bound 94 | bound = -0.5 * num_data * output_dim * np.log(2 * np.pi) 95 | bound += tf.negative(output_dim) * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB))) 96 | bound -= 0.5 * num_data * output_dim * tf.math.log(self.likelihood.variance) 97 | bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance 98 | bound += 0.5 * tf.reduce_sum(tf.square(c)) 99 | bound -= trace_term 100 | 101 | return bound 102 | 103 | def upper_bound(self) -> tf.Tensor: 104 | """ 105 | Upper bound for the sparse GP regression marginal likelihood. Note that 106 | the same inducing points are used for calculating the upper bound, as are 107 | used for computing the likelihood approximation. This may not lead to the 108 | best upper bound. The upper bound can be tightened by optimising Z, just 109 | like the lower bound. This is especially important in FITC, as FITC is 110 | known to produce poor inducing point locations. An optimisable upper bound 111 | can be found in https://github.com/markvdw/gp_upper. 112 | 113 | The key reference is 114 | 115 | :: 116 | 117 | @misc{titsias_2014, 118 | title={Variational Inference for Gaussian and Determinantal Point Processes}, 119 | url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf}, 120 | publisher={Workshop on Advances in Variational Inference (NIPS 2014)}, 121 | author={Titsias, Michalis K.}, 122 | year={2014}, 123 | month={Dec} 124 | } 125 | 126 | The key quantity, the trace term, can be computed via 127 | 128 | >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel, 129 | ... np.zeros((len(model.inducing_variable), 1))) 130 | 131 | which computes each individual element of the trace term. 
132 | """ 133 | X_data, Y_data = self.data 134 | num_data = to_default_float(tf.shape(Y_data)[0]) 135 | 136 | Kdiag = self.kernel(X_data, full_cov=False) 137 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 138 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 139 | 140 | I = tf.eye(tf.shape(kuu)[0], dtype=default_float()) 141 | 142 | L = tf.linalg.cholesky(kuu) 143 | A = tf.linalg.triangular_solve(L, kuf, lower=True) 144 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 145 | B = I + AAT / self.likelihood.variance 146 | LB = tf.linalg.cholesky(B) 147 | 148 | # Using the Trace bound, from Titsias' presentation 149 | c = tf.maximum(tf.reduce_sum(Kdiag) - tf.reduce_sum(tf.square(A)), 0) 150 | 151 | # Alternative bound on max eigenval: 152 | corrected_noise = self.likelihood.variance + c 153 | 154 | const = -0.5 * num_data * tf.math.log(2 * np.pi * self.likelihood.variance) 155 | logdet = -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB))) 156 | 157 | LC = tf.linalg.cholesky(I + AAT / corrected_noise) 158 | v = tf.linalg.triangular_solve(LC, tf.linalg.matmul(A, Y_data) / corrected_noise, lower=True) 159 | quad = -0.5 * tf.reduce_sum(tf.square(Y_data)) / corrected_noise + 0.5 * tf.reduce_sum(tf.square(v)) 160 | 161 | return const + logdet + quad 162 | 163 | def upper_bound(self) -> tf.Tensor: 164 | """ 165 | Upper bound for the sparse GP regression marginal likelihood. Note that 166 | the same inducing points are used for calculating the upper bound, as are 167 | used for computing the likelihood approximation. This may not lead to the 168 | best upper bound. The upper bound can be tightened by optimising Z, just 169 | like the lower bound. This is especially important in FITC, as FITC is 170 | known to produce poor inducing point locations. An optimisable upper bound 171 | can be found in https://github.com/markvdw/gp_upper. 172 | The key reference is 173 | :: 174 | @misc{titsias_2014, 175 | title={Variational Inference for Gaussian and Determinantal Point Processes}, 176 | url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf}, 177 | publisher={Workshop on Advances in Variational Inference (NIPS 2014)}, 178 | author={Titsias, Michalis K.}, 179 | year={2014}, 180 | month={Dec} 181 | } 182 | The key quantity, the trace term, can be computed via 183 | >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel, 184 | ... np.zeros((len(model.inducing_variable), 1))) 185 | which computes each individual element of the trace term. 
186 | """ 187 | X_data, Y_data = self.data 188 | num_data = to_default_float(tf.shape(Y_data)[0]) 189 | 190 | Kdiag = self.kernel(X_data, full_cov=False) 191 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 192 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 193 | 194 | I = tf.eye(tf.shape(kuu)[0], dtype=default_float()) 195 | 196 | L = tf.linalg.cholesky(kuu) 197 | A = tf.linalg.triangular_solve(L, kuf, lower=True) 198 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 199 | B = I + AAT / self.likelihood.variance 200 | LB = tf.linalg.cholesky(B) 201 | 202 | # Using the Trace bound, from Titsias' presentation 203 | c = tf.maximum(tf.reduce_sum(Kdiag) - tf.reduce_sum(tf.square(A)), 0) 204 | 205 | # Alternative bound on max eigenval: 206 | corrected_noise = self.likelihood.variance + c 207 | 208 | const = -0.5 * num_data * tf.math.log(2 * np.pi * self.likelihood.variance) 209 | logdet = -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB))) 210 | 211 | LC = tf.linalg.cholesky(I + AAT / corrected_noise) 212 | v = tf.linalg.triangular_solve(LC, tf.linalg.matmul(A, Y_data) / corrected_noise, lower=True) 213 | quad = -0.5 * tf.reduce_sum(tf.square(Y_data)) / corrected_noise + 0.5 * tf.reduce_sum(tf.square(v)) 214 | 215 | return const + logdet + quad 216 | 217 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> MeanAndVariance: 218 | """ 219 | Compute the mean and variance of the latent function at some new points 220 | Xnew. For a derivation of the terms in here, see the associated SGPR 221 | notebook. 222 | """ 223 | X_data, Y_data = self.data 224 | num_inducing = len(self.inducing_variable) 225 | err = Y_data - self.mean_function(X_data) 226 | kuf = Kuf(self.inducing_variable, self.kernel, X_data) 227 | kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance) 228 | Kus = Kuf(self.inducing_variable, self.kernel, Xnew) 229 | sigma = tf.sqrt(self.likelihood.variance) 230 | L = tf.linalg.cholesky(kuu) 231 | A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma 232 | B = tf.linalg.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=default_float()) 233 | LB = tf.linalg.cholesky(B) 234 | Aerr = tf.linalg.matmul(A, err) 235 | c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma 236 | tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True) 237 | tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True) 238 | mean = tf.linalg.matmul(tmp2, c, transpose_a=True) 239 | if full_cov: 240 | var = ( 241 | self.kernel(Xnew) 242 | + tf.linalg.matmul(tmp2, tmp2, transpose_a=True) 243 | - tf.linalg.matmul(tmp1, tmp1, transpose_a=True) 244 | ) 245 | var = tf.tile(var[None, ...], [self.num_latent_gps, 1, 1]) # [P, N, N] 246 | else: 247 | var = ( 248 | self.kernel(Xnew, full_cov=False) 249 | + tf.reduce_sum(tf.square(tmp2), 0) 250 | - tf.reduce_sum(tf.square(tmp1), 0) 251 | ) 252 | var = tf.tile(var[:, None], [1, self.num_latent_gps]) 253 | return mean + self.mean_function(Xnew), var 254 | 255 | 256 | class RobustGPR(RobustObjectiveMixin, GPR): 257 | def __init__( 258 | self, 259 | data: RegressionData, 260 | kernel: Kernel, 261 | mean_function: Optional[MeanFunction] = None, 262 | noise_variance: float = 1.0, 263 | ): 264 | super().__init__(data, kernel, mean_function, noise_variance) 265 | 266 | def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor: 267 | r""" 268 | Computes the log marginal likelihood, with some slack caused by the 269 | jitter. Adding the jitter ensures numerical stability. 
270 | 271 | .. math:: 272 | \log p(Y | \theta). 273 | 274 | """ 275 | X, Y = self.data 276 | num_data = X.shape[0] 277 | output_dim = tf.shape(Y)[1] 278 | 279 | K = self.kernel(X) 280 | k_diag = tf.linalg.diag_part(K) 281 | noiseK_L, L = tf.cond( 282 | self.likelihood.variance > self.jitter_variance, 283 | lambda: ( 284 | tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.likelihood.variance)), 285 | tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.jitter_variance)), 286 | ), 287 | lambda: (tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.jitter_variance)),) * 2, 288 | ) 289 | 290 | err = Y - self.mean_function(X) 291 | sigma = tf.sqrt(self.likelihood.variance) 292 | 293 | # Compute intermediate matrices 294 | A = tf.linalg.triangular_solve(L, K, lower=True) / sigma 295 | 296 | AAT = tf.linalg.matmul(A, A, transpose_b=True) 297 | B = tf.linalg.set_diag(AAT, tf.linalg.diag_part(AAT) + 1) # B = AAT + tf.eye(num_data, dtype=default_float()) 298 | # B = AAT + tf.eye(num_data, dtype=default_float()) 299 | LB = tf.linalg.cholesky(B) 300 | Aerr = tf.linalg.matmul(A, err) 301 | c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma 302 | 303 | # compute log marginal bound 304 | bound = -0.5 * to_default_float(num_data) * to_default_float(output_dim) * np.log(2 * np.pi) 305 | bound -= to_default_float(output_dim) * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(noiseK_L))) 306 | bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance 307 | bound += 0.5 * tf.reduce_sum(tf.square(c)) 308 | 309 | return bound 310 | -------------------------------------------------------------------------------- /robustgp/models_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import tensorflow as tf 4 | 5 | import gpflow 6 | from .models import RobustSGPR, RobustGPR 7 | 8 | np.random.seed(0) 9 | X = np.random.rand(1000, 1) 10 | Y = np.hstack((np.sin(X), np.cos(X))) 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "model", 15 | [ 16 | RobustSGPR((X, Y), gpflow.kernels.SquaredExponential(), X.copy()), 17 | RobustGPR((X, Y), gpflow.kernels.SquaredExponential()), 18 | ], 19 | ) 20 | def test_sgpr_stability(model): 21 | print(gpflow.config.default_jitter()) 22 | 23 | # Setup hyperparmaeters 24 | initial_jitter = 1e-6 25 | model.kernel.variance.assign(2.3) 26 | model.kernel.lengthscales.assign(0.93) 27 | model.likelihood.variance.assign(1e-4) 28 | 29 | # For small jitter the results should be very close 30 | model.jitter_variance.assign(initial_jitter) 31 | nojitter = model.maximum_log_likelihood_objective() 32 | jitter = model.robust_maximum_log_likelihood_objective() 33 | np.testing.assert_allclose(jitter, nojitter) 34 | 35 | # Test that increasing jitter leads to a lower bound 36 | for j in np.logspace(1, 8, 8) * initial_jitter: 37 | model.jitter_variance.assign(initial_jitter * j) 38 | model.jitter_variance.assign(j) 39 | jitter = model.robust_maximum_log_likelihood_objective() 40 | print(nojitter.numpy(), jitter.numpy()) 41 | assert jitter < nojitter 42 | 43 | # Test that adding jitter avoids a CholeskyError 44 | model.kernel.variance.assign(1e14) 45 | model.jitter_variance.assign(initial_jitter) 46 | 47 | with pytest.raises(tf.errors.InvalidArgumentError): 48 | model.maximum_log_likelihood_objective() 49 | 50 | model.robust_maximum_log_likelihood_objective() 51 | np.testing.assert_allclose(model.jitter_variance.numpy(), initial_jitter) 52 | 
-------------------------------------------------------------------------------- /robustgp/optimizers.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from dataclasses import field 3 | import numpy as np 4 | import scipy 5 | import tensorflow as tf 6 | 7 | import gpflow 8 | from gpflow.optimizers.scipy import ( 9 | LossClosure, 10 | Variables, 11 | Tuple, 12 | _compute_loss_and_gradients, 13 | Callable, 14 | StepCallback, 15 | OptimizeResult, 16 | ) 17 | 18 | 19 | class RobustScipy(gpflow.optimizers.Scipy): 20 | 21 | def __init__(self): 22 | super().__init__() 23 | self.f_vals = list() 24 | 25 | def minimize( 26 | self, 27 | closure: LossClosure, 28 | variables: Variables, 29 | method: Optional[str] = "L-BFGS-B", 30 | step_callback: Optional[StepCallback] = None, 31 | compile: bool = True, 32 | robust_closure: Optional[LossClosure] = None, 33 | **scipy_kwargs, 34 | ) -> OptimizeResult: 35 | """ 36 | Minimize is a wrapper around the `scipy.optimize.minimize` function 37 | handling the packing and unpacking of a list of shaped variables on the 38 | TensorFlow side vs. the flat numpy array required on the Scipy side. 39 | 40 | Args: 41 | closure: A closure that re-evaluates the model, returning the loss 42 | to be minimized. 43 | variables: The list (tuple) of variables to be optimized 44 | (typically `model.trainable_variables`) 45 | method: The type of solver to use in SciPy. Defaults to "L-BFGS-B". 46 | step_callback: If not None, a callable that gets called once after 47 | each optimisation step. The callabe is passed the arguments 48 | `step`, `variables`, and `values`. `step` is the optimisation 49 | step counter. `variables` is the list of trainable variables as 50 | above, and `values` is the corresponding list of tensors of 51 | matching shape that contains their value at this optimisation 52 | step. 53 | compile: If True, wraps the evaluation function (the passed `closure` as 54 | well as its gradient computation) inside a `tf.function()`, 55 | which will improve optimization speed in most cases. 56 | 57 | scipy_kwargs: Arguments passed through to `scipy.optimize.minimize` 58 | 59 | Returns: 60 | The optimization result represented as a scipy ``OptimizeResult`` 61 | object. See the Scipy documentation for description of attributes. 
62 | """ 63 | if not callable(closure): 64 | raise TypeError("The 'closure' argument is expected to be a callable object.") # pragma: no cover 65 | variables = tuple(variables) 66 | if not all(isinstance(v, tf.Variable) for v in variables): 67 | raise TypeError( 68 | "The 'variables' argument is expected to only contain tf.Variable instances (use model.trainable_variables, not model.trainable_parameters)" 69 | ) # pragma: no cover 70 | initial_params = self.initial_parameters(variables) 71 | 72 | func = self.eval_func(closure, variables, compile=compile, robust_closure=robust_closure) 73 | if step_callback is not None: 74 | if "callback" in scipy_kwargs: 75 | raise ValueError("Callback passed both via `step_callback` and `callback`") 76 | 77 | callback = self.callback_func(variables, step_callback) 78 | scipy_kwargs.update(dict(callback=callback)) 79 | 80 | return scipy.optimize.minimize(func, initial_params, jac=True, method=method, **scipy_kwargs) 81 | 82 | def eval_func( 83 | self, 84 | closure: LossClosure, 85 | variables: Variables, 86 | compile: bool = True, 87 | robust_closure: Optional[LossClosure] = None, 88 | ) -> Callable[[np.ndarray], Tuple[np.ndarray, np.ndarray]]: 89 | def make_tf_eval(closure: LossClosure): 90 | def eager_tf_eval(x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: 91 | values = self.unpack_tensors(variables, x) 92 | self.assign_tensors(variables, values) 93 | 94 | loss, grads = _compute_loss_and_gradients(closure, variables) 95 | return loss, self.pack_tensors(grads) 96 | 97 | return eager_tf_eval 98 | 99 | fast_tf_eval = make_tf_eval(closure) 100 | robust_tf_eval = make_tf_eval(robust_closure) if robust_closure is not None else None 101 | if compile: 102 | fast_tf_eval = tf.function(fast_tf_eval) # Possibly compiled 103 | 104 | def _eval(x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 105 | try: 106 | loss, grad = fast_tf_eval(tf.convert_to_tensor(x)) 107 | except tf.errors.InvalidArgumentError as e: 108 | e_msg = e.message 109 | if robust_tf_eval is None or (("Cholesky" not in e_msg) and ("not invertible" not in e_msg)): 110 | raise e 111 | print(f"Warning: CholeskyError. 
Attempting to continue.") 112 | loss, grad = robust_tf_eval(tf.convert_to_tensor(x)) 113 | self.f_vals.append(loss.numpy()) 114 | return loss.numpy().astype(np.float64), grad.numpy().astype(np.float64) 115 | 116 | return _eval 117 | -------------------------------------------------------------------------------- /robustgp/optimizers_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import tensorflow as tf 4 | 5 | import gpflow 6 | from .models import RobustSGPR, RobustGPR 7 | from .optimizers import RobustScipy 8 | from .utilities import set_trainable 9 | 10 | np.random.seed(0) 11 | X = np.random.rand(100, 1) 12 | Y = np.hstack((np.sin(X), np.cos(X))) 13 | 14 | original_default_jitter = gpflow.config.default_jitter() 15 | original_default_positive_minimum = gpflow.config.default_positive_minimum() 16 | gpflow.config.set_default_jitter(0.0) 17 | gpflow.config.set_default_positive_minimum(1e-6) 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "model", 22 | [ 23 | RobustSGPR((X, Y), gpflow.kernels.SquaredExponential(), X.copy()), 24 | RobustGPR((X, Y), gpflow.kernels.SquaredExponential()), 25 | ], 26 | ) 27 | def test_optimize_stability(model): 28 | config = gpflow.config.Config(jitter=0.0, positive_minimum=1e-6) 29 | with gpflow.config.as_context(config): 30 | print(gpflow.config.default_jitter()) 31 | model.jitter_variance.assign(1e-14) 32 | print(model.jitter_variance.numpy()) 33 | model.likelihood.variance = gpflow.Parameter(1.0, transform=gpflow.utilities.positive(lower=1e-16)) 34 | set_trainable(model, False) 35 | set_trainable(model.kernel, True) 36 | set_trainable(model.likelihood, True) 37 | 38 | loss_function = model.training_loss_closure(compile=True) 39 | robust_loss_function = lambda: -model.robust_maximum_log_likelihood_objective() 40 | 41 | with pytest.raises(tf.errors.InvalidArgumentError): 42 | opt = gpflow.optimizers.Scipy() 43 | opt.minimize(loss_function, model.trainable_variables, method="l-bfgs-b", options=dict(maxiter=10000)) 44 | 45 | opt = RobustScipy() 46 | opt.minimize( 47 | loss_function, 48 | model.trainable_variables, 49 | robust_closure=robust_loss_function, 50 | method="l-bfgs-b", 51 | options=dict(maxiter=10000), 52 | ) 53 | opt.minimize( 54 | loss_function, 55 | model.trainable_variables, 56 | robust_closure=robust_loss_function, 57 | method="l-bfgs-b", 58 | options=dict(maxiter=10000), 59 | ) 60 | 61 | gpflow.utilities.print_summary(model) 62 | 63 | 64 | gpflow.config.set_default_jitter(original_default_jitter) 65 | gpflow.config.set_default_positive_minimum(original_default_positive_minimum) 66 | -------------------------------------------------------------------------------- /robustgp/utilities.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def set_trainable(model: tf.Module, flag: bool): 5 | """ 6 | Set trainable flag for all `tf.Variable`s and `gpflow.Parameter`s in a module. 
7 | """ 8 | for variable in model.variables: 9 | if "jitter" not in variable.name: 10 | variable._trainable = flag 11 | -------------------------------------------------------------------------------- /robustgp_experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/__init__.py -------------------------------------------------------------------------------- /robustgp_experiments/demo1d.py: -------------------------------------------------------------------------------- 1 | import gpflow 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from robustgp import ConditionalVariance 5 | 6 | X = np.random.rand(150, 1) 7 | Y = 0.8 * np.cos(10 * X) + 1.2 * np.sin(8 * X + 0.3) + np.cos(17 * X) * 1.2 + np.random.randn(*X.shape) * 0.1 8 | 9 | gpr = gpflow.models.GPR((X, Y), gpflow.kernels.SquaredExponential()) 10 | opt = gpflow.optimizers.Scipy() 11 | opt_logs = opt.minimize(gpr.training_loss, gpr.trainable_variables, options=dict(maxiter=100)) 12 | 13 | k = gpflow.kernels.SquaredExponential() 14 | gpflow.utilities.multiple_assign(k, gpflow.utilities.read_values(gpr.kernel)) 15 | 16 | Z_initer = ConditionalVariance() 17 | sp = gpflow.models.SGPR((X, Y), k, Z_initer.compute_initialisation(X, 6, k)[0]) 18 | gpflow.utilities.multiple_assign(sp, gpflow.utilities.read_values(gpr)) 19 | 20 | pX = np.linspace(0, 1, 3000)[:, None] 21 | m, v = sp.predict_f(pX) 22 | ipm, _ = sp.predict_f(sp.inducing_variable.Z.value()) 23 | 24 | fig, (ax1, ax2) = plt.subplots(2, 1) 25 | ax1.plot(X, Y, 'x') 26 | ax1.plot(pX, m) 27 | ax1.plot(sp.inducing_variable.Z.value(), ipm, 'o', color='C3') 28 | deviation = (2 * (v + sp.likelihood.variance.value()) ** 0.5).numpy().flatten() 29 | ax1.fill_between(pX.flatten(), m.numpy().flatten() - deviation, m.numpy().flatten() + deviation, alpha=0.3) 30 | ax1.axvline(pX[np.argmax(v)].item(), color='C2') 31 | ax1.set_ylabel("y") 32 | ax2.plot(pX, v ** 0.5) 33 | ax2.plot(sp.inducing_variable.Z.value(), sp.inducing_variable.Z.value() * 0.0, 'o', color='C3') 34 | ax2.axvline(pX[np.argmax(v)].item(), color='C2') 35 | ax2.set_xlabel("input $x$") 36 | ax2.set_ylabel("$\mathbb{V}\,[p(f(x) | \mathbf{u}]^{0.5}$") 37 | plt.show() 38 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/__init__.py -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-nlpp.pdf -------------------------------------------------------------------------------- 
/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Naval_noisy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_elevators-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/fixedhyp-Wilson_energy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/optall-Naval_noisy-trace.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/optall-Naval_noisy-trace.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/optall-Wilson_elevators-trace.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/optall-Wilson_elevators-trace.pdf -------------------------------------------------------------------------------- 
/robustgp_experiments/init_z/figures/optall-Wilson_energy-trace.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/optall-Wilson_energy-trace.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo-only.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo-only.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Naval_noisy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Naval_noisy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo-only.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo-only.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_elevators-rmse.pdf -------------------------------------------------------------------------------- 
/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo-only.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo-only.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-elbo.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_energy-nlpp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-nlpp.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/figures/opthyp-Wilson_energy-rmse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvdw/RobustGP/0819bc9370f8e974f7f751143224d59d990e9531/robustgp_experiments/init_z/figures/opthyp-Wilson_energy-rmse.pdf -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-init-inducing-fixedhyp.py: -------------------------------------------------------------------------------- 1 | # # Inducing point initialisation with fixed hyperparameters 2 | # Assess how well inducing point initialisation works, with the hyperparameters fixed to the ones found by the full GP. 3 | # This simplifies things, since we only need to run optimisation with the full GP (or a GP with many inducing points). 4 | # 5 | # To parallelise things, we use jug. 6 | # 1. Run `jug execute jug_init_inducing_fixedhyp.py` multiple times to do runs in parallel. Workers can sync over a shared 7 | # filesystem. 8 | # 2. Once the above is done, run this script to create the plots.
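The two-step workflow from the header comment above, restated as a short sketch in comment form; the worker count and the `jug status` progress check are illustrative additions rather than part of the original instructions:

# Step 1: start several workers (e.g. one per shell or cluster job), all pointing at the same jugfile
#         and sharing a filesystem so they coordinate through the jug_init_inducing_fixedhyp.jugdata store:
#             jug execute jug_init_inducing_fixedhyp.py
# Step 2: optionally check progress with `jug status jug_init_inducing_fixedhyp.py`; once all tasks are
#         done, run this plotting script directly:
#             python jug-plot-init-inducing-fixedhyp.py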
9 | 10 | import jug.task 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | from robustgp_experiments.utils import baselines 15 | 16 | import matplotlib 17 | matplotlib.rcParams['axes.spines.right'] = False 18 | matplotlib.rcParams['axes.spines.top'] = False 19 | font = {'family': 'cmr10', 'size': 24} 20 | matplotlib.rc('font', **font) 21 | matplotlib.rc('text', usetex=True) 22 | color_dict = {"Uniform":'#999999',"Kmeans": '#ff7f00', "Greedy Conditional Variance":'#4daf4a',"gradient":"#377eb8", 23 | "RLS":'#a65628', "M-DPP MCMC": '#984ea3'} 24 | name_dict = {"Kmeans": "K-means", "Uniform":"Uniform", "Greedy Conditional Variance":"Greedy var.", "Sample Conditional Variance":"Sample var.","gradient":"Gradient", "RLS":"RLS", "M-DPP MCMC": "M-DPP MCMC"} 25 | plot_title_dict = {"Wilson_energy":"Energy", "Wilson_elevators":"Elevators","Naval_noisy":"Naval with Noise"} 26 | methods_to_ignore = ["Sample Conditional Variance"] 27 | jug.init("jug_init_inducing_fixedhyp.py", "jug_init_inducing_fixedhyp.jugdata") 28 | from jug_init_inducing_fixedhyp import ( 29 | init_Z_runs, init_Z_task_results, baseline_exps, full_rmses, full_nlpps, baseline_lmls, Ms, dataset_names 30 | ) 31 | # 32 | # 33 | # Evaluation 34 | init_Z_rmses = {} 35 | init_Z_nlpps = {} 36 | init_Z_elbos = {} 37 | init_Z_uppers = {} 38 | init_Z_Ms = {} 39 | mean_baselines = {} 40 | linear_baselines = {} 41 | for dataset in dataset_names: 42 | init_Z_rmses[dataset] = {} 43 | init_Z_nlpps[dataset] = {} 44 | init_Z_elbos[dataset] = {} 45 | init_Z_uppers[dataset] = {} 46 | init_Z_Ms[dataset] = {} 47 | 48 | for init_Z_method in init_Z_runs[dataset].keys(): 49 | init_Z_rmses[dataset][init_Z_method] = dict() 50 | init_Z_nlpps[dataset][init_Z_method] = dict() 51 | init_Z_elbos[dataset][init_Z_method] = dict() 52 | init_Z_uppers[dataset][init_Z_method] = dict() 53 | init_Z_Ms[dataset][init_Z_method] = [] 54 | for stat in ["Means", "Standard dev.", "Sample std.", "Median", "80 pct", "20 pct"]: 55 | for metric in [init_Z_rmses,init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 56 | metric[dataset][init_Z_method][stat] = [] 57 | for M in init_Z_task_results[dataset][init_Z_method].keys(): 58 | init_Z_Ms[dataset][init_Z_method].append(int(M)) 59 | init_Z_rmses[dataset][init_Z_method][M] = [] 60 | init_Z_nlpps[dataset][init_Z_method][M] = [] 61 | init_Z_elbos[dataset][init_Z_method][M] = [] 62 | init_Z_uppers[dataset][init_Z_method][M] = [] 63 | for result in init_Z_task_results[dataset][init_Z_method][M]: 64 | try: 65 | elbo, upper, rmse, nlpp = jug.task.value(result) 66 | except: 67 | continue 68 | init_Z_elbos[dataset][str(init_Z_method)][M].append(elbo) 69 | init_Z_uppers[dataset][str(init_Z_method)][M].append(upper) 70 | init_Z_rmses[dataset][str(init_Z_method)][M].append(rmse) 71 | init_Z_nlpps[dataset][str(init_Z_method)][M].append(nlpp) 72 | for metric in [init_Z_rmses, init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 73 | metric[dataset][init_Z_method]["Means"].append(np.mean(metric[dataset][init_Z_method][M])) 74 | metric[dataset][init_Z_method]["Standard dev."].append(np.std(metric[dataset][init_Z_method][M])) 75 | metric[dataset][init_Z_method]["Sample std."].append(np.std(metric[dataset][init_Z_method][M]) / 76 | np.sqrt((len(metric[dataset][init_Z_method][M])-1))) 77 | metric[dataset][init_Z_method]["Median"].append(np.median(metric[dataset][init_Z_method][M])) 78 | metric[dataset][init_Z_method]["20 pct"].append(np.percentile(metric[dataset][init_Z_method][M],20)) 79 | metric[dataset][init_Z_method]["80 
pct"].append(np.percentile(metric[dataset][init_Z_method][M],80)) 80 | 81 | baseline_exp = baseline_exps[dataset] 82 | mean_baselines[dataset] = baselines.meanpred_baseline(None, baseline_exp.Y_train, None, baseline_exp.Y_test) 83 | linear_baselines[dataset] = baselines.linear_baseline(baseline_exp.X_train, baseline_exp.Y_train, 84 | baseline_exp.X_test, baseline_exp.Y_test) 85 | 86 | # 87 | # Plotting 88 | for dataset in dataset_names: 89 | dataset_plot_settings = dict( 90 | Naval_noisy=dict(xlim=(10,200),elbo_only_ylim=(37300, 37900), elbo_ylim=(36e3, 43500), nlpp_ylim=(-3.6, -2.1), 91 | rmse_ylim=(.0065, .018), include_mean=False, include_linear=False), 92 | Wilson_energy=dict(xlim=(10,200),elbo_only_ylim=(800, 1050), elbo_ylim=(800, 1350), nlpp_ylim=(-1.7, -.6), 93 | rmse_ylim=(.046, .09), include_mean=False, include_linear=False), 94 | Wilson_elevators=dict(xlim=(0,5000),elbo_only_ylim=(-8500, -6000), elbo_ylim=(-8000, 1000), nlpp_ylim=(.375, .45), 95 | rmse_ylim=(.3515, .37), include_mean=False, include_linear=False) 96 | ).get(dataset, dict(elbo_only_y_lim=None, elbo_ylim=None, include_linear=True, nlpp_ylim=None, 97 | rmse_ylim=None)) 98 | l_elbo, l_rmse, l_nlpp = linear_baselines[dataset] 99 | m_elbo, m_rmse, m_nlpp = mean_baselines[dataset] 100 | fig, ax = plt.subplots() 101 | for method in init_Z_runs[dataset].keys(): 102 | if method in methods_to_ignore: 103 | continue 104 | l, = ax.plot(init_Z_Ms[dataset][method],init_Z_elbos[dataset][method]["Median"], label=name_dict[method], 105 | color=color_dict[method]) 106 | ax.plot(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["Median"], label="_nolegend_", 107 | color=l.get_color(), linestyle=(0, (3, 1, 1, 1, 1, 1))) 108 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_elbos[dataset][method]["20 pct"], 109 | init_Z_elbos[dataset][method]["80 pct"], color=l.get_color(), alpha=.2) 110 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["20 pct"], 111 | init_Z_uppers[dataset][method]["80 pct"], color=l.get_color(), alpha=.2, 112 | hatch='/', label='_nolegend_') 113 | ax.axhline(baseline_lmls[dataset], label='Full GP', linestyle="--",color='k') 114 | if dataset_plot_settings["include_linear"]: 115 | ax.axhline(l_elbo, label='Linear', linestyle='-.',color='k') 116 | ax.axhline(m_elbo, label='Mean', linestyle=':',color='k') 117 | #ax.legend(loc="upper left") 118 | ax.set_xlabel("M") 119 | ax.set_ylabel("ELBO") 120 | ax.set_title(plot_title_dict[dataset]) 121 | ax.set_xlim(dataset_plot_settings["xlim"]) 122 | ax.set_ylim(dataset_plot_settings["elbo_ylim"]) 123 | plt.tight_layout() 124 | fig.savefig(f"./figures/fixedhyp-{dataset}-elbo.pdf") 125 | 126 | fig, ax = plt.subplots() 127 | for method in init_Z_runs[dataset].keys(): 128 | if method in methods_to_ignore: 129 | continue 130 | l, = ax.plot(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 131 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["20 pct"], 132 | init_Z_rmses[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 133 | ax.axhline(full_rmses[dataset], label="Full GP", linestyle='--',color='k') 134 | if dataset_plot_settings["include_linear"]: 135 | ax.axhline(l_rmse, label="Linear", linestyle='-.',color='k') 136 | ax.axhline(m_rmse, label="Mean", linestyle=':',color='k') 137 | #ax.legend() 138 | ax.set_xlabel("M") 139 | ax.set_ylabel("RMSE") 140 | #ax.set_title(plot_title_dict[dataset]) 141 | ax.set_xlim(dataset_plot_settings["xlim"]) 142 
| ax.set_ylim(dataset_plot_settings["rmse_ylim"]) 143 | plt.tight_layout() 144 | fig.savefig(f"./figures/fixedhyp-{dataset}-rmse.pdf") 145 | 146 | fig, ax = plt.subplots() 147 | for method in init_Z_runs[dataset].keys(): 148 | if method in methods_to_ignore: 149 | continue 150 | l,=ax.plot(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 151 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["20 pct"], 152 | init_Z_nlpps[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 153 | ax.axhline(full_nlpps[dataset], label="Full GP", linestyle='--', color='k') 154 | if dataset_plot_settings["include_linear"]: 155 | ax.axhline(l_nlpp, label="Linear", linestyle='-.',color='k') 156 | ax.axhline(m_nlpp, label="Mean", linestyle=':',color='k') 157 | #ax.legend() 158 | ax.set_xlabel("M") 159 | ax.set_ylabel("NLPD") 160 | ax.set_xlim(dataset_plot_settings["xlim"]) 161 | ax.set_ylim(dataset_plot_settings["nlpp_ylim"]) 162 | #ax.set_title(plot_title_dict[dataset]) 163 | plt.tight_layout() 164 | fig.savefig(f"./figures/fixedhyp-{dataset}-nlpp.pdf") 165 | 166 | plt.show() 167 | 168 | 169 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-init-inducing-opt.py: -------------------------------------------------------------------------------- 1 | import jug.task 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | from robustgp_experiments.utils import baselines 6 | import matplotlib 7 | matplotlib.rcParams['axes.spines.right'] = False 8 | matplotlib.rcParams['axes.spines.top'] = False 9 | font = {'family': 'cmr10', 'size': 24} 10 | matplotlib.rc('font', **font) 11 | matplotlib.rc('text', usetex=True) 12 | color_dict = {"Uniform":'#999999', 13 | "Kmeans": '#ff7f00', 14 | "Greedy Conditional Variance": '#4daf4a', 15 | "Gradient": "#377eb8", 16 | "RLS": '#a65628', 17 | "M-DPP MCMC": '#984ea3', 18 | "reinit_Z_sF": 'C9'} 19 | name_dict = {"Kmeans": "K-means", "Uniform":"Uniform", 20 | "Greedy Conditional Variance":"Greedy var.", 21 | "Sample Conditional Variance":"Sample var.", 22 | "Gradient":"Gradient", 23 | "reinit_Z_sF":"Greedy var. (reinit.)", 24 | "reinit_Z_sT":"Sample var. 
(reinit.)", 25 | "RLS":"RLS", 26 | "M-DPP MCMC": "M-DPP MCMC"} 27 | plot_title_dict = {"Wilson_energy":"Energy", "Wilson_elevators":"Elevators","Naval_noisy":"Naval with Noise"} 28 | methods_to_ignore = ["Sample Conditional Variance", "reinit_Z_sT"] 29 | jug.init("jug_init_inducing_opt.py", "jug_init_inducing_opt.jugdata") 30 | from jug_init_inducing_opt import ( 31 | init_Z_runs, init_Z_task_results, baseline_exps, full_rmses, full_nlpps, baseline_lmls, Ms, dataset_names 32 | ) 33 | # Evaluation 34 | init_Z_rmses = {} 35 | init_Z_nlpps = {} 36 | init_Z_elbos = {} 37 | init_Z_uppers = {} 38 | init_Z_Ms = {} 39 | mean_baselines = {} 40 | linear_baselines = {} 41 | for dataset in dataset_names: 42 | init_Z_rmses[dataset] = {} 43 | init_Z_nlpps[dataset] = {} 44 | init_Z_elbos[dataset] = {} 45 | init_Z_uppers[dataset] = {} 46 | init_Z_Ms[dataset] = {} 47 | 48 | for init_Z_method in init_Z_runs[dataset].keys(): 49 | init_Z_rmses[dataset][init_Z_method] = dict() 50 | init_Z_nlpps[dataset][init_Z_method] = dict() 51 | init_Z_elbos[dataset][init_Z_method] = dict() 52 | init_Z_uppers[dataset][init_Z_method] = dict() 53 | init_Z_Ms[dataset][init_Z_method] = [] 54 | for stat in ["Means", "Standard dev.", "Sample std.", "Median", "80 pct", "20 pct"]: 55 | for metric in [init_Z_rmses,init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 56 | metric[dataset][init_Z_method][stat] = [] 57 | for M in init_Z_task_results[dataset][init_Z_method].keys(): 58 | init_Z_Ms[dataset][init_Z_method].append(int(M)) 59 | init_Z_rmses[dataset][init_Z_method][M] = [] 60 | init_Z_nlpps[dataset][init_Z_method][M] = [] 61 | init_Z_elbos[dataset][init_Z_method][M] = [] 62 | init_Z_uppers[dataset][init_Z_method][M] = [] 63 | for result in init_Z_task_results[dataset][init_Z_method][M]: 64 | elbo, upper, rmse, nlpp = jug.task.value(result) 65 | init_Z_elbos[dataset][str(init_Z_method)][M].append(elbo) 66 | init_Z_uppers[dataset][str(init_Z_method)][M].append(upper) 67 | init_Z_rmses[dataset][str(init_Z_method)][M].append(rmse) 68 | init_Z_nlpps[dataset][str(init_Z_method)][M].append(nlpp) 69 | for metric in [init_Z_rmses, init_Z_nlpps, init_Z_elbos, init_Z_uppers]: 70 | metric[dataset][init_Z_method]["Means"].append(np.mean(metric[dataset][init_Z_method][M])) 71 | metric[dataset][init_Z_method]["Standard dev."].append(np.std(metric[dataset][init_Z_method][M])) 72 | metric[dataset][init_Z_method]["Sample std."].append(np.std(metric[dataset][init_Z_method][M]) / 73 | np.sqrt((len(metric[dataset][init_Z_method][M])-1))) 74 | metric[dataset][init_Z_method]["Median"].append(np.nanmedian(metric[dataset][init_Z_method][M])) 75 | metric[dataset][init_Z_method]["20 pct"].append(np.nanpercentile(metric[dataset][init_Z_method][M],20)) 76 | metric[dataset][init_Z_method]["80 pct"].append(np.nanpercentile(metric[dataset][init_Z_method][M],80)) 77 | 78 | 79 | baseline_exp = baseline_exps[dataset] 80 | mean_baselines[dataset] = baselines.meanpred_baseline(None, baseline_exp.Y_train, None, baseline_exp.Y_test) 81 | linear_baselines[dataset] = baselines.linear_baseline(baseline_exp.X_train, baseline_exp.Y_train, 82 | baseline_exp.X_test, baseline_exp.Y_test) 83 | # 84 | # Plotting 85 | for dataset in dataset_names: 86 | dataset_plot_settings = dict( 87 | Naval_noisy=dict(xlim=(20,300),elbo_only_ylim=(37300,37900),elbo_ylim=(36e3, 43500),nlpp_ylim=(-3.6,-3.4),rmse_ylim=(.0065,.01),include_mean=False,include_linear=False), 88 | Wilson_energy = dict(xlim=(10,200),elbo_only_ylim=(800,1020),elbo_ylim=(800, 
1350),nlpp_ylim=(-1.7,-.4),rmse_ylim=(.045,.07),include_mean=False,include_linear=False), 89 | Wilson_elevators = dict(xlim=(-10,5000),elbo_only_ylim=(-7250,-6100),elbo_ylim=(-7200,1000),nlpp_ylim=(.375,.46),rmse_ylim=(.35,.38),include_mean=False,include_linear=False) 90 | ).get(dataset, dict(xlim=None,elbo_only_y_lim=None,elbo_ylim=None,include_mean=True,include_linear=True)) 91 | l_elbo, l_rmse, l_nlpp = linear_baselines[dataset] 92 | m_elbo, m_rmse, m_nlpp = mean_baselines[dataset] 93 | fig, ax = plt.subplots() 94 | methods = init_Z_runs[dataset].keys() 95 | for method in methods: 96 | if method in methods_to_ignore: 97 | continue 98 | l, = ax.plot(init_Z_Ms[dataset][method],init_Z_elbos[dataset][method]["Median"], label=name_dict[method], 99 | color=color_dict[method]) 100 | ax.plot(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["Median"], label=f"_nolegend_", 101 | color=l.get_color(), linestyle=(0, (3, 1, 1, 1, 1, 1))) 102 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_elbos[dataset][method]["20 pct"], 103 | init_Z_elbos[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 104 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_uppers[dataset][method]["20 pct"], 105 | init_Z_uppers[dataset][method]["80 pct"], color=l.get_color(),alpha=.2, 106 | hatch='/', label='_nolegend_') 107 | ax.axhline(baseline_lmls[dataset], label='Full GP', linestyle="--",color='k') 108 | if dataset_plot_settings['include_linear']: 109 | ax.axhline(l_elbo, label='Linear', linestyle='-.',color='k') 110 | if dataset_plot_settings['include_mean']: 111 | ax.axhline(m_elbo, label='Mean', linestyle=':',color='k') 112 | # ax.legend() 113 | ax.set_title(plot_title_dict[dataset]) 114 | ax.set_xlabel("M") 115 | ax.set_ylabel("ELBO") 116 | ax.set_ylim(dataset_plot_settings["elbo_ylim"]) 117 | ax.set_xlim(dataset_plot_settings["xlim"]) 118 | 119 | plt.tight_layout() 120 | fig.savefig(f"./figures/opthyp-{dataset}-elbo.pdf") 121 | fig, ax = plt.subplots() 122 | for method in methods: 123 | if method in methods_to_ignore: 124 | continue 125 | l, = ax.plot(init_Z_Ms[dataset][method],init_Z_elbos[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 126 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_elbos[dataset][method]["20 pct"], 127 | init_Z_elbos[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 128 | ax.axhline(baseline_lmls[dataset], label='Full GP', linestyle="--",color='k') 129 | if dataset_plot_settings['include_linear']: 130 | ax.axhline(l_elbo, label='Linear', linestyle='-.',color='k') 131 | if dataset_plot_settings['include_mean']: 132 | ax.axhline(m_elbo, label='Mean', linestyle=':',color='k') 133 | # ax.legend() 134 | ax.set_xlabel("M") 135 | ax.set_title(plot_title_dict[dataset]) 136 | ax.set_ylabel("ELBO") 137 | ax.set_xlim(dataset_plot_settings["xlim"]) 138 | ax.set_ylim(dataset_plot_settings["elbo_only_ylim"]) 139 | plt.tight_layout() 140 | fig.savefig(f"./figures/opthyp-{dataset}-elbo-only.pdf") 141 | 142 | fig, ax = plt.subplots() 143 | for method in methods: 144 | if method in methods_to_ignore: 145 | continue 146 | l, = ax.plot(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 147 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_rmses[dataset][method]["20 pct"], 148 | init_Z_rmses[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 149 | ax.axhline(full_rmses[dataset], label="Full GP", linestyle='--',color='k') 150 | if dataset_plot_settings['include_linear']: 151 
| ax.axhline(l_rmse, label='Linear', linestyle='-.',color='k') 152 | if dataset_plot_settings['include_mean']: 153 | ax.axhline(m_rmse, label='Mean', linestyle=':',color='k') 154 | # ax.legend() 155 | # ax.set_title(plot_title_dict[dataset]) 156 | ax.set_xlim(dataset_plot_settings["xlim"]) 157 | ax.set_ylim(dataset_plot_settings["rmse_ylim"]) 158 | ax.set_xlabel("M") 159 | ax.set_ylabel("RMSE") 160 | plt.tight_layout() 161 | fig.savefig(f"./figures/opthyp-{dataset}-rmse.pdf") 162 | 163 | fig, ax = plt.subplots() 164 | for method in methods: 165 | if method in methods_to_ignore: 166 | continue 167 | l,=ax.plot(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["Median"], label=name_dict[method],color=color_dict[method]) 168 | ax.fill_between(init_Z_Ms[dataset][method], init_Z_nlpps[dataset][method]["20 pct"], 169 | init_Z_nlpps[dataset][method]["80 pct"], color=l.get_color(),alpha=.2) 170 | ax.axhline(full_nlpps[dataset], label="Full GP", linestyle='--',color='k') 171 | if dataset_plot_settings['include_linear']: 172 | ax.axhline(l_nlpp, label='Linear', linestyle='-.',color='k') 173 | if dataset_plot_settings['include_mean']: 174 | ax.axhline(m_nlpp, label='Mean', linestyle=':',color='k') 175 | # ax.legend() 176 | # ax.set_title(plot_title_dict[dataset]) 177 | ax.set_xlabel("M") 178 | ax.set_ylabel("NLPD") 179 | ax.set_xlim(dataset_plot_settings["xlim"]) 180 | ax.set_ylim(dataset_plot_settings["nlpp_ylim"]) 181 | plt.tight_layout() 182 | fig.savefig(f"./figures/opthyp-{dataset}-nlpp.pdf") 183 | 184 | plt.show() 185 | 186 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-opt-inducing.py: -------------------------------------------------------------------------------- 1 | import jug.task 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import matplotlib 5 | 6 | matplotlib.rcParams["axes.spines.right"] = False 7 | matplotlib.rcParams["axes.spines.top"] = False 8 | font = {"family": "cmr10", "size": 24} 9 | matplotlib.rc("font", **font) 10 | matplotlib.rc("text", usetex=True) 11 | 12 | color_dict = {"Kmeans": "#ff7f00", "Greedy Conditional Variance": "C9", "Gradient": "#377eb8"} 13 | name_dict = { 14 | "Kmeans": "K-means (reinit.)", 15 | "Greedy Conditional Variance": "Greedy var. 
(reinit.)", 16 | "Gradient": "Gradient", 17 | } 18 | plot_title_dict = { 19 | "Wilson_energy": "Energy (M=65)", 20 | "Wilson_elevators": "Elevators (M=1200)", 21 | "Naval_noisy": "Naval with Noise (M=55)", 22 | } 23 | jug.init("jug_opt_inducing.py", "jug_opt_inducing.jugdata") 24 | from jug_opt_inducing import init_Z_runs, init_Z_task_results, baseline_lmls 25 | 26 | hists = {} 27 | for dataset in init_Z_runs.keys(): 28 | hists[dataset] = dict() 29 | for init_Z_method in init_Z_runs[dataset].keys(): 30 | hists[dataset][init_Z_method] = list() 31 | for M, result in init_Z_task_results[dataset][init_Z_method].items(): 32 | outputs = jug.task.value(result) 33 | for output in outputs: 34 | hists[dataset][init_Z_method].append(output[-1]) 35 | 36 | 37 | for dataset in init_Z_runs.keys(): 38 | fig, ax = plt.subplots() 39 | dataset_plot_settings = dict( 40 | Naval_noisy=dict(elbo_ylim=(32e3, 39e3)), 41 | Wilson_energy=dict(elbo_ylim=(700, 1050)), 42 | Wilson_elevators=dict(elbo_ylim=(-7000, -6000)), 43 | ).get(dataset, dict(elbo_ylim=None)) 44 | for init_Z_method in init_Z_runs[dataset].keys(): 45 | losses = hists[dataset][init_Z_method] 46 | max_f_evals = max(map(len, losses)) 47 | elbos = -np.array([elbo + [np.nan] * (max_f_evals - len(elbo)) for elbo in losses]) 48 | best_elbo = np.maximum.accumulate(elbos, axis=1) 49 | # median_elbo = np.nanmedian(best_elbo, axis=0) 50 | (l,) = ax.plot( 51 | np.arange(len(best_elbo[0])), best_elbo[0], label=name_dict[init_Z_method], color=color_dict[init_Z_method] 52 | ) 53 | for i in range(1, len(best_elbo)): 54 | ax.plot(np.arange(len(best_elbo[i])), best_elbo[i], label="_no_legend_", color=l.get_color()) 55 | ax.axhline(baseline_lmls[dataset], label="Full GP", linestyle="--", color="k") 56 | handles, labels = ax.get_legend_handles_labels() 57 | # sort both labels and handles by labels 58 | labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0])) 59 | # ax.legend() 60 | ax.set_xlabel("Number of Fn. 
Evals.") 61 | ax.set_ylabel("ELBO") 62 | ax.set_ylim(dataset_plot_settings["elbo_ylim"]) 63 | ax.set_xlim((-10, 250)) 64 | ax.set_title(plot_title_dict[dataset]) 65 | plt.tight_layout() 66 | fig.savefig(f"./figures/optall-{dataset}-trace.pdf") 67 | plt.show() 68 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug-plot-search-uci.py: -------------------------------------------------------------------------------- 1 | import jug.task 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | from robustgp_experiments.utils.baselines import linear_baseline, meanpred_baseline 6 | 7 | jug.init("jug_search_uci.py", "jug_search_uci.jugdata") 8 | from jug_search_uci import ( 9 | dataset_names, sparse_task_results, get_settings, baseline_results, baseline_exps, sparse_exps 10 | ) 11 | 12 | plot_all_datasets = True 13 | plot_normalised = True 14 | 15 | # Can comment this out to run all datasets 16 | if not plot_all_datasets: 17 | dataset_names = ["Wilson_energy"] 18 | # dataset_names = [n for n in dataset_names if n not in 19 | # ["Wilson_pendulum", "Pendulum_noisy", "Wilson_wine"]] 20 | # dataset_names = [n for n in dataset_names if n not in 21 | # ["Wilson_pendulum", "Pendulum_noisy", "Wilson_wine", "kin40k"]] 22 | # dataset_names = ["Naval", "Naval_noisy"] 23 | # dataset_names = ["Wilson_stock", "Wilson_energy", "Wilson_concrete", "Wilson_airfoil"] 24 | 25 | # Get values from tasks 26 | sparse_results_raw = {} 27 | sparse_results_normalised = {} 28 | baseline_lmls = {} 29 | for dataset_name in dataset_names: 30 | if (type(baseline_results[dataset_name]) is float) or not baseline_results[dataset_name].can_load(): 31 | continue 32 | baseline_lmls[dataset_name] = jug.task.value(baseline_results[dataset_name]) 33 | 34 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 35 | sparse_task_values = [jug.task.value(result) for result in sparse_task_results[dataset_name]] 36 | sparse_results_raw[dataset_name] = pd.DataFrame.from_records( 37 | sparse_task_values, columns=['elbo', 'upper', 'rmse', 'nlpp'], index=Ms 38 | ) 39 | const_model_lml = meanpred_baseline(baseline_exps[dataset_name].X_train, baseline_exps[dataset_name].Y_train, 40 | baseline_exps[dataset_name].X_test, baseline_exps[dataset_name].Y_test)[0] 41 | linear_model_lml = linear_baseline(baseline_exps[dataset_name].X_train, baseline_exps[dataset_name].Y_train, 42 | baseline_exps[dataset_name].X_test, baseline_exps[dataset_name].Y_test)[0] 43 | rel_lml = const_model_lml 44 | sparse_results_normalised[dataset_name] = sparse_results_raw[dataset_name].copy() 45 | sparse_results_normalised[dataset_name].elbo -= baseline_lmls[dataset_name] 46 | sparse_results_normalised[dataset_name].elbo /= baseline_lmls[dataset_name] - rel_lml 47 | sparse_results_normalised[dataset_name].upper -= baseline_lmls[dataset_name] 48 | sparse_results_normalised[dataset_name].index /= baseline_exps[dataset_name].X_train.shape[0] 49 | 50 | baseline_exps[dataset_name].load() 51 | print(f"{dataset_name:30} lik variance: {baseline_exps[dataset_name].model.likelihood.variance.numpy():.8f}" 52 | f" lml: {baseline_lmls[dataset_name]} linlml: {linear_model_lml}") 53 | 54 | sparse_results = sparse_results_normalised if plot_normalised else sparse_results_raw 55 | 56 | _, ax = plt.subplots() 57 | for dataset_name in sparse_results.keys(): 58 | # ax.axhline(baseline_lmls[dataset_name]) 59 | # ax.axhline() 60 | l, = 
ax.plot(sparse_results[dataset_name].index, sparse_results[dataset_name].elbo, 61 | label=f"{dataset_name} ({len(sparse_exps[dataset_name][0].X_train)})") 62 | # ax.plot(sparse_results[dataset_name].index, sparse_results[dataset_name].upper, 63 | # color=l.get_color(), linestyle=':') 64 | ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fontsize='x-small', ncol=5) 65 | plt.show() 66 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_init_inducing_fixedhyp.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | import gpflow 5 | import jug 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | import robustgp 10 | from robustgp.utilities import set_trainable 11 | from robustgp_experiments.init_z.utils import print_post_run, uci_train_settings 12 | from robustgp_experiments.utils import FullbatchUciExperiment, LoggerCallback 13 | 14 | # Settings 15 | dataset_names = ["Wilson_energy", "Naval_noisy", "Wilson_elevators"] 16 | num_seeds = 3 # Use 10 to replicate experiments from paper 17 | seeds = np.arange(num_seeds) 18 | all_model_parameters = {} 19 | 20 | # 21 | # 22 | # Setup 23 | gpflow.config.set_default_positive_minimum(1.0e-5) 24 | gpflow.config.set_default_jitter(1e-10) 25 | init_Z_methods = dict() 26 | init_Z_methods["Uniform"] = [robustgp.UniformSubsample(seed=seed) for seed in seeds] 27 | init_Z_methods["Greedy Conditional Variance"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 28 | # init_Z_methods["Sample Conditional Variance"] = [robustgp.ConditionalVariance(sample=True, seed=seed) for seed in seeds] 29 | init_Z_methods["Kmeans"] = [robustgp.Kmeans(seed=seed) for seed in seeds] 30 | # init_Z_methods["M-DPP MCMC"] = [robustgp.KdppMCMC(seed=seed) for seed in seeds] 31 | # init_Z_methods["RLS"] = [robustgp.RLS(seed=seed) for seed in seeds] 32 | 33 | experiment_name = "init-inducing" 34 | 35 | 36 | uci_train_settings.update( 37 | dict( 38 | Naval_noisy=( 39 | [10, 20, 30, 40, 45, 47, 50, 55, 60, 65, 70, 75, 80, 85, 90, 100, 130, 150, 180, 200, 250, 300, 400, 500], 40 | {}, 41 | ), # Very sparse solution exists 42 | ) 43 | ) 44 | 45 | 46 | def get_settings(dataset_name): 47 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 48 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 49 | common_run_settings = dict( 50 | storage_path=experiment_storage_path, dataset_name=dataset_name, max_lengthscale=1001.0, max_variance=1001.0 51 | ) 52 | return experiment_storage_path, Ms, common_run_settings, dataset_custom_settings 53 | 54 | 55 | # 56 | # Experiment classes 57 | @dataclass 58 | class FullbatchUciInducingOptExperiment(FullbatchUciExperiment): 59 | optimise_objective: Optional[str] = None # None | lower | upper 60 | 61 | def run_optimisation(self): 62 | print(f"Running {str(self)}") 63 | 64 | model = self.model 65 | set_trainable(model, False) 66 | set_trainable(model.inducing_variable, True) 67 | 68 | if self.optimise_objective is None: 69 | return 70 | elif self.optimise_objective == "upper": 71 | loss_function = tf.function(model.upper_bound) 72 | elif self.optimise_objective == "lower": 73 | loss_function = self.model.training_loss_closure(compile=True) 74 | else: 75 | raise NotImplementedError 76 | hist = LoggerCallback(model, loss_function) 77 | 78 | def run_optimisation(): 79 | if self.optimizer == "l-bfgs-b" or self.optimizer == "bfgs": 80 | try: 
81 | opt = gpflow.optimizers.Scipy() 82 | opt.minimize( 83 | loss_function, 84 | self.model.trainable_variables, 85 | method=self.optimizer, 86 | options=dict(maxiter=1000, disp=False), 87 | step_callback=hist, 88 | ) 89 | print("") 90 | except KeyboardInterrupt: 91 | pass # TODO: Come up with something better than just pass... 92 | else: 93 | raise NotImplementedError(f"I don't know {self.optimizer}") 94 | 95 | run_optimisation() 96 | 97 | # Store results 98 | self.trained_parameters = gpflow.utilities.read_values(model) 99 | self.train_objective_hist = (hist.n_iters, hist.log_likelihoods) 100 | 101 | 102 | def compute_model_stats(exp): 103 | rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 104 | nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 105 | elbo = exp.model.elbo().numpy() 106 | upper = exp.model.upper_bound().numpy() 107 | return elbo, upper, rmse, nlpp 108 | 109 | 110 | @jug.TaskGenerator 111 | def run_baseline(baseline_exp): 112 | baseline_exp.cached_run() 113 | if baseline_exp.model_class == "SGPR": 114 | baseline_lml = baseline_exp.model.elbo().numpy() 115 | else: 116 | baseline_lml = baseline_exp.model.log_marginal_likelihood().numpy() 117 | model_parameters = gpflow.utilities.read_values(baseline_exp.model) 118 | if ".inducing_variable.Z" in model_parameters: 119 | model_parameters.pop(".inducing_variable.Z") 120 | full_rmse = ( 121 | np.mean((baseline_exp.model.predict_f(baseline_exp.X_test)[0].numpy() - baseline_exp.Y_test) ** 2.0) ** 0.5 122 | ) 123 | full_nlpp = -np.mean(baseline_exp.model.predict_log_density((baseline_exp.X_test, baseline_exp.Y_test))) 124 | return model_parameters, full_rmse, full_nlpp, baseline_lml 125 | 126 | 127 | # Baseline exp 128 | baseline_exps = {} 129 | full_rmses = {} 130 | full_nlpps = {} 131 | baseline_lmls = {} 132 | for dataset_name in dataset_names: 133 | # The baseline is GPR, except for Naval, where we use an SGPR with 1000 inducing variables. 
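# (The dict(Naval_noisy=...).get(dataset_name, ...) construction below returns the Naval-specific SGPR
#  override when it applies, and otherwise falls back to the plain exact-GPR baseline settings.)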
134 | baseline_custom_settings = dict( 135 | Naval_noisy={ 136 | "model_class": "SGPR", 137 | "M": 1000, 138 | "training_procedure": "reinit_Z", 139 | "init_Z_method": robustgp.ConditionalVariance(sample=False), 140 | "max_lengthscale": 1000.0, 141 | "max_variance": 1000.0, 142 | } 143 | ).get(dataset_name, dict(model_class="GPR", max_lengthscale=1000.0, max_variance=1000.0)) 144 | experiment_storage_path, _, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 145 | baseline_exps[dataset_name] = FullbatchUciExperiment( 146 | **{**common_run_settings, **dataset_custom_settings, **baseline_custom_settings} 147 | ) 148 | ( 149 | all_model_parameters[dataset_name], 150 | full_rmses[dataset_name], 151 | full_nlpps[dataset_name], 152 | baseline_lmls[dataset_name], 153 | ) = jug.bvalue(run_baseline(baseline_exps[dataset_name])) 154 | 155 | 156 | @jug.TaskGenerator 157 | def run_sparse_init(exp): 158 | print(exp) 159 | exp.setup_model() 160 | exp.init_params() 161 | print_post_run(exp) 162 | elbo, upper, rmse, nlpp = compute_model_stats(exp) 163 | return elbo, upper, rmse, nlpp 164 | 165 | 166 | # Sparse experiments 167 | init_Z_runs = {} 168 | init_Z_task_results = {} 169 | for dataset_name in dataset_names: 170 | init_Z_runs[dataset_name] = dict() 171 | init_Z_task_results[dataset_name] = dict() 172 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 173 | for method_name, init_Z_method in init_Z_methods.items(): 174 | init_Z_runs[dataset_name][method_name] = dict() 175 | init_Z_task_results[dataset_name][method_name] = dict() 176 | for M in Ms: 177 | init_Z_runs[dataset_name][method_name][str(M)] = [] 178 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 179 | settings_for_runs = [ 180 | {"model_class": "SGPR", "M": M, "init_Z_method": seeded_init_Z_method, **dataset_custom_settings} 181 | for M in Ms 182 | for seeded_init_Z_method in init_Z_method 183 | ] 184 | for run_settings in settings_for_runs: 185 | M = str(run_settings["M"]) 186 | # TODO: A better approach would be to put the bvalue at this point, so other jobs could run before the last 187 | # full GP finishes. 
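# Note on the TODO above: when this file is run through `jug execute`, jug.bvalue(task) follows jug's
# documented barrier-plus-value behaviour -- if the baseline task has not finished yet, processing of the
# jugfile stops at that call so the baseline can be computed first, and on a later pass the cached result
# is loaded and the sparse experiments below are constructed with the trained baseline hyperparameters.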
188 | exp = FullbatchUciExperiment( 189 | **{**common_run_settings, **run_settings}, initial_parameters=all_model_parameters[dataset_name] 190 | ) 191 | result = run_sparse_init(exp) 192 | init_Z_runs[dataset_name][method_name][M].append(exp) 193 | init_Z_task_results[dataset_name][method_name][M].append(result) 194 | 195 | 196 | # (Sparse) Bound optimisation 197 | @jug.TaskGenerator 198 | def run_sparse_opt(exp): 199 | print(exp) 200 | exp.cached_run() 201 | print_post_run(exp) 202 | elbo, upper, rmse, nlpp = compute_model_stats(exp) 203 | return elbo, upper, rmse, nlpp 204 | 205 | 206 | upper_runs = dict() 207 | for dataset_name in dataset_names: 208 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 209 | settings_for_runs = [ 210 | { 211 | "model_class": "SGPR", 212 | "M": M, 213 | "init_Z_method": robustgp.ConditionalVariance(sample=True, seed=seed), 214 | **dataset_custom_settings, 215 | } 216 | for M in Ms 217 | for seed in seeds 218 | ] 219 | init_Z_runs[dataset_name]["gradient"] = dict() 220 | init_Z_task_results[dataset_name]["gradient"] = dict() 221 | upper_runs[dataset_name] = dict() 222 | for M in Ms: 223 | init_Z_runs[dataset_name]["gradient"][str(M)] = [] 224 | init_Z_task_results[dataset_name]["gradient"][str(M)] = [] 225 | upper_runs[dataset_name][str(M)] = [] 226 | # for optimise_objective in ["upper", "lower"]: # Optimising the upper bound makes hardly any difference 227 | for optimise_objective in ["lower"]: 228 | for run_settings in settings_for_runs: 229 | exp = FullbatchUciInducingOptExperiment( 230 | **{**common_run_settings, **run_settings}, 231 | initial_parameters=all_model_parameters[dataset_name], 232 | optimise_objective=optimise_objective, 233 | ) 234 | M = str(run_settings["M"]) 235 | result = run_sparse_opt(exp) 236 | if optimise_objective == "lower": 237 | init_Z_runs[dataset_name]["gradient"][str(M)].append(exp) 238 | init_Z_task_results[dataset_name]["gradient"][str(M)].append(result) 239 | elif optimise_objective == "upper": 240 | upper_runs[dataset_name][str(M)].append(exp) 241 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_init_inducing_opt.py: -------------------------------------------------------------------------------- 1 | # # Inducing point initialisation while optimising hypers 2 | # Assess how well inducing point initialisation works, in conjunction with optimising the hyperparameters. 3 | # Here, we compare: 4 | # - Fixed Z, initialised with the baseline kernel hyperparameters 5 | # - EM "reinit" Z, initialised with the baseline kernel hyperparameters 6 | # In all cases, the Z are initialised using default kernel parameters, not the ones from the baseline. This is to ensure 7 | # that we do not use information when initialising Z that isn't accessable when running a new dataset. 8 | # 9 | # Local optima are a bit annoying when "cold" initialising. They make the plot appear non-smooth. 
So we initialise at 10 | 11 | 12 | import gpflow 13 | import jug 14 | import numpy as np 15 | 16 | import robustgp 17 | from robustgp_experiments.init_z.utils import uci_train_settings, print_post_run 18 | from robustgp_experiments.utils import FullbatchUciExperiment 19 | 20 | # 21 | # 22 | # Settings 23 | dataset_names = ["Wilson_energy", "Naval_noisy", "Wilson_elevators"] 24 | # init_from_baseline = True 25 | init_from_baseline = False 26 | 27 | uci_train_settings.update( 28 | dict( 29 | Naval_noisy=( 30 | [10, 20, 30, 40, 45, 47, 50, 55, 60, 65, 70, 75, 80, 85, 90, 100, 130, 150, 180, 200, 250, 300, 400, 500], 31 | {}, 32 | ), # Very sparse solution exists 33 | ) 34 | ) 35 | # 36 | # 37 | # Setup 38 | gpflow.config.set_default_positive_minimum(1.0e-5) 39 | gpflow.config.set_default_jitter(1e-10) 40 | 41 | num_seeds = 3 # Use 10 to replicate the paper 42 | seeds = np.arange(num_seeds) 43 | 44 | init_Z_methods = dict() 45 | init_Z_methods["Uniform"] = [robustgp.UniformSubsample(seed=seed) for seed in seeds] 46 | init_Z_methods["Greedy Conditional Variance"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 47 | # init_Z_methods["Sample Conditional Variance"] = [robustgp.ConditionalVariance(sample=True, seed=seed) for seed in seeds] 48 | init_Z_methods["Kmeans"] = [robustgp.Kmeans(seed=seed) for seed in seeds] 49 | # init_Z_methods["M-DPP MCMC"] = [robustgp.KdppMCMC(seed=seed) for seed in seeds] 50 | # init_Z_methods["RLS"] = [robustgp.RLS(seed=seed) for seed in seeds] 51 | 52 | experiment_name = "init-inducing-opt" 53 | 54 | 55 | def compute_model_stats(exp): 56 | elbo = exp.model.robust_maximum_log_likelihood_objective(restore_jitter=False).numpy() 57 | upper = exp.model.upper_bound().numpy() 58 | rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 59 | nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 60 | return elbo, upper, rmse, nlpp 61 | 62 | 63 | @jug.TaskGenerator 64 | def run_baseline(baseline_exp): 65 | baseline_exp.cached_run() 66 | baseline_lml = baseline_exp.model.robust_maximum_log_likelihood_objective().numpy() 67 | model_parameters = gpflow.utilities.read_values(baseline_exp.model) if init_from_baseline else {} 68 | if ".inducing_variable.Z" in model_parameters: 69 | model_parameters.pop(".inducing_variable.Z") 70 | full_rmse = ( 71 | np.mean((baseline_exp.model.predict_f(baseline_exp.X_test)[0].numpy() - baseline_exp.Y_test) ** 2.0) ** 0.5 72 | ) 73 | full_nlpp = -np.mean(baseline_exp.model.predict_log_density((baseline_exp.X_test, baseline_exp.Y_test))) 74 | 75 | return model_parameters, full_rmse, full_nlpp, baseline_lml 76 | 77 | 78 | baseline_exps = dict() 79 | all_model_parameters = dict() 80 | full_rmses = dict() 81 | full_nlpps = dict() 82 | baseline_lmls = dict() 83 | 84 | for dataset_name in dataset_names: 85 | # Baseline runs 86 | print("Baseline exp...", dataset_name) 87 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 88 | common_run_settings = dict( 89 | storage_path=experiment_storage_path, 90 | dataset_name=dataset_name, 91 | max_lengthscale=1001.0, 92 | max_variance=1001.0, 93 | training_procedure="fixed_Z", 94 | ) 95 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 96 | 97 | baseline_custom_settings = dict( 98 | Naval_noisy={ 99 | "model_class": "SGPR", 100 | "M": 1000, 101 | "training_procedure": "reinit_Z", 102 | "init_Z_method": robustgp.ConditionalVariance(sample=False), 103 | "max_lengthscale": 1000.0, 104 | "max_variance": 1000.0, 105 | } 
106 | ).get( 107 | dataset_name, dict(model_class="GPR", training_procedure="joint", max_lengthscale=1000.0, max_variance=1000.0) 108 | ) 109 | baseline_exps[dataset_name] = FullbatchUciExperiment( 110 | **{**common_run_settings, **dataset_custom_settings, **baseline_custom_settings} 111 | ) 112 | ( 113 | all_model_parameters[dataset_name], 114 | full_rmses[dataset_name], 115 | full_nlpps[dataset_name], 116 | baseline_lmls[dataset_name], 117 | ) = jug.bvalue(run_baseline(baseline_exps[dataset_name])) 118 | 119 | 120 | # Bound optimisation 121 | @jug.TaskGenerator 122 | def run_sparse_opt(exp): 123 | print(exp) 124 | exp.cached_run() 125 | print_post_run(exp) 126 | elbo, upper, rmse, nlpp = compute_model_stats(exp) 127 | return elbo, upper, rmse, nlpp 128 | 129 | 130 | # Sparse runs 131 | init_Z_runs = dict() 132 | init_Z_task_results = dict() 133 | for dataset_name in dataset_names: 134 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 135 | common_run_settings = dict( 136 | storage_path=experiment_storage_path, 137 | dataset_name=dataset_name, 138 | max_lengthscale=1001.0, 139 | max_variance=1001.0, 140 | training_procedure="fixed_Z", 141 | ) 142 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 143 | 144 | init_Z_runs[dataset_name] = {} 145 | init_Z_task_results[dataset_name] = {} 146 | for method_name, init_Z_method in init_Z_methods.items(): 147 | settings_for_runs = [ 148 | { 149 | "model_class": "SGPR", 150 | "M": M, 151 | "init_Z_method": seeded_init_Z_method, 152 | "base_filename": "opthyp-fixed_Z", 153 | "initial_parameters": all_model_parameters[dataset_name], 154 | **dataset_custom_settings, 155 | } 156 | for M in Ms 157 | for seeded_init_Z_method in init_Z_method 158 | ] 159 | init_Z_runs[dataset_name][method_name] = dict() 160 | init_Z_task_results[dataset_name][method_name] = dict() 161 | for M in Ms: 162 | init_Z_runs[dataset_name][method_name][str(M)] = [] 163 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 164 | for run_settings in settings_for_runs: 165 | M = str(run_settings["M"]) 166 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings}) 167 | result = run_sparse_opt(exp) 168 | init_Z_runs[dataset_name][method_name][M].append(exp) 169 | init_Z_task_results[dataset_name][method_name][M].append(result) 170 | 171 | # Optimisation of Z 172 | method_names = ["reinit_Z_sF", "reinit_Z_sT"] 173 | 174 | for dataset_name in dataset_names: 175 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 176 | common_run_settings = dict( 177 | storage_path=experiment_storage_path, dataset_name=dataset_name, max_lengthscale=1001.0, max_variance=1001.0 178 | ) 179 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 180 | settings_for_runs = [ 181 | { 182 | "model_class": "SGPR", 183 | "M": M, 184 | "training_procedure": "reinit_Z", 185 | "base_filename": "opthyp-reinit_Z", 186 | "initial_parameters": all_model_parameters[dataset_name], 187 | **dataset_custom_settings, 188 | } 189 | for M in Ms 190 | ] 191 | 192 | for method_name in method_names: 193 | init_Z_runs[dataset_name][method_name] = dict() 194 | init_Z_task_results[dataset_name][method_name] = dict() 195 | for M in Ms: 196 | init_Z_runs[dataset_name][method_name][str(M)] = [] 197 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 198 | 199 | for seed in seeds: 200 | for method_name, init_Z_method in zip( 201 | method_names, 202 | [ 203 | robustgp.ConditionalVariance(seed=seed, sample=False), 204 | 
robustgp.ConditionalVariance(seed=seed, sample=True), 205 | ], 206 | ): 207 | for run_settings in settings_for_runs: 208 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings, "init_Z_method": init_Z_method}) 209 | result = run_sparse_opt(exp) 210 | M = str(run_settings["M"]) 211 | init_Z_runs[dataset_name][method_name][M].append(exp) 212 | init_Z_task_results[dataset_name][method_name][M].append(result) 213 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_opt_inducing.py: -------------------------------------------------------------------------------- 1 | # # Inducing point initialisation while optimising hypers 2 | # Assess how well inducing point initialisation works, in conjunction with optimising the hyperparameters. 3 | # Here, we compare: 4 | # - Fixed Z, initialised with the baseline kernel hyperparameters 5 | # - EM "reinit" Z, initialised with the baseline kernel hyperparameters 6 | # In all cases, the Z are initialised using default kernel parameters, not the ones from the baseline. This is to ensure 7 | # that we do not use information when initialising Z that isn't accessable when running a new dataset. 8 | # 9 | # Local optima are a bit annoying when "cold" initialising. They make the plot appear non-smooth. So we initialise at 10 | 11 | 12 | import gpflow 13 | import jug 14 | import numpy as np 15 | 16 | import robustgp 17 | from robustgp_experiments.init_z.utils import uci_train_settings, print_post_run 18 | from robustgp_experiments.utils import FullbatchUciExperiment 19 | 20 | # Settings 21 | dataset_names = ["Wilson_energy", "Naval_noisy", "Wilson_elevators"] 22 | init_from_baseline = False 23 | 24 | uci_train_settings.update(dict(Naval_noisy=([55], {}), Wilson_energy=([65], {}), Wilson_elevators=([1200], {}))) 25 | 26 | # Setup 27 | gpflow.config.set_default_positive_minimum(1.0e-5) 28 | gpflow.config.set_default_jitter(1e-10) 29 | 30 | num_seeds = 3 # For the experiments in the paper we used 10 31 | seeds = np.arange(num_seeds) 32 | 33 | init_Z_methods = dict() 34 | init_Z_methods["Kmeans"] = [robustgp.Kmeans(seed=seed) for seed in seeds] 35 | init_Z_methods["Greedy Conditional Variance"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 36 | init_Z_methods["Gradient"] = [robustgp.ConditionalVariance(seed=seed) for seed in seeds] 37 | experiment_name = "opt-inducing" 38 | 39 | 40 | def compute_model_stats(exp): 41 | elbo = exp.model.robust_maximum_log_likelihood_objective(restore_jitter=False).numpy() 42 | rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 43 | nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 44 | upper = exp.model.upper_bound().numpy() 45 | hist = exp.train_objective_hist 46 | return elbo, upper, rmse, nlpp, hist 47 | 48 | 49 | @jug.TaskGenerator 50 | def run_baseline(baseline_exp): 51 | baseline_exp.cached_run() 52 | baseline_lml = baseline_exp.model.robust_maximum_log_likelihood_objective().numpy() 53 | model_parameters = gpflow.utilities.read_values(baseline_exp.model) if init_from_baseline else {} 54 | if ".inducing_variable.Z" in model_parameters: 55 | model_parameters.pop(".inducing_variable.Z") 56 | full_rmse = ( 57 | np.mean((baseline_exp.model.predict_f(baseline_exp.X_test)[0].numpy() - baseline_exp.Y_test) ** 2.0) ** 0.5 58 | ) 59 | full_nlpp = -np.mean(baseline_exp.model.predict_log_density((baseline_exp.X_test, baseline_exp.Y_test))) 60 | return model_parameters, full_rmse, full_nlpp, 
baseline_lml 61 | 62 | 63 | baseline_exps = dict() 64 | baseline_tasks = [] 65 | for dataset_name in dataset_names: 66 | # Baseline runs 67 | print("Baseline exp...", dataset_name) 68 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 69 | common_run_settings = dict(storage_path=experiment_storage_path, dataset_name=dataset_name) 70 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 71 | 72 | baseline_custom_settings = dict( 73 | Naval_noisy={ 74 | "model_class": "SGPR", 75 | "M": 1000, 76 | "training_procedure": "reinit_Z", 77 | "init_Z_method": robustgp.ConditionalVariance(sample=False), 78 | "max_lengthscale": 1000.0, 79 | "max_variance": 1000.0, 80 | } 81 | ).get( 82 | dataset_name, dict(model_class="GPR", training_procedure="joint", max_lengthscale=1000.0, max_variance=1000.0) 83 | ) 84 | baseline_exps[dataset_name] = FullbatchUciExperiment( 85 | **{**common_run_settings, **dataset_custom_settings, **baseline_custom_settings} 86 | ) 87 | 88 | baseline_tasks.append(run_baseline(baseline_exps[dataset_name])) 89 | 90 | 91 | # Bound optimisation 92 | @jug.TaskGenerator 93 | def run_sparse_opt(exp): 94 | print(exp) 95 | exp.cached_run() 96 | print_post_run(exp) 97 | elbo, upper, rmse, nlpp, hist = compute_model_stats(exp) 98 | return elbo, upper, rmse, nlpp, hist 99 | 100 | 101 | # Sparse runs 102 | init_Z_runs = dict() 103 | init_Z_task_results = dict() 104 | for dataset_name in dataset_names: 105 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 106 | common_run_settings = dict( 107 | storage_path=experiment_storage_path, 108 | dataset_name=dataset_name, 109 | max_lengthscale=1001.0, 110 | max_variance=1001.0, 111 | training_procedure="reinit_Z", 112 | ) 113 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 114 | 115 | init_Z_runs[dataset_name] = {} 116 | init_Z_task_results[dataset_name] = {} 117 | for method_name, init_Z_method in init_Z_methods.items(): 118 | training_procedure = "reinit_Z" 119 | settings_for_runs = [ 120 | { 121 | "model_class": "SGPR", 122 | "M": M, 123 | "init_Z_method": seeded_init_Z_method, 124 | "base_filename": "opthyp-fixed_Z", 125 | "initial_parameters": {}, 126 | "training_procedure": training_procedure, 127 | **dataset_custom_settings, 128 | } 129 | for M in Ms 130 | for seeded_init_Z_method in init_Z_method 131 | ] 132 | init_Z_runs[dataset_name][method_name] = dict() 133 | init_Z_task_results[dataset_name][method_name] = dict() 134 | for M in Ms: 135 | init_Z_runs[dataset_name][method_name][str(M)] = [] 136 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 137 | for run_settings in settings_for_runs: 138 | M = str(run_settings["M"]) 139 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings}) 140 | result = run_sparse_opt(exp) 141 | init_Z_runs[dataset_name][method_name][M].append(exp) 142 | init_Z_task_results[dataset_name][method_name][M].append(result) 143 | 144 | # Optimisation of Z 145 | method_name = "Gradient" 146 | for dataset_name in dataset_names: 147 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 148 | common_run_settings = dict( 149 | storage_path=experiment_storage_path, dataset_name=dataset_name, max_lengthscale=1001.0, max_variance=1001.0 150 | ) 151 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 152 | settings_for_runs = [ 153 | { 154 | "model_class": "SGPR", 155 | "M": M, 156 | "training_procedure": "joint", 157 | "base_filename": "opthyp-reinit_Z", 158 | "initial_parameters": {}, 159 | 
**dataset_custom_settings, 160 | } 161 | for M in Ms 162 | ] 163 | 164 | init_Z_runs[dataset_name][method_name] = dict() 165 | init_Z_task_results[dataset_name][method_name] = dict() 166 | for M in Ms: 167 | init_Z_runs[dataset_name][method_name][str(M)] = [] 168 | init_Z_task_results[dataset_name][method_name][str(M)] = [] 169 | 170 | for seed in seeds: 171 | init_Z_method = robustgp.ConditionalVariance(seed=seed, sample=False) 172 | for run_settings in settings_for_runs: 173 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings, "init_Z_method": init_Z_method}) 174 | result = run_sparse_opt(exp) 175 | M = str(run_settings["M"]) 176 | init_Z_runs[dataset_name][method_name][M].append(exp) 177 | init_Z_task_results[dataset_name][method_name][M].append(result) 178 | 179 | 180 | all_model_parameters = dict() 181 | full_rmses = dict() 182 | full_nlpps = dict() 183 | baseline_lmls = dict() 184 | # Put trained values in variables 185 | for dataset_name in dataset_names: 186 | ( 187 | all_model_parameters[dataset_name], 188 | full_rmses[dataset_name], 189 | full_nlpps[dataset_name], 190 | baseline_lmls[dataset_name], 191 | ) = jug.bvalue(run_baseline(baseline_exps[dataset_name])) 192 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/jug_search_uci.py: -------------------------------------------------------------------------------- 1 | # # Search-UCI 2 | # Search UCI datasets for datasets for which the sparse approximation effectively converges. I.e. marglik bound doesn't 3 | # increase much as we add more inducing points. 4 | 5 | import jug 6 | import numpy as np 7 | 8 | import gpflow 9 | import robustgp as inducing_init 10 | from robustgp_experiments.init_z.utils import uci_train_settings, good_datasets, print_post_run 11 | from robustgp_experiments.utils import FullbatchUciExperiment 12 | 13 | gpflow.config.set_default_jitter(1e-8) 14 | gpflow.config.set_default_positive_minimum(1.0e-5) 15 | 16 | MAXITER = 1000 17 | 18 | experiment_name = "search-uci" 19 | dataset_names = ["Wilson_energy", "Wilson_autompg", "Wilson_concrete", "Wilson_airfoil", "Wilson_servo", 20 | "Wilson_concreteslump"] 21 | # dataset_names = ["Wilson_stock", "Wilson_energy", "Pendulum_noisy", "Wilson_pendulum", "Wilson_concrete", 22 | # "Wilson_airfoil", "Wilson_wine", "Naval_noisy", "Naval", "Wilson_gas", "Wilson_skillcraft", 23 | # "Wilson_sml", "Wilson_parkinsons", "Parkinsons_noisy", "Power", "Wilson_pol", "Wilson_elevators", 24 | # "Wilson_bike", "Wilson_kin40k", "Wilson_protein", "Wilson_tamielectric"] 25 | # dataset_names = good_datasets 26 | 27 | Z_init_method = inducing_init.ConditionalVariance(sample=True) 28 | 29 | baseline_exps = {} 30 | baseline_results = {} 31 | sparse_exps = {} 32 | sparse_task_results = {} 33 | 34 | 35 | def get_settings(dataset_name): 36 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 37 | Ms, dataset_custom_settings = uci_train_settings[dataset_name] 38 | common_run_settings = dict(storage_path=experiment_storage_path, dataset_name=dataset_name, 39 | training_procedure="reinit_Z") 40 | return experiment_storage_path, Ms, common_run_settings, dataset_custom_settings 41 | 42 | 43 | @jug.TaskGenerator 44 | def full_cached_run(exp): 45 | exp.cached_run() 46 | print_post_run(exp) 47 | return exp.model.robust_maximum_log_likelihood_objective() 48 | 49 | 50 | @jug.TaskGenerator 51 | def sparse_cached_run(exp): 52 | exp.cached_run() 53 | print_post_run(exp) 54 | # lml = 
exp.model.log_marginal_likelihood().numpy() if exp.model_class == "GPR" else exp.model.elbo().numpy() 55 | lml = exp.model.robust_maximum_log_likelihood_objective(restore_jitter=False).numpy() 56 | upper = exp.model.upper_bound().numpy() 57 | # rmse = np.mean((exp.model.predict_f(exp.X_test)[0].numpy() - exp.Y_test) ** 2.0) ** 0.5 58 | # nlpp = -np.mean(exp.model.predict_log_density((exp.X_test, exp.Y_test))) 59 | return lml, upper, None, None 60 | 61 | 62 | for dataset_name in dataset_names: 63 | experiment_storage_path, Ms, common_run_settings, dataset_custom_settings = get_settings(dataset_name) 64 | 65 | # 66 | # 67 | # Baseline runs 68 | gpr_exp = FullbatchUciExperiment(**{**common_run_settings, **dataset_custom_settings, "model_class": "GPR", 69 | "training_procedure": "joint"}) 70 | gpr_exp.load_data() 71 | if len(gpr_exp.X_train) <= 30000: 72 | print("Baseline run...") 73 | result = full_cached_run(gpr_exp) 74 | else: 75 | print(f"{dataset_name}: Skipping baseline run... N={len(gpr_exp.X_train)}.") 76 | result = np.nan 77 | 78 | baseline_exps[dataset_name] = gpr_exp 79 | baseline_results[dataset_name] = result 80 | 81 | # 82 | # 83 | # Sparse runs -- We're trying an "optimal" training procedure 84 | greedy_init_settings_list = [ 85 | {"model_class": "SGPR", "M": M, "init_Z_method": Z_init_method, **dataset_custom_settings} 86 | for M in Ms] 87 | sparse_exps[dataset_name] = [] 88 | sparse_task_results[dataset_name] = [] 89 | for run_settings in greedy_init_settings_list: 90 | exp = FullbatchUciExperiment(**{**common_run_settings, **run_settings}) 91 | result = sparse_cached_run(exp) 92 | sparse_exps[dataset_name].append(exp) 93 | sparse_task_results[dataset_name].append(result) 94 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def print_post_run(run): 5 | print("") 6 | try: 7 | print(f"ELBO: {run.model.robust_maximum_log_likelihood_objective().numpy()}") 8 | std_ratio = (run.model.kernel.variance.numpy() / run.model.likelihood.variance.numpy()) ** 0.5 9 | print(f"(kernel.variance / likelihood.variance)**0.5: {std_ratio}") 10 | print(run.model.kernel.lengthscales.numpy()) 11 | except AttributeError: 12 | pass 13 | except tf.errors.InvalidArgumentError as e: 14 | print("") 15 | print("Probably a CholeskyError:") 16 | print(e.message) 17 | print("Ignoring...") 18 | print("") 19 | print("") 20 | 21 | 22 | uci_train_settings = dict( 23 | Wilson_tamielectric=([100, 200, 500, 1000, 2000], {}), 24 | Wilson_protein=([100, 200, 500, 1000, 2000], {}), 25 | Wilson_kin40k=([(1000, 2000, 5000, 10000, 15000), {}]), 26 | Wilson_bike=([100, 200, 500, 1000, 2000, 5000], {}), 27 | Wilson_elevators=([100, 200, 500, 1000, 2000, 5000], {}), 28 | Wilson_pol=([100, 200, 500, 1000, 2000, 5000], {}), 29 | Power=([100, 200, 500, 1000, 2000, 5000], {}), # Step function in it? 
30 | # Kin8mn=([100, 200, 500, 1000, 2000], {}), # Can't download 31 | Parkinsons_noisy=([100, 150, 170, 200, 500], {}), 32 | Wilson_parkinsons=([100, 150, 170, 200, 500, 1000], {}), 33 | Wilson_sml=([100, 200, 500, 1000, 2000, 3000, 3500], {}), # Mostly linear, but with benefit of nonlinear 34 | # Didn't get SE+Lin working, probably local optimum 35 | # Wilson_skillcraft=([10, 20, 50, 100, 200, 500], {"kernel_name": "SquaredExponentialLinear"}), 36 | Wilson_skillcraft=([10, 20, 50, 100, 200, 500, 1000], {}), # Mostly linear, but with benefit of nonlinear 37 | Wilson_gas=([100, 200, 500, 1000, 1300, 1500], {}), 38 | Naval=([10, 20, 50, 100, 200], {}), # Very sparse solution exists 39 | Naval_noisy=([10, 20, 50, 100, 200, 500], {}), # Very sparse solution exists 40 | Wilson_wine=([100, 200, 500, 1000, 1300, 1350], {}), # Suddenly catches good hypers with large M 41 | Wilson_airfoil=([100, 200, 500, 800, 1000, 1250, 1300, 1340], {}), # Good 42 | Wilson_solar=([100, 200, 300], 43 | {"kernel_name": "SquaredExponentialLinear", "max_lengthscale": 10.0}), # Mostly linear 44 | # Good, better performance with Linear kernel added 45 | # Wilson_concrete=([100, 200, 500, 600, 700, 800, 900], 46 | # {"kernel_name": "SquaredExponentialLinear", "optimizer": "bfgs", "max_lengthscale": 10.0}), 47 | Wilson_concrete=([100, 200, 500, 600, 700, 800, 900], {}), 48 | Wilson_pendulum=([10, 100, 200, 500, 567], {}), # Not sparse, due to very low noise 49 | Pendulum_noisy=([10, 100, 200, 500, 567], {}), # Not sparse, due to very low noise 50 | Wilson_forest=([10, 100, 200, 400], {"kernel_name": "SquaredExponentialLinear"}), # Bad 51 | Wilson_energy=([10, 30, 40, 50, 60, 70, 80, 90, 100, 200, 500], {}), # Good 52 | Wilson_stock=([10, 50, 100, 200, 400, 450], {"kernel_name": "SquaredExponentialLinear"}), # Mostly linear 53 | Wilson_housing=([100, 200, 300, 400], {}), # Bad 54 | Wilson_yacht=([10, 20, 50, 100, 200, 250], {}), 55 | Wilson_autompg=([10, 20, 50, 100, 200, 250], {}), 56 | Wilson_servo=([10, 20, 30, 40, 50, 70, 100, 110, 120, 130, 140], {}), 57 | Wilson_breastcancer=([10, 50, 100, 150], {}), 58 | Wilson_autos=([10, 20, 50, 100], {}), 59 | Wilson_concreteslump=([10, 20, 50, 60, 70], {}) 60 | ) 61 | 62 | bad_datasets = ["Wilson_housing", "Wilson_forest"] 63 | good_datasets = [k for k in uci_train_settings.keys() if k not in bad_datasets] 64 | -------------------------------------------------------------------------------- /robustgp_experiments/init_z/which-optimiser.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.2' 9 | # jupytext_version: 1.2.4 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% 17 | # ../create_report.sh jpt-test.py 18 | 19 | import gpflow 20 | 21 | from inducing_experiments.utils import ExperimentRecord 22 | 23 | gpflow.config.set_default_positive_minimum(1e-5) 24 | 25 | # %% [markdown] 26 | # # Kin40k 27 | 28 | # %% {"tags": ["parameters"]} 29 | MAXITER = 6000 30 | 31 | experiment_name = "which-optimiser" 32 | dataset_name = "Wilson_elevators" 33 | 34 | # %% 35 | experiment_storage_path = f"./storage-{experiment_name}/{dataset_name}" 36 | 37 | basic_run_settings_list = [ 38 | # {"model_class": "GPR"}, 39 | {"model_class": "SGPR", "M": 100, "fixed_Z": True}, 40 | {"model_class": "SGPR", "M": 100, "fixed_Z": True, 
"optimizer": "bfgs"}, 41 | {"model_class": "SGPR", "M": 200, "fixed_Z": True}, 42 | # {"model_class": "SGPR", "M": 200, "fixed_Z": True, "optimizer": "bfgs"}, # CholeskyError 43 | {"model_class": "SGPR", "M": 200, "fixed_Z": True, "optimizer": "bfgs", "lengthscale_transform": "constrained"}, 44 | ] 45 | 46 | common_params = {"storage_path": experiment_storage_path, "dataset_name": dataset_name} 47 | baseline_runs = [ExperimentRecord(storage_path=experiment_storage_path, dataset_name=dataset_name, **basic_run_settings) 48 | for basic_run_settings in basic_run_settings_list] 49 | 50 | [r.cached_run(MAXITER) for r in baseline_runs] 51 | # [plt.plot(*r.train_objective_hist) for r in baseline_runs] 52 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data 2 | from .experiment_running import ( 3 | LoggerCallback, 4 | create_loss_function, 5 | FullbatchUciExperiment, 6 | GaussianProcessUciExperiment, 7 | UciExperiment, 8 | Experiment, 9 | ) 10 | from .plotting import plot_1d_model 11 | from .storing import get_next_filename, store_pickle, load_existing_runs, find_run 12 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/baselines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import stats 3 | from sklearn.linear_model import LinearRegression 4 | 5 | 6 | def meanpred_baseline(_, Y_train, __, Y_test): 7 | pf = np.mean(Y_train) 8 | pv = np.var(Y_train) 9 | elbo = np.sum(stats.norm.logpdf(Y_train, pf, pv ** 0.5)) 10 | rmse = np.mean((Y_test - pf) ** 2.0) ** 0.5 11 | nlpp = -np.mean(stats.norm.logpdf(Y_test, pf, pv ** 0.5)) 12 | return elbo, rmse, nlpp 13 | 14 | 15 | def linear_baseline(X_train, Y_train, X_test, Y_test): 16 | reg = LinearRegression().fit(X_train, Y_train) 17 | residuals = reg.predict(X_train) - Y_train 18 | pred_var = np.var(residuals) 19 | 20 | elbo = np.sum(stats.norm.logpdf(residuals, scale=pred_var ** 0.5)) 21 | 22 | residuals_test = reg.predict(X_test) - Y_test 23 | rmse = np.mean(residuals_test ** 2.0) ** 0.5 24 | nlpp = -np.mean(stats.norm.logpdf(residuals_test, scale=pred_var ** 0.5)) 25 | 26 | return elbo, rmse, nlpp 27 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/data.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from shutil import copyfile, rmtree 6 | 7 | from observations.util import maybe_download_and_extract 8 | 9 | from bayesian_benchmarks.data import * 10 | 11 | 12 | def snelson1d(path="./.data/"): 13 | """Load Edward Snelson's 1d regression data set [@snelson2006fitc]. 14 | It contains 200 examples of a few oscillations of an example function. It has 15 | seen extensive use as a toy dataset for illustrating qualitative behaviour of 16 | Gaussian process approximations. 17 | 18 | Args: 19 | path: str. 20 | Path to directory which either stores file or otherwise file will be 21 | downloaded and extracted there. Filenames are `snelson_train_*`. 22 | 23 | Returns: 24 | Tuple of two np.darray `inputs` and `outputs` with 200 rows and 1 column. 
25 | """ 26 | path = os.path.expanduser(path) 27 | inputs_path = os.path.join(path, "snelson_train_inputs") 28 | outputs_path = os.path.join(path, "snelson_train_outputs") 29 | 30 | # Contains all source as well. We just need the data. 31 | url = "http://www.gatsby.ucl.ac.uk/~snelson/SPGP_dist.zip" 32 | 33 | if not (os.path.exists(inputs_path) and os.path.exists(outputs_path)): 34 | maybe_download_and_extract(path, url) 35 | 36 | # Copy the required data 37 | copyfile(os.path.join(path, "SPGP_dist", "train_inputs"), inputs_path) 38 | copyfile(os.path.join(path, "SPGP_dist", "train_outputs"), outputs_path) 39 | 40 | # Clean up everything else 41 | rmtree(os.path.join(path, "SPGP_dist")) 42 | os.remove(os.path.join(path, "SPGP_dist.zip")) 43 | 44 | X = np.loadtxt(os.path.join(inputs_path))[:, None] 45 | Y = np.loadtxt(os.path.join(outputs_path))[:, None] 46 | 47 | return (X, Y), (X, Y) 48 | 49 | 50 | class Naval_noisy(Naval): 51 | def read_data(self): 52 | X, Y = super().read_data() 53 | Y = Y + np.random.randn(*Y.shape) * 0.0001 54 | return X, Y 55 | 56 | 57 | class Pendulum_noisy(Wilson_pendulum): 58 | def read_data(self): 59 | X, Y = super().read_data() 60 | Y = Y + np.random.randn(*Y.shape) * 0.1 61 | return X, Y 62 | 63 | 64 | class Parkinsons_noisy(Wilson_parkinsons): 65 | def read_data(self): 66 | X, Y = super().read_data() 67 | Y = Y + np.random.randn(*Y.shape) * 1e-1 68 | return X, Y 69 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/experiment_processing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from gpflow.config import default_jitter 4 | from gpflow.covariances.dispatch import Kuf, Kuu 5 | 6 | 7 | def residual_variances(model): 8 | X_data, Y_data = model.data 9 | 10 | Kdiag = model.kernel(X_data, full_cov=False) 11 | kuu = Kuu(model.inducing_variable, model.kernel, jitter=default_jitter()) 12 | kuf = Kuf(model.inducing_variable, model.kernel, X_data) 13 | 14 | L = tf.linalg.cholesky(kuu) 15 | A = tf.linalg.triangular_solve(L, kuf, lower=True) 16 | 17 | c = Kdiag - tf.reduce_sum(tf.square(A), 0) 18 | 19 | return c.numpy() 20 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/experiment_running.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from dataclasses import dataclass, field, _MISSING_TYPE 3 | from functools import reduce 4 | from glob import glob 5 | from typing import Optional 6 | 7 | import gpflow 8 | import json_tricks 9 | import numpy as np 10 | import tensorflow as tf 11 | import tensorflow_probability as tfp 12 | 13 | from robustgp import InducingPointInitializer, FirstSubsample 14 | from robustgp.models import RobustGPR, RobustSGPR 15 | from robustgp.optimizers import RobustScipy 16 | from robustgp.utilities import set_trainable 17 | from . 
import data 18 | from .storing import get_next_filename 19 | 20 | 21 | def create_loss_function(model, data): 22 | @tf.function(autograph=False) 23 | def loss(): 24 | return -model.log_likelihood(*data) 25 | 26 | return loss 27 | 28 | 29 | class LoggerCallback: 30 | def __init__(self, model, loss_function, holdout_interval=10): 31 | self.model = model 32 | self.loss_function = loss_function 33 | self.holdout_interval = holdout_interval 34 | self.log_likelihoods = [] 35 | self.n_iters = [] 36 | self.counter = 0 37 | 38 | def __call__(self, step, variables=None, values=None): 39 | # step will reset to zero between calls to minimize(), whereas counter will keep increasing 40 | if (self.counter <= 10) or (self.counter % self.holdout_interval) == 0: 41 | if variables is not None: 42 | # Using Scipy and need to update the parameters 43 | for var, val in zip(variables, values): 44 | var.assign(val) 45 | 46 | self.n_iters.append(self.counter + 1) 47 | loss = self.loss_function().numpy() 48 | self.log_likelihoods.append(loss) 49 | print(f"{self.counter} - objective function: {self.log_likelihoods[-1]:.4f}", end="\r") 50 | 51 | self.counter += 1 52 | 53 | 54 | # def run_tf_optimizer(model, optimizer, data, iterations, callback=None): 55 | # logf = [] 56 | # 57 | # @tf.function(autograph=False) 58 | # def optimization_step(): 59 | # with tf.GradientTape(watch_accessed_variables=False) as tape: 60 | # tape.watch(model.trainable_variables) 61 | # objective = model.elbo(*data) 62 | # grads = tape.gradient(objective, model.trainable_variables) 63 | # optimizer.apply_gradients(zip(grads, model.trainable_variables)) 64 | # return -objective 65 | # 66 | # for step in range(iterations): 67 | # elbo = optimization_step() 68 | # print(f"{step}\t{elbo:.4f}", end="\r") 69 | # if callback is not None: 70 | # callback(step) 71 | # print("") 72 | # 73 | # return logf 74 | 75 | 76 | def normalize(X, X_mean, X_std): 77 | return (X - X_mean) / X_std 78 | 79 | 80 | @dataclass 81 | class Experiment: 82 | storage_path: str 83 | base_filename: Optional[str] = "data" 84 | 85 | # Populated during object life 86 | model = None 87 | trained_parameters = None 88 | 89 | _X_train = None 90 | _Y_train = None 91 | _X_test = None 92 | _Y_test = None 93 | 94 | def load_data(self): 95 | raise NotImplementedError 96 | 97 | def setup_model(self): 98 | """ 99 | Set up the model here to the point where existing parameters can be loaded into it. Do not 100 | initialise the parameters, as this can be time consuming. 101 | :return: 102 | """ 103 | raise NotImplementedError 104 | 105 | def init_params(self): 106 | """ 107 | Do the time consuming parameter initialisation here. 
108 | :return: 109 | """ 110 | raise NotImplementedError 111 | 112 | def run_optimisation(self): 113 | raise NotImplementedError 114 | 115 | def run(self): 116 | self.setup_model() 117 | self.init_params() 118 | self.run_optimisation() 119 | 120 | def cached_run(self): 121 | try: 122 | self.load() 123 | print("Skipping...") 124 | except FileNotFoundError: 125 | self.run() 126 | self.save() 127 | 128 | @property 129 | def X_train(self): 130 | if self._X_train is None: 131 | self.load_data() 132 | return self._X_train 133 | 134 | @property 135 | def Y_train(self): 136 | if self._Y_train is None: 137 | self.load_data() 138 | return self._Y_train 139 | 140 | @property 141 | def X_test(self): 142 | if self._X_test is None: 143 | self.load_data() 144 | return self._X_test 145 | 146 | @property 147 | def Y_test(self): 148 | if self._Y_test is None: 149 | self.load_data() 150 | return self._Y_test 151 | 152 | @property 153 | def store_variables(self): 154 | return [ 155 | k for k in list(self.__dict__.keys()) if k[0] != "_" and k not in ["storage_path", "base_filename", "model"] 156 | ] 157 | 158 | @property 159 | def load_match_variables(self): 160 | return [k for k in self.store_variables if k not in ["trained_parameters"]] 161 | 162 | def save(self): 163 | store_dict = {k: v for k, v in self.__dict__.items() if k in self.store_variables} 164 | filename = get_next_filename(self.storage_path, self.base_filename, extension="json") 165 | json_tricks.dump(store_dict, filename) 166 | print(f"Stored results in {filename} at {datetime.datetime.now()}") 167 | 168 | def load(self, filename=None): 169 | def field_equal(a, b): 170 | if type(a) is dict: 171 | if a == {} and type(b) is _MISSING_TYPE: 172 | return True 173 | try: 174 | equality = True 175 | for k in reduce(set.union, map(set, map(dict.keys, [a, b]))): 176 | if type(a[k]) is np.ndarray: 177 | if not np.all(a[k] == b[k]): 178 | return False 179 | else: 180 | if a[k] != b[k]: 181 | return False 182 | except (TypeError, KeyError): 183 | equality = False 184 | 185 | return equality 186 | else: 187 | return a == b 188 | 189 | if filename is None: 190 | # Find run with similar run parameters 191 | existing_runs = [] 192 | for fn in glob(f"{self.storage_path}/{self.base_filename}*"): 193 | existing_runs.append((json_tricks.load(fn), fn)) 194 | 195 | matching_runs = [ 196 | (dict, fn) 197 | for dict, fn in existing_runs 198 | if all( 199 | [ 200 | field_equal(self.__dict__[k], (dict[k] if k in dict else self.__dataclass_fields__[k].default)) 201 | for k in self.load_match_variables 202 | ] 203 | ) 204 | ] 205 | else: 206 | matching_runs = [(json_tricks.load(filename), filename)] 207 | 208 | if len(matching_runs) == 1: 209 | print(f"Loading from `{matching_runs[0][1]}`...") 210 | for k, v in matching_runs[0][0].items(): 211 | setattr(self, k, v) 212 | gpflow.config.set_default_positive_minimum(1e-7) 213 | self.setup_model() 214 | gpflow.utilities.multiple_assign(self.model, self.trained_parameters) 215 | elif len(matching_runs) == 0: 216 | raise FileNotFoundError("No matching run found.") 217 | else: 218 | raise AssertionError("Only one run of an experiment should be present.") 219 | 220 | 221 | @dataclass 222 | class UciExperiment(Experiment): 223 | dataset_name: Optional[str] = "Wilson_elevators" 224 | 225 | def load_data(self): 226 | loaded_data = getattr(data, self.dataset_name)() 227 | if type(loaded_data) == tuple: 228 | self._X_train, self._Y_train = loaded_data[0] 229 | elif isinstance(loaded_data, data.Dataset): 230 | # Here, we always 
normalise on training. This is different to before. 231 | X_mean, X_std = np.average(loaded_data.X_train, 0)[None, :], 1e-6 + np.std(loaded_data.X_train, 0)[None, :] 232 | self._X_train = normalize(loaded_data.X_train, X_mean, X_std) 233 | self._X_test = normalize(loaded_data.X_test, X_mean, X_std) 234 | 235 | Y_mean, Y_std = np.average(loaded_data.Y_train, 0)[None, :], 1e-6 + np.std(loaded_data.Y_train, 0)[None, :] 236 | self._Y_train = normalize(loaded_data.Y_train, Y_mean, Y_std) 237 | self._Y_test = normalize(loaded_data.Y_test, Y_mean, Y_std) 238 | else: 239 | raise NotImplementedError 240 | 241 | 242 | @dataclass 243 | class GaussianProcessUciExperiment(UciExperiment): 244 | model_class: Optional[str] = "SGPR" 245 | M: Optional[int] = None 246 | kernel_name: Optional[str] = "SquaredExponential" 247 | init_Z_method: Optional[InducingPointInitializer] = FirstSubsample() 248 | max_lengthscale: Optional[float] = 1000.0 249 | max_variance: Optional[float] = 1000.0 250 | 251 | training_procedure: Optional[str] = "joint" # joint | reinit 252 | initial_parameters: Optional[dict] = field(default_factory=dict) 253 | 254 | # Populated during object life 255 | train_objective_hist = None 256 | 257 | def setup_model(self): 258 | kernel = self.setup_kernel() 259 | if self.model_class == "SGPR": 260 | inducing_variable = self.setup_inducing_variable() 261 | model = RobustSGPR((self.X_train, self.Y_train), kernel, inducing_variable=inducing_variable) 262 | elif self.model_class == "GPR": 263 | assert self.M is None 264 | model = RobustGPR((self.X_train, self.Y_train), kernel) 265 | else: 266 | raise NotImplementedError 267 | print(f"Jitter variance: {np.log10(model.jitter_variance.numpy()):.1f}") 268 | model.likelihood.variance = gpflow.Parameter(1.0, transform=gpflow.utilities.positive()) 269 | self.model = model 270 | 271 | def setup_kernel(self): 272 | if self.kernel_name == "SquaredExponential": 273 | kernel = gpflow.kernels.SquaredExponential(lengthscales=np.ones(self.X_train.shape[1])) 274 | elif self.kernel_name == "SquaredExponentialLinear": 275 | kernel = ( 276 | gpflow.kernels.SquaredExponential( 277 | lengthscales=np.ones(self.X_train.shape[1])) + gpflow.kernels.Linear() 278 | ) 279 | else: 280 | # try: 281 | kernel = getattr(gpflow.kernels, self.kernel_name)(lengthscales=np.ones(self.X_train.shape[1])) 282 | # except: 283 | # raise NotImplementedError(f"Kernel `{self.kernel_name}` is unknown.") 284 | 285 | return kernel 286 | 287 | def setup_inducing_variable(self): 288 | return gpflow.inducing_variables.InducingPoints(np.zeros((self.M, self.X_train.shape[1]))) 289 | 290 | def init_inducing_variable(self): 291 | if self.M > len(self.X_train): 292 | raise ValueError("Cannot have M > len(X).") 293 | 294 | Z, _ = self.init_Z_method(self.X_train, self.M, self.model.kernel) 295 | 296 | try: 297 | self.model.inducing_variable.Z.assign(Z) 298 | except Exception as e: 299 | print(type(e)) 300 | print(e) 301 | self.model.inducing_variable.Z = gpflow.Parameter(Z) 302 | 303 | def init_params(self): 304 | self.model.likelihood.variance.assign(0.01) 305 | gpflow.utilities.multiple_assign(self.model, self.initial_parameters) 306 | 307 | constrained_transform = tfp.bijectors.Sigmoid( 308 | gpflow.utilities.to_default_float(gpflow.config.default_positive_minimum()), 309 | gpflow.utilities.to_default_float(self.max_lengthscale), 310 | ) 311 | 312 | var_constrained_transform = tfp.bijectors.Sigmoid( 313 | gpflow.utilities.to_default_float(gpflow.config.default_positive_minimum()), 314 | 
gpflow.utilities.to_default_float(self.max_variance), 315 | ) 316 | 317 | if self.kernel_name == "SquaredExponential": 318 | new_len = gpflow.Parameter(self.model.kernel.lengthscales.numpy(), transform=constrained_transform) 319 | new_var = gpflow.Parameter(self.model.kernel.variance.numpy(), transform=var_constrained_transform) 320 | self.model.kernel.lengthscales = new_len 321 | self.model.kernel.variance = new_var 322 | elif self.kernel_name == "SquaredExponentialLinear": 323 | new_len = gpflow.Parameter(self.model.kernel.kernels[0].lengthscales.numpy(), 324 | transform=constrained_transform) 325 | self.model.kernel.kernels[0].lengthscales = new_len 326 | new_var_se = gpflow.Parameter(self.model.kernel.kernels[0].variance.numpy(), transform=var_constrained_transform) 327 | new_var_lin = gpflow.Parameter(self.model.kernel.kernels[1].variance.numpy(), transform=var_constrained_transform) 328 | self.model.kernel.kernels[0].variance = new_var_se 329 | self.model.kernel.kernels[1].variance = new_var_lin 330 | 331 | # TODO: Check if "inducing_variable" is in one of the keys in `self.initial_parameters`, to make things work 332 | # with non `InducingPoints` like inducing variables. 333 | if self.model_class != "GPR" and ".inducing_variable.Z" not in self.initial_parameters: 334 | # Kernel parameters should be initialised before inducing variables are. If inducing variables are set in 335 | # the initial parameters, we shouldn't run this. 336 | self.init_inducing_variable() 337 | 338 | 339 | @dataclass 340 | class FullbatchUciExperiment(GaussianProcessUciExperiment): 341 | optimizer: Optional[str] = "l-bfgs-b" 342 | training_procedure: Optional[str] = "joint" # joint | fixed_Z | reinit_Z 343 | 344 | def run_optimisation(self): 345 | print(f"Running {str(self)}") 346 | 347 | model = self.model 348 | loss_function = self.model.training_loss_closure(compile=True) 349 | robust_loss_function = lambda: -self.model.robust_maximum_log_likelihood_objective() 350 | # loss_function = tf.function(lambda jitter=None: -self.model.elbo(jitter)) 351 | hist = LoggerCallback(model, robust_loss_function) 352 | if self.optimizer == "l-bfgs-b" or self.optimizer == "bfgs": 353 | opt = RobustScipy() 354 | else: 355 | raise NotImplementedError(f"I don't know {self.optimizer}") 356 | def run_optimisation(): 357 | try: 358 | opt.minimize( 359 | loss_function, 360 | self.model.trainable_variables, 361 | robust_closure=robust_loss_function, 362 | method=self.optimizer, 363 | options=dict(maxiter=1000, disp=True), 364 | step_callback=hist, 365 | ) 366 | print("") 367 | except KeyboardInterrupt as e: 368 | if input("Optimisation aborted. Do you want to re-raise the KeyboardInterrupt? 
(y/n) ") == "y": 369 | raise e 370 | 371 | if self.training_procedure == "joint": 372 | run_optimisation() 373 | elif self.training_procedure == "fixed_Z": 374 | set_trainable(self.model.inducing_variable, False) 375 | run_optimisation() 376 | run_optimisation() 377 | elif self.training_procedure == "reinit_Z": 378 | set_trainable(self.model.inducing_variable, False) 379 | for i in range(20): 380 | reinit = True 381 | try: 382 | run_optimisation() 383 | except tf.errors.InvalidArgumentError as e: 384 | if e.message[1:9] != "Cholesky": 385 | raise e 386 | self.init_inducing_variable() 387 | print(self.model.elbo().numpy()) # Check whether Cholesky fails 388 | reinit = False 389 | 390 | if reinit: 391 | old_Z = self.model.inducing_variable.Z.numpy().copy() 392 | old_elbo = self.model.robust_maximum_log_likelihood_objective() 393 | self.init_inducing_variable() 394 | if self.model.robust_maximum_log_likelihood_objective() <= old_elbo: 395 | # Restore old Z, and finish optimisation 396 | self.model.inducing_variable.Z.assign(old_Z) 397 | print("Stopped reinit_Z procedure because new ELBO was smaller than old ELBO.") 398 | break 399 | else: 400 | raise NotImplementedError 401 | 402 | # Store results 403 | self.trained_parameters = gpflow.utilities.read_values(model) 404 | self.train_objective_hist = opt.f_vals #(hist.n_iters, hist.log_likelihoods) 405 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/plotting.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def plot_1d_model(m, *, data=None): 6 | D = m.inducing_variable.Z.numpy().shape[1] 7 | if data is not None: 8 | X, Y = data[0], data[1] 9 | plt.plot(X, Y, 'x') 10 | 11 | data_inducingpts = np.vstack((X if data else np.zeros((0, D)), m.inducing_variable.Z.numpy())) 12 | pX = np.linspace(np.min(data_inducingpts) - 1.0, np.max(data_inducingpts) + 1.0, 300)[:, None] 13 | pY, pYv = m.predict_y(pX) 14 | 15 | line, = plt.plot(pX, pY, lw=1.5) 16 | col = line.get_color() 17 | plt.plot(pX, pY + 2 * pYv ** 0.5, col, lw=1.5) 18 | plt.plot(pX, pY - 2 * pYv ** 0.5, col, lw=1.5) 19 | plt.plot(m.inducing_variable.Z.numpy(), np.zeros(m.inducing_variable.Z.numpy().shape), 'k|', mew=2) 20 | -------------------------------------------------------------------------------- /robustgp_experiments/utils/storing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import re 4 | from glob import glob 5 | 6 | 7 | def get_next_filename(path, base_filename="data", extension="pkl"): 8 | if not os.path.exists(path): 9 | os.makedirs(path) 10 | largest_existing_number = max([int(re.findall(r'\d+', fn)[-1]) for fn in glob(f"{path}/{base_filename}*")] + [0]) 11 | path = f"{path}/{base_filename}{largest_existing_number + 1}" 12 | if extension is not None: 13 | path = f"{path}.{extension}" 14 | return path 15 | 16 | 17 | def store_pickle(data, base_path, base_filename="data"): 18 | with open(get_next_filename(base_path, base_filename), 'wb') as outfile: 19 | pickle.dump(data, outfile) 20 | 21 | 22 | def load_existing_runs(path, base_filename="data"): 23 | existing_runs = [] 24 | for fn in glob(f"{path}/{base_filename}*"): 25 | with open(fn, 'rb') as fp: 26 | existing_runs.append((pickle.load(fp), fn)) 27 | return existing_runs 28 | 29 | 30 | def weak_dictionary_compare(source_dict, target_dict): 31 | """ 32 | Returns `True` if all values that are 
present in `source_dict`, are the 33 | same as the values of the same key in `target_dict`. 34 | """ 35 | # Minimal body implementing the check described in the docstring above; does not special-case array-valued entries. 36 | return all(key in target_dict and target_dict[key] == value for key, value in source_dict.items()) 37 | 38 | def find_run(existing_runs, run_details): 39 | return [run for run in existing_runs if run["run_details"] == run_details] 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | requirements = [ 4 | 'numpy>=1.18.1', 5 | 'scipy>=1.4.1', 6 | 'matplotlib>=3.1.3', 7 | 'json_tricks', 8 | 'jug' 9 | ] 10 | 11 | setup( 12 | name='robustgp', 13 | version='0.0.2', 14 | packages=find_packages(), 15 | install_requires=requirements, 16 | ) 17 | --------------------------------------------------------------------------------