├── LICENSE
├── README.md
├── __init__.py
├── baselines
│   ├── __init__.py
│   ├── bcm.py
│   ├── distgp.py
│   ├── dvigp.py
│   ├── gpoegp.py
│   ├── poegp.py
│   └── rbcm.py
├── data
│   ├── banana.mat
│   └── nasa.mat
├── experiments
│   ├── __init__.py
│   ├── banana.py
│   ├── baseline.py
│   ├── dvigp_nlpd.py
│   ├── image.py
│   ├── million.py
│   ├── million_rbcm.py
│   ├── paralell.py
│   └── solar.py
├── extra
│   └── modular_gp_logo.png
├── kernels
│   ├── __init__.py
│   ├── coregionalization.py
│   ├── kernel.py
│   ├── rbf.py
│   └── stationary.py
├── likelihoods
│   ├── __init__.py
│   ├── bernoulli.py
│   ├── gaussian.py
│   ├── hetgaussian.py
│   └── likelihood.py
├── models
│   ├── __init__.py
│   ├── chainedgp.py
│   ├── ensemblegp.py
│   ├── hetmoensemble.py
│   ├── moensemble.py
│   ├── svgp.py
│   └── svmogp.py
├── optimization
│   ├── __init__.py
│   └── algorithms.py
├── requirements.txt
└── util.py
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner.
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Modular Gaussian Processes
for Transfer Learning 2 | 3 | 4 | 5 | ## 🧩 Introduction 6 | 7 | This repository contains the implementation of our paper [Modular Gaussian Processes for Transfer Learning](https://arxiv.org/abs/2110.13515), accepted at the 35th Conference on Neural Information Processing Systems (NeurIPS 2021). The code is written entirely in Python and builds on the [PyTorch](https://pytorch.org/) framework. 8 | 9 | ### 🧩 Idea 10 | 11 | Here we provide a new framework for transfer learning based on *modular Gaussian processes* (GPs). The underlying idea is to avoid revisiting the training samples once a model is trained and well fitted, so that the model can be repurposed in combination with other models or with new data. We build *dictionaries* of modules (models), where each module stores only parameters and hyperparameters, not observations. Finally, we can build *meta-models* (GP models) from different combinations of modules without reusing the old data. 12 | 13 | ## 🧩 Citation 14 | 15 | If you use this code, please include the following citation: 16 | ``` 17 | @inproceedings{MorenoArtesAlvarez21, 18 | title = {Modular {G}aussian Processes for Transfer Learning}, 19 | author = {Moreno-Mu\~noz, Pablo and Art\'es-Rodr\'iguez, Antonio and \'Alvarez, Mauricio A}, 20 | booktitle = {Advances in Neural Information Processing Systems (NeurIPS)}, 21 | year = {2021} 22 | } 23 | ``` 24 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/__init__.py -------------------------------------------------------------------------------- /baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/baselines/__init__.py -------------------------------------------------------------------------------- /baselines/bcm.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | 15 | class BayesianCM(torch.nn.Module): 16 | """ 17 | -- Bayesian Committee Machine -- 18 | -- 19 | -- Adaptation to Pytorch + GP framework 20 | -- V.
Tresp "A Bayesian Committee Machine" 21 | -- Reference: https://www.dbs.ifi.lmu.de/~tresp/papers/bcm6.pdf 22 | """ 23 | 24 | def __init__(self, models, input_dim=1.0): 25 | super(BayesianCM, self).__init__() 26 | 27 | self.input_dim = int(input_dim) # dimension of x 28 | 29 | # Adjacent Local GP Models 30 | self.models = models # is a list 31 | 32 | def forward(self): 33 | return 1.0 34 | 35 | def predictive(self, x, y, x_new): 36 | # x is a list of x_k (distributed) 37 | # y is a list of y_k (distributed) 38 | 39 | K = len(self.models) 40 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 41 | correction = (1-K)/prior_v 42 | 43 | gp_m = torch.zeros(x_new.size()) 44 | gp_v = torch.zeros(x_new.size()) 45 | 46 | for k, model_k in enumerate(self.models): 47 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 48 | 49 | gp_m += m_k/v_k 50 | gp_v += 1.0/v_k 51 | 52 | gp_v = 1.0/(gp_v + correction) 53 | gp_m = gp_v*gp_m 54 | 55 | return gp_m, gp_v 56 | 57 | def rmse(self, x, y, x_new, f_new): 58 | f_gp,_ = self.predictive(x, y, x_new) 59 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 60 | return rmse 61 | 62 | def mae(self, x, y, x_new, f_new): 63 | f_gp,_ = self.predictive(x, y, x_new) 64 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 65 | return mae 66 | 67 | def nlpd(self, x, y, x_new, y_new): 68 | f_gp, v_gp = self.predictive(x, y, x_new) 69 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 70 | return nlpd 71 | 72 | # FOR HIERARCHICAL SETTINGS 73 | 74 | def predictive_layer(self, gps_m, gps_v, x_new): 75 | # gps_m is a list of gp_m (distributed) 76 | # gps_v is a list of gp_v (distributed) 77 | 78 | K = len(gps_m) 79 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 80 | prior_v += 1e-1 81 | correction = (1 - K)/prior_v 82 | 83 | gp_m = torch.zeros(x_new.size()) 84 | gp_v = torch.zeros(x_new.size()) 85 | 86 | for k, m_k in enumerate(gps_m): 87 | v_k = gps_v[k] 88 | 89 | gp_m += m_k / v_k 90 | gp_v += 1.0 / v_k 91 | 92 | gp_v = 1.0 / (gp_v + correction) 93 | gp_m = gp_v * gp_m 94 | 95 | return gp_m, gp_v 96 | 97 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 98 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 99 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 100 | return rmse 101 | 102 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 103 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 104 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 105 | return mae 106 | 107 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 108 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 109 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 110 | return nlpd -------------------------------------------------------------------------------- /baselines/distgp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Distributed GP" 2 | # by Deisenroth & Ng, ICML 2015 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # 
October 2021 14 | 15 | import torch 16 | 17 | class DistGP(torch.nn.Module): 18 | """ 19 | -- Distributed Gaussian Process Regression-- 20 | -- 21 | -- Adaptation to Pytorch + GP framework 22 | -- M. P. Deisenroth and J. W. Ng, "Distributed Gaussian Processes" 23 | -- Reference: http://proceedings.mlr.press/v37/deisenroth15.pdf 24 | """ 25 | 26 | def __init__(self, kernel, likelihood, input_dim=None): 27 | super(DistGP, self).__init__() 28 | 29 | if input_dim is None: 30 | input_dim = 1 31 | 32 | self.input_dim = int(input_dim) #dimension of x 33 | 34 | # GP Elements -- 35 | self.likelihood = likelihood #type of likelihood 36 | self.kernel = kernel #type of kernel 37 | 38 | 39 | def forward(self, x, y): 40 | identity = torch.eye(y.size(0)) 41 | s_n = torch.pow(self.likelihood.sigma, 2.0) 42 | 43 | K = self.kernel.K(x,x) 44 | KI = K + torch.mul(s_n,identity) 45 | iKI, _ = torch.solve(torch.eye(KI.size(0)), KI) 46 | yiKIy = y.t().mm(iKI).mm(y) 47 | 48 | log_marginal = -0.5*yiKIy - 0.5*torch.logdet(KI) 49 | return -log_marginal 50 | 51 | def predictive(self, x, y, x_new): 52 | 53 | Kx = self.kernel.K(x, x_new) 54 | Kxx = self.kernel.K(x_new, x_new) 55 | 56 | identity = torch.eye(y.size(0)) 57 | s_n = torch.pow(self.likelihood.sigma, 2.0) 58 | 59 | K = self.kernel.K(x, x) 60 | KI = K + torch.mul(s_n, identity) 61 | iKI, _ = torch.solve(torch.eye(KI.size(0)), KI) 62 | 63 | gp_m = Kx.t().mm(iKI).mm(y) 64 | gp_v = torch.diagonal(Kxx - Kx.t().mm(iKI).mm(Kx), 0)[:,None] 65 | 66 | return gp_m, gp_v 67 | 68 | def rmse(self, x, y, x_new, f_new): 69 | f_gp,_ = self.predictive(x, y, x_new) 70 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 71 | return rmse 72 | 73 | def mae(self, x, y, x_new, f_new): 74 | f_gp,_ = self.predictive(x, y, x_new) 75 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 76 | return mae 77 | 78 | def nlpd(self, x, y, x_new, y_new): 79 | f_gp, u_gp = self.predictive(x, y, x_new) 80 | #f_gp = torch.from_numpy(f_gp) 81 | #u_gp = torch.from_numpy(u_gp) 82 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 83 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 84 | return nlpd 85 | -------------------------------------------------------------------------------- /baselines/dvigp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Distributed Variational Inference in GPs" 2 | # by Y. Gal and M. van der Wilk 3 | # 4 | # Little adaptation without the LVM assumption 5 | # for testing and comparison. Simulates a distributed environment. 6 | # ----------------------------------------------------------------- 7 | # This script belongs to the ModularGP repo 8 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 9 | # Copyright (c) 2021 Pablo Moreno-Munoz 10 | # ----------------------------------------------------------------- 11 | # 12 | # 13 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 14 | # Section for Cognitive Systems 15 | # Technical University of Denmark (DTU) 16 | # October 2021 17 | 18 | import torch 19 | import numpy as np 20 | from torch.distributions import MultivariateNormal as Normal 21 | from torch.distributions import kl_divergence 22 | 23 | from GPy.inference.latent_function_inference.posterior import Posterior 24 | 25 | class DVIGP(torch.nn.Module): 26 | """ 27 | -- Distributed Variational Inference in Gaussian Processes -- 28 | -- 29 | -- Adaptation to Pytorch + GP framework 30 | -- Y. Gal et al. 
"Distributed Variational Inference in Sparse Gaussian 31 | Process Regression and Latent Variable Models" NIPS 2014 32 | """ 33 | def __init__(self, kernel, likelihood, M, nodes=1, input_dim=None): 34 | super(DVIGP, self).__init__() 35 | 36 | if input_dim is None: 37 | input_dim = 1 38 | 39 | # Nodes to distribute the computational load -- 40 | self.nodes = int(nodes) 41 | 42 | # Dimensions -- 43 | self.M = M #num. inducing 44 | self.input_dim = int(input_dim) #dimension of x 45 | 46 | # GP Elements -- 47 | self.likelihood = likelihood #type of likelihood 48 | self.kernel = kernel #type of kernel 49 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 50 | 51 | # Variational distribution -- 52 | self.q_m = torch.nn.Parameter(torch.randn(M,1), requires_grad=True) # variational: mean parameter 53 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 54 | 55 | def forward(self, x, y): 56 | x_nodes, y_nodes = self.data_to_nodes(x,y) 57 | 58 | # Variational parameters -- 59 | q_m = self.q_m 60 | q_L = torch.tril(self.q_L) 61 | q_S = torch.mm(q_L, q_L.t()) 62 | 63 | # Prior parameters (uses kernel) -- 64 | Kuu = self.kernel.K(self.z) 65 | iKuu, _ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse? 66 | 67 | # Distributions -- q(u), p(u) 68 | q_u = Normal(q_m.flatten(), q_S) 69 | p_u = Normal(torch.zeros(self.M), Kuu) 70 | 71 | global_params = {'q_m': q_m, 'q_L': q_L, 'q_S': q_S, 'Kuu': Kuu, 'iKuu': iKuu} 72 | 73 | # Distributed Expectations 74 | expectation = 0.0 75 | for k, y_k in enumerate(y_nodes): 76 | x_k = x_nodes[k] 77 | expectation_node = self.forward_node(x_k, y_k, global_params) 78 | expectation += expectation_node.sum() 79 | 80 | # KL divergence -- 81 | kl = kl_divergence(q_u, p_u) 82 | 83 | # Lower bound (ELBO) -- 84 | elbo = expectation - kl 85 | 86 | return -elbo 87 | 88 | def forward_node(self, x_node, y_node, global_params): 89 | q_m = global_params['q_m'] 90 | q_L = global_params['q_m'] 91 | q_S = global_params['q_S'] 92 | Kuu = global_params['Kuu'] 93 | iKuu = global_params['iKuu'] 94 | 95 | Kff = self.kernel.K(x_node, x_node) 96 | Kfu = self.kernel.K(x_node, self.z) 97 | Kuf = torch.transpose(Kfu, 0, 1) 98 | 99 | A = Kfu.mm(iKuu) 100 | AT = iKuu.mm(Kuf) 101 | 102 | m_f = A.mm(q_m) 103 | v_f = torch.diag(Kff + A.mm(q_S - Kuu).mm(AT)) 104 | 105 | # Expectation term of node -- 106 | expectation = self.likelihood.variational_expectation(y_node, m_f, v_f) 107 | 108 | return expectation 109 | 110 | def data_to_nodes(self, x, y): 111 | x_nodes = [] 112 | y_nodes = [] 113 | 114 | N = y.size(0) 115 | size_node = np.int(np.floor(N/self.nodes)) 116 | for k in range(self.nodes): 117 | if k < self.nodes - 1: 118 | x_nodes.append(x[(0+(k*size_node)):(0+((k+1)*size_node)), :]) 119 | y_nodes.append(y[(0+(k*size_node)):(0+((k+1)*size_node)), :]) 120 | else: 121 | x_nodes.append(x[(0+(k*size_node)):, :]) 122 | y_nodes.append(y[(0+(k*size_node)):, :]) 123 | 124 | return x_nodes, y_nodes 125 | 126 | def predictive(self, x_new): 127 | # Matrices 128 | q_m = self.q_m.detach().numpy() 129 | q_L = torch.tril(self.q_L) 130 | q_S = torch.mm(q_L, q_L.t()).detach().numpy() 131 | Kuu = self.kernel.K(self.z, self.z).detach().numpy() 132 | 133 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape)) 134 | Kx = self.kernel.K(self.z, x_new).detach().numpy() 135 | Kxx = self.kernel.K(x_new, x_new).detach().numpy() 136 | 137 | # GP Predictive Posterior - mean + variance 138 | gp_mu = np.dot(Kx.T, 
posterior.woodbury_vector) 139 | Kxx = np.diag(Kxx) 140 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 141 | 142 | gp = gp_mu 143 | gp_upper = gp_mu + 2*np.sqrt(gp_var) #+ 2 * self.likelihood.sigma.detach().numpy() 144 | gp_lower = gp_mu - 2*np.sqrt(gp_var) #- 2 * self.likelihood.sigma.detach().numpy() 145 | 146 | return gp, gp_upper, gp_lower 147 | 148 | def rmse(self, x_new, f_new): 149 | f_gp,_,_ = self.predictive(x_new) 150 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 151 | return rmse 152 | 153 | def mae(self, x_new, f_new): 154 | f_gp,_,_ = self.predictive(x_new) 155 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 156 | return mae 157 | 158 | def nlpd(self, x_new, y_new): 159 | f_gp, u_gp, _ = self.predictive(x_new) 160 | f_gp = torch.from_numpy(f_gp) 161 | u_gp = torch.from_numpy(u_gp) 162 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 163 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 164 | return nlpd 165 | 166 | -------------------------------------------------------------------------------- /baselines/gpoegp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Generalised Product of GP Experts" 2 | # by Cao & Fleet, 2014 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | import torch 16 | 17 | class GenPoeGP(torch.nn.Module): 18 | """ 19 | -- Generalised Product of GP Experts -- 20 | -- 21 | -- Adaptation to Pytorch + GP framework 22 | -- Y. Cao and D. J. 
Fleet, "Generalized Product of Experts (...)" 23 | -- Reference: https://arxiv.org/abs/1410.7827 24 | """ 25 | 26 | def __init__(self, models, input_dim=1.0): 27 | super(GenPoeGP, self).__init__() 28 | 29 | self.input_dim = int(input_dim) # dimension of x 30 | 31 | # Adjacent Local GP Models 32 | self.models = models # is a list 33 | 34 | def forward(self): 35 | return 1.0 36 | 37 | def predictive(self, x, y, x_new): 38 | # x is a list of x_k (distributed) 39 | # y is a list of y_k (distributed) 40 | 41 | K = len(self.models) 42 | beta_k = 1/K 43 | 44 | gp_m = torch.zeros(x_new.size()) 45 | gp_v = torch.zeros(x_new.size()) 46 | 47 | for k, model_k in enumerate(self.models): 48 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 49 | 50 | gp_m += (beta_k*m_k)/v_k 51 | gp_v += beta_k/v_k 52 | 53 | gp_v = 1.0/gp_v 54 | gp_m = gp_v*gp_m 55 | 56 | return gp_m, gp_v 57 | 58 | 59 | def rmse(self, x, y, x_new, f_new): 60 | f_gp,_ = self.predictive(x, y, x_new) 61 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 62 | return rmse 63 | 64 | def mae(self, x, y, x_new, f_new): 65 | f_gp,_ = self.predictive(x, y, x_new) 66 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 67 | return mae 68 | 69 | def nlpd(self, x, y, x_new, y_new): 70 | f_gp, v_gp = self.predictive(x, y, x_new) 71 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 72 | return nlpd 73 | 74 | # FOR HIERARCHICAL SETTINGS 75 | 76 | def predictive_layer(self, gps_m, gps_v, x_new): 77 | # gps_m is a list of gp_m (distributed) 78 | # gps_v is a list of gp_v (distributed) 79 | 80 | K = len(gps_m) 81 | beta_k = 1/K 82 | 83 | gp_m = torch.zeros(x_new.size()) 84 | gp_v = torch.zeros(x_new.size()) 85 | 86 | for k, m_k in enumerate(gps_m): 87 | v_k = gps_v[k] 88 | 89 | gp_m += (beta_k*m_k)/v_k 90 | gp_v += beta_k/v_k 91 | 92 | gp_v = 1.0/gp_v 93 | gp_m = gp_v*gp_m 94 | 95 | return gp_m, gp_v 96 | 97 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 98 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 99 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 100 | return rmse 101 | 102 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 103 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 104 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 105 | return mae 106 | 107 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 108 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 109 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 110 | return nlpd 111 | -------------------------------------------------------------------------------- /baselines/poegp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Product of GP Experts" 2 | # by Ng & Deisenroth, 2014 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | 16 | import torch 17 | 18 | class PoeGP(torch.nn.Module): 19 | """ 20 | -- Product of GP Experts -- 21 | -- 22 | -- Adaptation to Pytorch + GP framework 23 | -- J. W. Ng and M. P. 
Deisenroth, "Hierarchical Mixtureof-Experts Model for (...) 2014" 24 | -- Reference: http://arxiv.org/abs/1412.3078 25 | """ 26 | def __init__(self, models, input_dim=1.0): 27 | super(PoeGP, self).__init__() 28 | 29 | self.input_dim = int(input_dim) # dimension of x 30 | 31 | # Adjacent Local GP Models 32 | self.models = models # is a list 33 | 34 | def forward(self): 35 | return 1.0 36 | 37 | def predictive(self, x, y, x_new): 38 | # x is a list of x_k (distributed) 39 | # y is a list of y_k (distributed) 40 | 41 | gp_m = torch.zeros(x_new.size()) 42 | gp_v = torch.zeros(x_new.size()) 43 | 44 | for k, model_k in enumerate(self.models): 45 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 46 | 47 | gp_m += m_k/v_k 48 | gp_v += 1.0/v_k 49 | 50 | gp_v = 1.0/gp_v 51 | gp_m = gp_v*gp_m 52 | 53 | return gp_m, gp_v 54 | 55 | def rmse(self, x, y, x_new, f_new): 56 | f_gp,_ = self.predictive(x, y, x_new) 57 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 58 | return rmse 59 | 60 | def mae(self, x, y, x_new, f_new): 61 | f_gp,_ = self.predictive(x, y, x_new) 62 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 63 | return mae 64 | 65 | def nlpd(self, x, y, x_new, y_new): 66 | f_gp, v_gp = self.predictive(x, y, x_new) 67 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 68 | return nlpd 69 | 70 | # FOR HIERARCHICAL SETTINGS 71 | 72 | def predictive_layer(self, gps_m, gps_v, x_new): 73 | # gps_m is a list of gp_m (distributed) 74 | # gps_v is a list of gp_v (distributed) 75 | 76 | gp_m = torch.zeros(x_new.size()) 77 | gp_v = torch.zeros(x_new.size()) 78 | 79 | for k, m_k in enumerate(gps_m): 80 | v_k = gps_v[k] 81 | 82 | gp_m += m_k / v_k 83 | gp_v += 1.0 / v_k 84 | 85 | gp_v = 1.0 / gp_v 86 | gp_m = gp_v * gp_m 87 | 88 | return gp_m, gp_v 89 | 90 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 91 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 92 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 93 | return rmse 94 | 95 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 96 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 97 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 98 | return mae 99 | 100 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 101 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 102 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 103 | return nlpd -------------------------------------------------------------------------------- /baselines/rbcm.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Robust Bayesian Committee Machine" 2 | # by Deisenroth & Ng, ICML 2015 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | import torch 16 | 17 | class RobustBayesianCM(torch.nn.Module): 18 | """ 19 | -- Robust Bayesian Committee Machine -- 20 | -- 21 | -- Adaptation to Pytorch + GP framework 22 | -- M. P. Deisenroth and J. W. 
Ng, "Distributed Gaussian Processes" 23 | -- Reference: http://proceedings.mlr.press/v37/deisenroth15.pdf 24 | """ 25 | def __init__(self, models, input_dim=1.0): 26 | super(RobustBayesianCM, self).__init__() 27 | 28 | self.input_dim = int(input_dim) # dimension of x 29 | 30 | # Adjacent Local GP Models 31 | self.models = models # is a list 32 | 33 | def forward(self): 34 | return 1.0 35 | 36 | def predictive(self, x, y, x_new): 37 | # x is a list of x_k (distributed) 38 | # y is a list of y_k (distributed) 39 | 40 | K = len(self.models) 41 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 42 | log_prior_v = torch.log(prior_v) 43 | 44 | gp_m = torch.zeros(x_new.size()) 45 | gp_v = torch.zeros(x_new.size()) 46 | correction = torch.zeros(x_new.size()) 47 | 48 | for k, model_k in enumerate(self.models): 49 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 50 | 51 | beta_k = 0.5*(log_prior_v - torch.log(v_k)) 52 | correction += beta_k 53 | 54 | gp_m += m_k/v_k 55 | gp_v += 1.0/v_k 56 | 57 | correction = (1-correction)/prior_v 58 | gp_v = 1.0/(gp_v + correction) 59 | gp_m = gp_v*gp_m 60 | 61 | return gp_m, gp_v 62 | 63 | def rmse(self, x, y, x_new, f_new): 64 | f_gp,_ = self.predictive(x, y, x_new) 65 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 66 | return rmse 67 | 68 | def mae(self, x, y, x_new, f_new): 69 | f_gp,_ = self.predictive(x, y, x_new) 70 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 71 | return mae 72 | 73 | def nlpd(self, x, y, x_new, y_new): 74 | f_gp, v_gp = self.predictive(x, y, x_new) 75 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 76 | return nlpd 77 | 78 | # FOR HIERARCHICAL SETTINGS 79 | 80 | def predictive_layer(self, gps_m, gps_v, x_new): 81 | # gps_m is a list of gp_m (distributed) 82 | # gps_v is a list of gp_v (distributed) 83 | 84 | K = len(gps_m) 85 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 86 | log_prior_v = torch.log(prior_v) 87 | log_prior_v[torch.isnan(log_prior_v)] = 1e-3 88 | 89 | gp_m = torch.zeros(x_new.size()) 90 | gp_v = torch.zeros(x_new.size()) 91 | correction = torch.zeros(x_new.size()) 92 | 93 | for k, m_k in enumerate(gps_m): 94 | v_k = gps_v[k] 95 | 96 | log_v_k = torch.log(v_k) 97 | log_v_k[torch.isnan(log_v_k)] = 1e-3 98 | 99 | beta_k = 0.5 * (log_prior_v - log_v_k) 100 | correction += beta_k 101 | 102 | gp_m += m_k / v_k 103 | gp_v += 1.0 / v_k 104 | 105 | correction = (1 - correction) / prior_v 106 | gp_v = 1.0 / (gp_v + correction) 107 | gp_m = gp_v * gp_m 108 | 109 | return gp_m, gp_v 110 | 111 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 112 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 113 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 114 | return rmse 115 | 116 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 117 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 118 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 119 | return mae 120 | 121 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 122 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 123 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 124 | return nlpd -------------------------------------------------------------------------------- /data/banana.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/data/banana.mat -------------------------------------------------------------------------------- /data/nasa.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/data/nasa.mat -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/banana.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Banana Classification 15 | # ----------------------------------------------------------------- 16 | 17 | from kernels.rbf import RBF 18 | from likelihoods.gaussian import Gaussian 19 | from likelihoods.bernoulli import Bernoulli 20 | from models.svgp import SVGP 21 | from models.ensemblegp import EnsembleGP 22 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel 23 | from optimization.algorithms import AlgorithmVEM 24 | from sklearn.model_selection import train_test_split 25 | 26 | import torch 27 | import numpy as np 28 | import scipy.io as sio 29 | import matplotlib.pyplot as plt 30 | from matplotlib2tikz import save as tikz_save 31 | 32 | plt.rc('text', usetex=True) 33 | plt.rc('font', family='serif') 34 | 35 | # COOLORS.CO palettes 36 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 37 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 38 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 39 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 40 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 41 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 42 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 43 | 44 | color_palette = color_palette_6 45 | color_0 = color_palette_6[0] 46 | color_1 = color_palette_6[4] 47 | 48 | # Load Data -- 49 | data = sio.loadmat('../data/banana.mat') 50 | y_banana = data['banana_Y'] 51 | x_banana = data['banana_X'] 52 | 53 | trials = 10 54 | nlpd_metrics = np.zeros((1,trials)) 55 | 56 | plot_local = False 57 | plot_ensemble = False 58 | save = False 59 | 60 | for trial in range(trials): 61 | print('TRIAL = ' + str(trial) + '/' + str(trials)) 62 | x, x_test, y, y_test = train_test_split(x_banana, y_banana, test_size=0.33, random_state=42) 63 | 64 | x_test = torch.from_numpy(x_test).float() 65 | y_test = torch.from_numpy(y_test).float() 66 | 67 | # Sorting wrt first input dimension 68 | y = y[x[:,0].argsort()] 69 | x = x[x[:,0].argsort()] 70 | 71 | # plot 
limits 72 | max_x = x[:,0].max() 73 | max_y = x[:,1].max() 74 | min_x = x[:,0].min() 75 | min_y = x[:,1].min() 76 | 77 | # Division into 4 regions 78 | x_1 = torch.from_numpy(x[(x[:,0]<0.0) & (x[:,1]<0.0),:]).float() 79 | y_1 = torch.from_numpy(y[(x[:,0]<0.0) & (x[:,1]<0.0),:]).float() 80 | 81 | x_2 = torch.from_numpy(x[(x[:,0]>0.0) & (x[:,1]<0.0),:]).float() 82 | y_2 = torch.from_numpy(y[(x[:,0]>0.0) & (x[:,1]<0.0),:]).float() 83 | 84 | x_3 = torch.from_numpy(x[(x[:,0]>0.0) & (x[:,1]>0.0),:]).float() 85 | y_3 = torch.from_numpy(y[(x[:,0]>0.0) & (x[:,1]>0.0),:]).float() 86 | 87 | x_4 = torch.from_numpy(x[(x[:,0]<0.0) & (x[:,1]>0.0),:]).float() 88 | y_4 = torch.from_numpy(y[(x[:,0]<0.0) & (x[:,1]>0.0),:]).float() 89 | 90 | # All tasks 91 | x_tasks = [x_1, x_2, x_3, x_4] 92 | y_tasks = [y_1, y_2, y_3, y_4] 93 | 94 | K = len(x_tasks) 95 | sigmoid = torch.nn.Sigmoid() 96 | 97 | M_k = 3 # inducing points per side 98 | N_test = 80 # test points per side 99 | 100 | ########################### 101 | # # 102 | # DISTRIBUTED TASKS # 103 | # # 104 | ########################### 105 | 106 | models = [] 107 | for k, x_k in enumerate(x_tasks): 108 | 109 | print('- -') 110 | print('----- TASK k=' + str(k + 1) + ' ------') 111 | print('- -') 112 | 113 | y_k = y_tasks[k] 114 | kernel_k = RBF() 115 | likelihood_k = Bernoulli() 116 | model_k = SVGP(kernel_k, likelihood_k, M_k**2, input_dim=2) 117 | 118 | # initial grid of inducing-points 119 | mx = torch.mean(x_k[:, 0]) 120 | my = torch.mean(x_k[:, 1]) 121 | vx = torch.var(x_k[:, 0]) 122 | vy = torch.var(x_k[:, 1]) 123 | 124 | zy = np.linspace(my - 3*vy, my + 3*vy, M_k) 125 | zx = np.linspace(mx - 3*vx, mx + 3*vx, M_k) 126 | ZX, ZY = np.meshgrid(zx, zy) 127 | ZX = ZX.reshape(M_k ** 2, 1) 128 | ZY = ZY.reshape(M_k ** 2, 1) 129 | Z = np.hstack((ZX, ZY)) 130 | z_k = torch.from_numpy(Z).float() 131 | 132 | model_k.z = torch.nn.Parameter(z_k, requires_grad=True) 133 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_k, iters=7) 134 | 135 | vem_algorithm.ve_its = 20 136 | vem_algorithm.vm_its = 10 137 | vem_algorithm.lr_m = 1e-3 138 | vem_algorithm.lr_L = 1e-6 139 | vem_algorithm.lr_hyp = 1e-6 140 | vem_algorithm.lr_z = 1e-4 141 | 142 | vem_algorithm.fit() 143 | models.append(model_k) 144 | 145 | # NLPD -- Metrics 146 | nlpd = model_k.nlpd(x_test, y_test) 147 | 148 | print('Local Model ('+str(k+1)+')- NLPD: ', nlpd) 149 | print(' ') 150 | 151 | if plot_local: 152 | 153 | min_tx = x[:,0].min() - 0.15 154 | min_ty = x[:,1].min() - 0.15 155 | max_tx = x[:,0].max() + 0.15 156 | max_ty = x[:,1].max() + 0.15 157 | 158 | ty = np.linspace(min_ty, max_ty, N_test) 159 | tx = np.linspace(min_tx, max_tx, N_test) 160 | TX_grid, TY_grid = np.meshgrid(tx, ty) 161 | TX = TX_grid.reshape(N_test ** 2, 1) 162 | TY = TY_grid.reshape(N_test ** 2, 1) 163 | X_test = np.hstack((TX, TY)) 164 | x_test = torch.from_numpy(X_test).float() 165 | 166 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 167 | gp = sigmoid(torch.from_numpy(gp)) 168 | 169 | # Plot 170 | plt.figure(figsize=(7, 6)) 171 | ax = plt.axes() 172 | plt.plot(x_k[y_k[:, 0] == 0, 0], x_k[y_k[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5) 173 | plt.plot(x_k[y_k[:, 0] == 1, 0], x_k[y_k[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5) 174 | plt.plot(model_k.z[:,0].detach(), model_k.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 175 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 176 | levels=[0.25, 0.5, 0.75], zorder=10) 177 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 178 | 179 | 
plt.title(r'Banana Recyclable GP - '+ str(k + 1) ) 180 | plt.xlabel(r'$x_1$ input') 181 | plt.ylabel(r'$x_2$ input') 182 | plt.xlim(-2.5, 2.5) 183 | plt.ylim(-2.5, 2.5) 184 | 185 | if save: 186 | plt.savefig(fname='./figs/banana/banana_task_' + str(k + 1) + '.pdf', format='pdf') 187 | 188 | #plt.show() 189 | plt.close() 190 | 191 | ########################### 192 | # # 193 | # ENSEMBLE INFERENCE # 194 | # # 195 | ########################### 196 | 197 | print('- -') 198 | print('----- ENSEMBLE ------') 199 | print('- -') 200 | 201 | M_e = 5 202 | kernel = RBF() 203 | likelihood = Bernoulli() 204 | model_e = EnsembleGP(kernel, likelihood, models, M_e**2, input_dim=2) 205 | 206 | # initial grid of inducing-points 207 | mx = np.mean(x[:, 0]) 208 | my = np.mean(x[:, 1]) 209 | vx = np.var(x[:, 0]) 210 | vy = np.var(x[:, 1]) 211 | 212 | zy = np.linspace(my - 1.5*vy, my + 1.5*vy, M_e) 213 | zx = np.linspace(mx - 1.5*vx, mx + 1.5*vx, M_e) 214 | ZX, ZY = np.meshgrid(zx, zy) 215 | ZX = ZX.reshape(M_e ** 2, 1) 216 | ZY = ZY.reshape(M_e ** 2, 1) 217 | Z = np.hstack((ZX, ZY)) 218 | z_e = torch.from_numpy(Z).float() 219 | 220 | model_e.z = torch.nn.Parameter(z_e, requires_grad=True) 221 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=20) 222 | 223 | vem_algorithm.ve_its = 20 224 | vem_algorithm.vm_its = 10 225 | vem_algorithm.lr_m = 1e-3 226 | vem_algorithm.lr_L = 1e-5 227 | vem_algorithm.lr_hyp = 1e-6 228 | vem_algorithm.lr_z = 1e-5 229 | 230 | vem_algorithm.fit() 231 | 232 | # NLPD -- Metrics 233 | nlpd = model_e.nlpd(x_test, y_test) 234 | 235 | nlpd_metrics[0, trial] = nlpd 236 | 237 | print('Banana Ensemble NLPD: ', nlpd) 238 | print(' ') 239 | 240 | if plot_ensemble: 241 | 242 | min_tx = x[:,0].min() - 0.15 243 | min_ty = x[:,1].min() - 0.15 244 | max_tx = x[:,0].max() + 0.15 245 | max_ty = x[:,1].max() + 0.15 246 | 247 | ty = np.linspace(min_ty, max_ty, N_test) 248 | tx = np.linspace(min_tx, max_tx, N_test) 249 | TX_grid, TY_grid = np.meshgrid(tx, ty) 250 | TX = TX_grid.reshape(N_test ** 2, 1) 251 | TY = TY_grid.reshape(N_test ** 2, 1) 252 | X_test = np.hstack((TX, TY)) 253 | x_test = torch.from_numpy(X_test).float() 254 | 255 | gp, _, _ = model_e.predictive(x_test) 256 | gp = sigmoid(torch.from_numpy(gp)) 257 | 258 | # Plot 259 | plt.figure(figsize=(7, 6)) 260 | ax = plt.axes() 261 | plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5) 262 | plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5) 263 | plt.plot(model_e.z[:,0].detach(), model_e.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 264 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 265 | levels=[0.25, 0.5, 0.75], zorder=10) 266 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 267 | 268 | plt.title(r'Banana GP Ensemble') 269 | plt.xlabel(r'$x_1$ input') 270 | plt.ylabel(r'$x_2$ input') 271 | plt.xlim(-2.5, 2.5) 272 | plt.ylim(-2.5, 2.5) 273 | 274 | if save: 275 | plt.savefig(fname='./figs/banana/banana_task_ensemble.pdf', format='pdf') 276 | 277 | plt.show() 278 | #plt.close() 279 | 280 | -------------------------------------------------------------------------------- /experiments/baseline.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # 
----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | # 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Baselines 15 | # ----------------------------------------------------------------- 16 | 17 | import torch 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | 21 | plt.rc('text', usetex=True) 22 | plt.rc('font', family='serif') 23 | 24 | # COOLORS.CO palettes 25 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 26 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 27 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 28 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 29 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 30 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 31 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 32 | 33 | color_palette = color_palette_2 34 | 35 | from kernels.rbf import RBF 36 | from likelihoods.gaussian import Gaussian 37 | from models.svgp import SVGP 38 | from models.ensemblegp import EnsembleGP 39 | from baselines.distgp import DistGP 40 | from baselines.poegp import PoeGP 41 | from baselines.gpoegp import GenPoeGP 42 | from baselines.bcm import BayesianCM 43 | from baselines.rbcm import RobustBayesianCM 44 | from baselines.dvigp import DVIGP 45 | from optimization.algorithms import AlgorithmVEM 46 | from optimization.algorithms import GPR_Optimizer 47 | from util import smooth_function 48 | 49 | #experiment = '10k' 50 | experiment = '100k' 51 | #experiment = '1m' 52 | 53 | if experiment == '10k': 54 | node_overlapping = 1 55 | N_k = 200 56 | trials = 10 57 | elif experiment == '100k': 58 | node_overlapping = 5 59 | N_k = 400 60 | trials = 10 61 | elif experiment == '1m': 62 | node_overlapping = 100 63 | N_k = 800 64 | trials = 10 65 | else: 66 | raise ValueError('Experiment indicator not valid! 
Must be {10k, 100k or 1m}') 67 | 68 | M_k = 3 69 | M_e = 35 70 | plot_local = True 71 | plot_ensemble = True 72 | save = True 73 | 74 | recy_metrics = np.zeros((3,trials)) 75 | poe_metrics = np.zeros((3,trials)) 76 | gpoe_metrics = np.zeros((3,trials)) 77 | bcm_metrics = np.zeros((3,trials)) 78 | rbcm_metrics = np.zeros((3,trials)) 79 | 80 | for trial in range(trials): 81 | 82 | tasks = 50 83 | T = 50 84 | 85 | print('TRIAL = '+str(trial)+'/'+str(trials)) 86 | 87 | ########################### 88 | # # 89 | # DISTRIBUTED TASKS # 90 | # # 91 | ########################### 92 | 93 | min_x = 0.0 94 | max_x = T * 0.1 95 | segment_x = (max_x - min_x) / tasks 96 | x_tasks = [] 97 | y_tasks = [] 98 | for n in range(node_overlapping): 99 | for k in range(T): 100 | x_k = ((min_x + (k * segment_x)) - (min_x + ((k + 1) * segment_x))) * torch.rand(N_k, 1) + ( 101 | min_x + ((k + 1) * segment_x)) 102 | x_k, _ = torch.sort(x_k, dim=0) 103 | y_k = smooth_function(x_k) + 2.0 * torch.randn(N_k, 1) 104 | x_tasks.append(x_k) 105 | y_tasks.append(y_k) 106 | 107 | tasks = T * node_overlapping 108 | 109 | print('# of tasks: ', tasks) 110 | 111 | ########################### 112 | # # 113 | # PARALLEL INFERENCE # 114 | # # 115 | ########################### 116 | 117 | N_k_test = 400 118 | x_test = torch.linspace(min_x-0.5, max_x+0.5, N_k_test)[:, None] 119 | models = [] # for recyclable GPs 120 | models_dist = [] # for distributed GPs 121 | x_all = [] # for distributed GPs 122 | y_all = [] # for distributed GPs 123 | for k, x_k in enumerate(x_tasks): 124 | print('- -') 125 | print('----- TASK k='+str(k+1)+' ------') 126 | print('- -') 127 | ###################################################### 128 | # 1. RECYCLABLE GP 129 | ###################################################### 130 | kernel_k = RBF() 131 | likelihood_k = Gaussian(fit_noise=False) 132 | model_k = SVGP(kernel_k, likelihood_k, M_k) 133 | 134 | z_k_min = min_x + ((k%T)*segment_x) 135 | z_k_max = min_x + (((k%T)+1)*segment_x) 136 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True) 137 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=15) 138 | 139 | vem_algorithm.ve_its = 20 140 | vem_algorithm.vm_its = 10 141 | vem_algorithm.lr_m = 1e-6 142 | vem_algorithm.lr_L = 1e-10 143 | vem_algorithm.lr_hyp = 1e-10 144 | vem_algorithm.lr_z = 1e-10 145 | 146 | vem_algorithm.fit() 147 | models.append(model_k) 148 | 149 | ###################################################### 150 | # 2. 
DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 151 | ###################################################### 152 | 153 | kernel_j = RBF() 154 | likelihood_j = Gaussian(fit_noise=True) 155 | model_j = DistGP(kernel_j, likelihood_j) 156 | GPR_Optimizer(model_j, x_k, y_tasks[k]) 157 | 158 | models_dist.append(model_j) 159 | x_all.append(x_k) 160 | y_all.append(y_tasks[k]) 161 | 162 | if plot_local: 163 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 164 | disgp_m, disgp_v = model_j.predictive(x_k, y_tasks[k], x_test) 165 | 166 | disgp = disgp_m.detach().numpy() 167 | disgp_upper = (disgp_m + 2 * torch.sqrt(disgp_v)).detach().numpy() + 2 * model_j.likelihood.sigma.detach().numpy() 168 | disgp_lower = (disgp_m - 2 * torch.sqrt(disgp_v)).detach().numpy() - 2 * model_j.likelihood.sigma.detach().numpy() 169 | 170 | plt.figure(figsize=(12, 4)) 171 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k%len(color_palette)], markersize=2.5, markeredgewidth=0.75) 172 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k, 1), color=color_palette[k%len(color_palette)], linestyle='', marker='.',markersize=5) 173 | 174 | plt.plot(x_test, gp, 'k-', linewidth=1.5) 175 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5') 176 | plt.plot(x_test, gp_upper, 'k-', linewidth=2.5) 177 | plt.plot(x_test, gp_lower, 'k-', linewidth=2.5) 178 | 179 | plt.plot(x_test, disgp, 'b-', linewidth=1.5) 180 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5') 181 | plt.plot(x_test, disgp_upper, 'b-', linewidth=2.5) 182 | plt.plot(x_test, disgp_lower, 'b-', linewidth=2.5) 183 | 184 | plt.title(r'Variational Sparse GP -- (task=' + str(k+1) + ')') 185 | plt.xlabel(r'Input, $x$') 186 | plt.ylabel(r'Output, $y$') 187 | plt.xlim(min_x - 0.5, max_x + 0.5) 188 | plt.ylim(-22.0, 22.0) 189 | 190 | if save: 191 | plt.savefig(fname='./figs/baseline/distributed_task_'+str(k+1)+'.pdf',format='pdf') 192 | 193 | plt.close() 194 | #plt.show() 195 | 196 | ########################### 197 | # # 198 | # ENSEMBLE INFERENCE # 199 | # # 200 | ########################### 201 | print('- -') 202 | print('----- ENSEMBLE ------') 203 | print('- -') 204 | 205 | # TEST DATA FOR EVALUATION 206 | N_e_test = 400 207 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 208 | f_test_ensemble = smooth_function(x_test_ensemble) 209 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1) 210 | 211 | ###################################################### 212 | # 1. 
RECYCLABLE GP 213 | ###################################################### 214 | 215 | kernel = RBF() 216 | likelihood = Gaussian(fit_noise=False) 217 | model_e = EnsembleGP(kernel, likelihood, models, M_e) 218 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:,None], requires_grad=True) 219 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=10) 220 | 221 | vem_algorithm.ve_its = 30 222 | vem_algorithm.vm_its = 10 223 | vem_algorithm.lr_m = 1e-3 224 | vem_algorithm.lr_L = 1e-6 225 | vem_algorithm.lr_hyp = 1e-8 226 | vem_algorithm.lr_z = 1e-8 227 | 228 | vem_algorithm.fit() 229 | 230 | nlpd = model_e.nlpd(x_test_ensemble, y_test_ensemble) 231 | rmse = model_e.rmse(x_test_ensemble, f_test_ensemble) 232 | mae = model_e.mae(x_test_ensemble, f_test_ensemble) 233 | 234 | recy_metrics[0, trial] = nlpd 235 | recy_metrics[1, trial] = rmse 236 | recy_metrics[2, trial] = mae 237 | 238 | print('Recyclable - NLPD: ', nlpd) 239 | print('Recyclable - RMSE: ', rmse) 240 | print('Recyclable - MAE: ', mae) 241 | print(' ') 242 | 243 | ###################################################### 244 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 245 | ###################################################### 246 | 247 | # A. POE _________// 248 | 249 | poe_model = PoeGP(models_dist) 250 | 251 | nlpd = poe_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 252 | rmse = poe_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 253 | mae = poe_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 254 | 255 | poe_metrics[0, trial] = nlpd 256 | poe_metrics[1, trial] = rmse 257 | poe_metrics[2, trial] = mae 258 | 259 | print('POE-NLPD: ', nlpd) 260 | print('POE-RMSE: ', rmse) 261 | print('POE-MAE: ', mae) 262 | print(' ') 263 | 264 | # B. GPOE _________// 265 | 266 | gpoe_model = GenPoeGP(models_dist) 267 | 268 | nlpd = gpoe_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 269 | rmse = gpoe_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 270 | mae = gpoe_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 271 | 272 | gpoe_metrics[0, trial] = nlpd 273 | gpoe_metrics[1, trial] = rmse 274 | gpoe_metrics[2, trial] = mae 275 | 276 | print('GenPOE-NLPD: ', nlpd) 277 | print('GenPOE-RMSE: ', rmse) 278 | print('GenPOE-MAE: ', mae) 279 | print(' ') 280 | 281 | # C. BCM _________// 282 | 283 | bcm_model = BayesianCM(models_dist) 284 | 285 | nlpd = bcm_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 286 | rmse = bcm_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 287 | mae = bcm_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 288 | 289 | bcm_metrics[0, trial] = nlpd 290 | bcm_metrics[1, trial] = rmse 291 | bcm_metrics[2, trial] = mae 292 | 293 | print('BCM-NLPD: ', nlpd) 294 | print('BCM-RMSE: ', rmse) 295 | print('BCM-MAE: ', mae) 296 | print(' ') 297 | 298 | # D. 
RBCM _________// 299 | 300 | rbcm_model = RobustBayesianCM(models_dist) 301 | 302 | nlpd = rbcm_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 303 | rmse = rbcm_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 304 | mae = rbcm_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 305 | 306 | rbcm_metrics[0, trial] = nlpd 307 | rbcm_metrics[1, trial] = rmse 308 | rbcm_metrics[2, trial] = mae 309 | 310 | print('RBCM-NLPD: ', nlpd) 311 | print('RBCM-RMSE: ', rmse) 312 | print('RBCM-MAE: ', mae) 313 | print(' ') 314 | 315 | # save to csv file 316 | np.savetxt('./metrics/recy_metrics_'+ experiment +'.csv', recy_metrics, delimiter=',') 317 | np.savetxt('./metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',') 318 | np.savetxt('./metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',') 319 | np.savetxt('./metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',') 320 | np.savetxt('./metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',') 321 | 322 | if plot_ensemble: 323 | gp, gp_upper, gp_lower = model_e.predictive(x_test_ensemble) 324 | 325 | poe_m, poe_v = poe_model.predictive(x_all, y_all, x_test_ensemble) 326 | gpoe_m, gpoe_v = gpoe_model.predictive(x_all, y_all, x_test_ensemble) 327 | bcm_m, bcm_v = bcm_model.predictive(x_all, y_all, x_test_ensemble) 328 | rbcm_m, rbcm_v = rbcm_model.predictive(x_all, y_all, x_test_ensemble) 329 | 330 | # Plot Ensemble 331 | plt.figure(figsize=(12, 4)) 332 | for k in range(50): 333 | #if k%10==0: 334 | plt.plot(x_tasks[k], y_tasks[k], ls='-', color=color_palette[k%len(color_palette)], markersize=2.5, markeredgewidth=0.75) 335 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k,1), color=color_palette[k%len(color_palette)], linestyle='', marker='.', markersize=5) 336 | 337 | plt.plot(model_e.z.detach(), -20.0 * torch.ones(M_e, 1), color='r', linestyle='', marker='x', markersize=5, markeredgewidth=1.0) 338 | plt.plot(x_test_ensemble, gp, 'k-', linewidth=1.5) 339 | plt.plot(x_test_ensemble, gp_upper, 'k-', linewidth=2.5) 340 | plt.plot(x_test_ensemble, gp_lower, 'k-', linewidth=2.5) 341 | 342 | poe = poe_m.detach().numpy() 343 | poe_upper = (poe_m + 2 * torch.sqrt(poe_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 344 | poe_lower = (poe_m - 2 * torch.sqrt(poe_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 345 | 346 | plt.plot(x_test, poe, 'g-', linewidth=1.5) 347 | plt.plot(x_test, poe_upper, 'g-', linewidth=2.5) 348 | plt.plot(x_test, poe_lower, 'g-', linewidth=2.5) 349 | 350 | gpoe = gpoe_m.detach().numpy() 351 | gpoe_upper = (gpoe_m + 2 * torch.sqrt(gpoe_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 352 | gpoe_lower = (gpoe_m - 2 * torch.sqrt(gpoe_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 353 | 354 | plt.plot(x_test, gpoe, 'm-', linewidth=1.5) 355 | plt.plot(x_test, gpoe_upper, 'm-', linewidth=2.5) 356 | plt.plot(x_test, gpoe_lower, 'm-', linewidth=2.5) 357 | 358 | bcm = bcm_m.detach().numpy() 359 | bcm_upper = (bcm_m + 2 * torch.sqrt(bcm_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 360 | bcm_lower = (bcm_m - 2 * torch.sqrt(bcm_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 361 | 362 | plt.plot(x_test, bcm, 'r-', linewidth=1.5) 363 | plt.plot(x_test, bcm_upper, 'r-', linewidth=2.5) 364 | plt.plot(x_test, bcm_lower, 'r-', linewidth=2.5) 365 | 366 | rbcm = rbcm_m.detach().numpy() 367 | rbcm_upper = (rbcm_m + 2 *
torch.sqrt(rbcm_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 368 | rbcm_lower = (rbcm_m - 2 * torch.sqrt(rbcm_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 369 | 370 | plt.plot(x_test, rbcm, 'b-', linewidth=1.5) 371 | plt.plot(x_test, rbcm_upper, 'b-', linewidth=2.5) 372 | plt.plot(x_test, rbcm_lower, 'b-', linewidth=2.5) 373 | 374 | plt.title(r'Ensemble GP Model -- (tasks='+str(tasks)+')') 375 | plt.xlabel(r'Input, $x$') 376 | plt.ylabel(r'Output, $y$') 377 | plt.xlim(min_x-0.5, max_x+0.5) 378 | plt.ylim(-22.0, 22.0) 379 | 380 | if save: 381 | plt.savefig(fname='./figs/baseline/distributed_ensemble.pdf',format='pdf') 382 | 383 | #plt.show() 384 | plt.close() -------------------------------------------------------------------------------- /experiments/dvigp_nlpd.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Baselines / Y. Gal et al. (2014) 15 | # ----------------------------------------------------------------- 16 | 17 | import torch 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | 21 | plt.rc('text', usetex=True) 22 | plt.rc('font', family='serif') 23 | 24 | # COOLORS.CO palettes 25 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 26 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 27 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 28 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 29 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 30 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 31 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 32 | 33 | color_palette = color_palette_2 34 | 35 | from kernels.rbf import RBF 36 | from likelihoods.gaussian import Gaussian 37 | from baselines.distgp import DistGP 38 | from baselines.dvigp import DVIGP 39 | from optimization.algorithms import GPR_Optimizer 40 | #from models.svgp import predictive 41 | from optimization.algorithms import vem_algorithm 42 | from util import smooth_function 43 | 44 | experiment = '10k' 45 | #experiment = '100k' 46 | #experiment = '1m' 47 | 48 | if experiment == '10k': 49 | node_overlapping = 1 50 | N_k = 200 51 | trials = 10 52 | N = 10000 53 | elif experiment == '100k': 54 | node_overlapping = 5 55 | N_k = 400 56 | trials = 10 57 | N = 100000 58 | elif experiment == '1m': 59 | node_overlapping = 100 60 | N_k = 800 61 | trials = 10 62 | N = 1000000 63 | else: 64 | raise ValueError('Experiment indicator not valid! 
Must be {10k, 100k or 1m}') 65 | 66 | M = 35 67 | plot_local = False 68 | plot_ensemble = False 69 | save = False 70 | 71 | dvigp_metrics = np.zeros((3,trials)) 72 | 73 | for trial in range(trials): 74 | 75 | tasks = 50 76 | T = 50 77 | 78 | print('TRIAL = '+str(trial)+'/'+str(trials)) 79 | 80 | ########################### 81 | # # 82 | # DISTRIBUTED TASKS # 83 | # # 84 | ########################### 85 | 86 | min_x = 0.0 87 | max_x = T * 0.1 88 | x = (min_x - max_x)*torch.rand(N, 1) + max_x 89 | x, _ = torch.sort(x, dim=0) 90 | y = smooth_function(x) + 2.0*torch.randn(N, 1) 91 | 92 | tasks = T * node_overlapping 93 | 94 | print('Number # of tasks: ', tasks) 95 | 96 | ###################################################### 97 | # 1. DISTRIBUTED VIGP (Gal 2014) 98 | ###################################################### 99 | 100 | kernel_j = RBF() 101 | likelihood_j = Gaussian(fit_noise=True) 102 | 103 | model = DVIGP(kernel_j, likelihood_j, M, nodes=tasks) 104 | model.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M)[:,None], requires_grad=True) 105 | vem_algorithm(model, x, y, em_iters=20, plot=False) 106 | 107 | # TEST DATA FOR EVALUATION 108 | N_e_test = 400 109 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 110 | f_test_ensemble = smooth_function(x_test_ensemble) 111 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1) 112 | 113 | nlpd = model.nlpd(x_test_ensemble, y_test_ensemble) 114 | rmse = model.rmse(x_test_ensemble, f_test_ensemble) 115 | mae = model.mae(x_test_ensemble, f_test_ensemble) 116 | 117 | dvigp_metrics[0, trial] = nlpd 118 | dvigp_metrics[1, trial] = rmse 119 | dvigp_metrics[2, trial] = mae 120 | 121 | print('Distributed VIGP - NLPD: ', nlpd) 122 | print('Distributed VIGP - RMSE: ', rmse) 123 | print('Distributed VIGP - MAE: ', mae) 124 | print(' ') 125 | 126 | 127 | -------------------------------------------------------------------------------- /experiments/image.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- MNIST 15 | # ----------------------------------------------------------------- 16 | 17 | 18 | import torch 19 | import torchvision 20 | import numpy as np 21 | import matplotlib.pyplot as plt 22 | 23 | from kernels.rbf import RBF 24 | from likelihoods.gaussian import Gaussian 25 | from likelihoods.bernoulli import Bernoulli 26 | from models.svgp import SVGP 27 | from models.ensemblegp import EnsembleGP 28 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel 29 | from optimization.algorithms import AlgorithmVEM 30 | 31 | plt.rc('text', usetex=True) 32 | plt.rc('font', family='serif') 33 | 34 | # COOLORS.CO palettes 35 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 36 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 37 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 38 | color_palette_4 = 
['#002642','#840032','#e59500','#e5dada','#02040e'] 39 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 40 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 41 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 42 | 43 | # For 0-number 44 | #color_palette = color_palette_5 45 | #color_0 = color_palette[0] 46 | #color_1 = color_palette[4] 47 | 48 | # For 1-number 49 | color_palette = color_palette_3 50 | color_0 = color_palette[1] 51 | color_1 = color_palette[4] 52 | 53 | mnist = torchvision.datasets.MNIST('../data/', train=True, download=False, transform=torchvision.transforms.Compose([ 54 | torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.1307,), (0.3081,))])) 55 | 56 | data = enumerate(torch.utils.data.DataLoader(mnist, batch_size=10, shuffle=False)) 57 | batch_id, (image, label) = next(data) 58 | 59 | number = 1 60 | if number > 0: 61 | i = 3 62 | else: 63 | i = 1 64 | 65 | y = image[i][0] 66 | y[y>0.0] = 1.0 67 | y[y<0.0] = 0.0 68 | 69 | pixel = y.size(0) 70 | 71 | y = y.view(1,pixel ** 2).t() 72 | 73 | x1 = np.linspace(-1.0, 1.0, pixel) 74 | x2 = np.linspace(-1.0, 1.0, pixel) 75 | X1, X2 = np.meshgrid(x1, x2) 76 | X1 = X1.reshape(pixel ** 2, 1) 77 | X2 = -X2.reshape(pixel ** 2, 1) 78 | X_np = np.hstack((X1, X2)) 79 | x = torch.from_numpy(X_np).float() 80 | 81 | # plot limits 82 | max_x = x[:,0].max() 83 | max_y = x[:,1].max() 84 | min_x = x[:,0].min() 85 | min_y = x[:,1].min() 86 | 87 | x_tasks = [] 88 | y_tasks = [] 89 | if number == 0: 90 | # Division into 4 regions 91 | x_1 = x[(x[:,0]<0.0) & (x[:,1]<0.0),:] 92 | y_1 = y[(x[:,0]<0.0) & (x[:,1]<0.0),:] 93 | 94 | x_2 = x[(x[:,0]>0.0) & (x[:,1]<0.0),:] 95 | y_2 = y[(x[:,0]>0.0) & (x[:,1]<0.0),:] 96 | 97 | x_3 = x[(x[:,0]>0.0) & (x[:,1]>0.0),:] 98 | y_3 = y[(x[:,0]>0.0) & (x[:,1]>0.0),:] 99 | 100 | x_4 = x[(x[:,0]<0.0) & (x[:,1]>0.0),:] 101 | y_4 = y[(x[:,0]<0.0) & (x[:,1]>0.0),:] 102 | 103 | # All tasks 104 | x_tasks += [x_1, x_2, x_3, x_4] 105 | y_tasks += [y_1, y_2, y_3, y_4] 106 | 107 | elif number == 1: 108 | # Division into 2 regions 109 | x_1 = x[(x[:, 1] < 0.0), :] 110 | y_1 = y[(x[:, 1] < 0.0), :] 111 | 112 | x_2 = x[(x[:, 1] > 0.0), :] 113 | y_2 = y[(x[:, 1] > 0.0), :] 114 | 115 | # All tasks 116 | x_tasks += [x_1, x_2] 117 | y_tasks += [y_1, y_2] 118 | 119 | 120 | K = len(x_tasks) 121 | sigmoid = torch.nn.Sigmoid() 122 | 123 | M_k = 4 # inducing points per side 124 | N_test = 80 # test points per side 125 | 126 | plot_local = True 127 | plot_ensemble = True 128 | save = True 129 | 130 | ########################### 131 | # # 132 | # DISTRIBUTED TASKS # 133 | # # 134 | ########################### 135 | 136 | models = [] 137 | for k, x_k in enumerate(x_tasks): 138 | 139 | print('- -') 140 | print('----- TASK k=' + str(k + 1) + ' ------') 141 | print('- -') 142 | 143 | y_k = y_tasks[k] 144 | kernel_k = RBF() 145 | likelihood_k = Bernoulli() 146 | model_k = SVGP(kernel_k, likelihood_k, M_k**2, input_dim=2) 147 | 148 | # initial grid of inducing-points 149 | mx = torch.mean(x_k[:, 0]) 150 | my = torch.mean(x_k[:, 1]) 151 | vx = torch.var(x_k[:, 0]) 152 | vy = torch.var(x_k[:, 1]) 153 | 154 | zy = np.linspace(my - 3*vy, my + 3*vy, M_k) 155 | zx = np.linspace(mx - 3*vx, mx + 3*vx, M_k) 156 | ZX, ZY = np.meshgrid(zx, zy) 157 | ZX = ZX.reshape(M_k ** 2, 1) 158 | ZY = ZY.reshape(M_k ** 2, 1) 159 | Z = np.hstack((ZX, ZY)) 160 | z_k = torch.from_numpy(Z).float() 161 | 162 | model_k.z = torch.nn.Parameter(z_k, requires_grad=True) 163 | vem_algorithm = 
AlgorithmVEM(model_k, x_k, y_k, iters=7) 164 | 165 | vem_algorithm.ve_its = 20 166 | vem_algorithm.vm_its = 10 167 | vem_algorithm.lr_m = 1e-3 168 | vem_algorithm.lr_L = 1e-6 169 | vem_algorithm.lr_hyp = 1e-6 170 | vem_algorithm.lr_z = 1e-4 171 | 172 | vem_algorithm.fit() 173 | models.append(model_k) 174 | 175 | if plot_local: 176 | 177 | min_tx = x[:,0].min() - 0.15 178 | min_ty = x[:,1].min() - 0.15 179 | max_tx = x[:,0].max() + 0.15 180 | max_ty = x[:,1].max() + 0.15 181 | 182 | ty = np.linspace(min_ty, max_ty, N_test) 183 | tx = np.linspace(min_tx, max_tx, N_test) 184 | TX_grid, TY_grid = np.meshgrid(tx, ty) 185 | TX = TX_grid.reshape(N_test ** 2, 1) 186 | TY = TY_grid.reshape(N_test ** 2, 1) 187 | X_test = np.hstack((TX, TY)) 188 | x_test = torch.from_numpy(X_test).float() 189 | 190 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 191 | gp = sigmoid(torch.from_numpy(gp)) 192 | 193 | # Plot 194 | plt.figure(figsize=(7, 7)) 195 | ax = plt.axes() 196 | plt.plot(x_k[y_k[:, 0] == 0, 0], x_k[y_k[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=12.0) 197 | plt.plot(x_k[y_k[:, 0] == 1, 0], x_k[y_k[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=12.0) 198 | plt.plot(model_k.z[:,0].detach(), model_k.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 199 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 200 | levels=[0.25, 0.5, 0.75], zorder=10) 201 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 202 | 203 | plt.title(r'MNIST Recyclable GP - '+ str(k + 1) ) 204 | plt.xlabel(r'$x_1$ input') 205 | plt.ylabel(r'$x_2$ input') 206 | plt.xlim(-1.2, 1.2) 207 | plt.ylim(-1.2, 1.2) 208 | 209 | if save: 210 | plt.savefig(fname='./figs/image/0_number_task_' + str(k + 1) + '.pdf', format='pdf') 211 | 212 | plt.show() 213 | #plt.close() 214 | 215 | ########################### 216 | # # 217 | # ENSEMBLE INFERENCE # 218 | # # 219 | ########################### 220 | 221 | print('- -') 222 | print('----- ENSEMBLE ------') 223 | print('- -') 224 | 225 | if number == 0: 226 | M_e = 5 227 | elif number == 1: 228 | M_e = 4 229 | 230 | kernel = RBF() 231 | likelihood = Bernoulli() 232 | model_e = EnsembleGP(kernel, likelihood, models, M_e**2, input_dim=2) 233 | 234 | # initial grid of inducing-points 235 | mx = torch.mean(x[:, 0]) 236 | my = torch.mean(x[:, 1]) 237 | vx = torch.var(x[:, 0]) 238 | vy = torch.var(x[:, 1]) 239 | 240 | zy = np.linspace(my - 1.5*vy, my + 1.5*vy, M_e) 241 | zx = np.linspace(mx - 1.5*vx, mx + 1.5*vx, M_e) 242 | ZX, ZY = np.meshgrid(zx, zy) 243 | ZX = ZX.reshape(M_e ** 2, 1) 244 | ZY = ZY.reshape(M_e ** 2, 1) 245 | Z = np.hstack((ZX, ZY)) 246 | z_e = torch.from_numpy(Z).float() 247 | 248 | model_e.z = torch.nn.Parameter(z_e, requires_grad=True) 249 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=20) 250 | 251 | vem_algorithm.ve_its = 20 252 | vem_algorithm.vm_its = 10 253 | vem_algorithm.lr_m = 1e-3 254 | vem_algorithm.lr_L = 1e-5 255 | vem_algorithm.lr_hyp = 1e-6 256 | vem_algorithm.lr_z = 1e-5 257 | 258 | vem_algorithm.fit() 259 | 260 | if plot_ensemble: 261 | 262 | min_tx = x[:,0].min() - 0.15 263 | min_ty = x[:,1].min() - 0.15 264 | max_tx = x[:,0].max() + 0.15 265 | max_ty = x[:,1].max() + 0.15 266 | 267 | ty = np.linspace(min_ty, max_ty, N_test) 268 | tx = np.linspace(min_tx, max_tx, N_test) 269 | TX_grid, TY_grid = np.meshgrid(tx, ty) 270 | TX = TX_grid.reshape(N_test ** 2, 1) 271 | TY = TY_grid.reshape(N_test ** 2, 1) 272 | X_test = np.hstack((TX, TY)) 273 | x_test = torch.from_numpy(X_test).float() 274 | 275 | gp, _, _ 
= model_e.predictive(x_test) 276 | gp = sigmoid(torch.from_numpy(gp)) 277 | 278 | # Plot 279 | plt.figure(figsize=(7, 7)) 280 | ax = plt.axes() 281 | plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=12.0) 282 | plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=12.0) 283 | plt.plot(model_e.z[:,0].detach(), model_e.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 284 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 285 | levels=[0.25, 0.5, 0.75], zorder=10) 286 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 287 | 288 | plt.title(r'MNIST GP Ensemble') 289 | plt.xlabel(r'$x_1$ input') 290 | plt.ylabel(r'$x_2$ input') 291 | plt.xlim(-1.2, 1.2) 292 | plt.ylim(-1.2, 1.2) 293 | 294 | if save: 295 | plt.savefig(fname='./figs/image/0_number_ensemble.pdf', format='pdf') 296 | 297 | plt.show() 298 | 299 | # plt.figure(figsize=(6, 6)) 300 | # plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=8.0) 301 | # plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=8.0) 302 | # plt.xlim(-1.5, 1.5) 303 | # plt.ylim(-1.5, 1.5) 304 | # plt.show() 305 | 306 | # fig = plt.figure() 307 | # for i in range(10): 308 | # plt.subplot(2,5,i+1) 309 | # #plt.tight_layout() 310 | # print(image[i][0]) 311 | # plt.imshow(image[i][0], cmap='gray', interpolation='none') 312 | # plt.title("Ground Truth: {}".format(label[i])) 313 | # 314 | # plt.show() -------------------------------------------------------------------------------- /experiments/million_rbcm.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment II -- Baselines (Million) 15 | # ----------------------------------------------------------------- 16 | 17 | 18 | import torch 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | 22 | plt.rc('text', usetex=True) 23 | plt.rc('font', family='serif') 24 | 25 | # COOLORS.CO palettes 26 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 27 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 28 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 29 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 30 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 31 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 32 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 33 | 34 | color_palette = color_palette_2 35 | 36 | from kernels.rbf import RBF 37 | from likelihoods.gaussian import Gaussian 38 | from models.svgp import SVGP 39 | from models.ensemblegp import EnsembleGP 40 | from baselines.distgp import DistGP 41 | from baselines.poegp import PoeGP 42 | from baselines.gpoegp import GenPoeGP 43 | from baselines.bcm import BayesianCM 44 | from baselines.rbcm import RobustBayesianCM 45 | from baselines.dvigp import DVIGP 46 | from 
optimization.algorithms import AlgorithmVEM 47 | from optimization.algorithms import GPR_Optimizer 48 | from util import smooth_function 49 | 50 | experiment = '1m' 51 | 52 | my_path = './../../../../Dropbox/PhD/Works/RecyclableGP/' 53 | 54 | N_k = 400 # 200 55 | M_k = 3 56 | M_e = 35 57 | 58 | T = 50 59 | tasks = 50 60 | layer_1_merge = 10 # 10 61 | layer_2_merge = 5 # 10 62 | trials = 5 63 | node_overlapping = 1 64 | 65 | plot_layer_0 = False 66 | plot_layer_1 = True 67 | plot_layer_2 = True 68 | plot_ensemble = True 69 | save = True 70 | 71 | recy_metrics = np.zeros((3,trials)) 72 | poe_metrics = np.zeros((3,trials)) 73 | gpoe_metrics = np.zeros((3,trials)) 74 | bcm_metrics = np.zeros((3,trials)) 75 | rbcm_metrics = np.zeros((3,trials)) 76 | 77 | N_test = 400 78 | min_x = 0.0 79 | max_x = T * 0.1 80 | segment_x = (max_x - min_x) / tasks 81 | x_test = torch.linspace(min_x - 0.5, max_x + 0.5, N_test)[:, None] 82 | f_test = smooth_function(x_test) 83 | y_test = f_test + 2.0 * torch.randn(N_test, 1) 84 | 85 | for trial in range(trials): 86 | print('TRIAL = '+str(trial+1)+'/'+str(trials)) 87 | layer_2 = [] 88 | layer_2_poe_gpm = [] # POE GPs (predictive) 89 | layer_2_poe_gpv = [] # POE GPs (predictive) 90 | layer_2_gpoe_gpm = [] # GPOE GPs (predictive) 91 | layer_2_gpoe_gpv = [] # GPOE GPs (predictive) 92 | layer_2_bcm_gpm = [] # BCM GPs (predictive) 93 | layer_2_bcm_gpv = [] # BCM GPs (predictive) 94 | layer_2_rbcm_gpm = [] # rBCM GPs (predictive) 95 | layer_2_rbcm_gpv = [] # rBCM GPs (predictive) 96 | 97 | for j in range(layer_2_merge): 98 | print('LAYER-2 = ' + str(j+1) + '/' + str(layer_2_merge)) 99 | layer_1 = [] 100 | layer_1_poe_gpm = [] # POE GPs (predictive) 101 | layer_1_poe_gpv = [] # POE GPs (predictive) 102 | layer_1_gpoe_gpm = [] # GPOE GPs (predictive) 103 | layer_1_gpoe_gpv = [] # GPOE GPs (predictive) 104 | layer_1_bcm_gpm = [] # BCM GPs (predictive) 105 | layer_1_bcm_gpv = [] # BCM GPs (predictive) 106 | layer_1_rbcm_gpm = [] # rBCM GPs (predictive) 107 | layer_1_rbcm_gpv = [] # rBCM GPs (predictive) 108 | 109 | for m in range(layer_1_merge): 110 | print('LAYER-1 = ' + str(m+1) + '/' + str(layer_1_merge)) 111 | 112 | ########################### 113 | # LAYER 0 # 114 | # ___________ # 115 | # DISTRIBUTED # 116 | ########################### 117 | 118 | x_tasks = [] 119 | y_tasks = [] 120 | 121 | # SYNTHETIC DATA 122 | for n in range(node_overlapping): 123 | for k in range(T): 124 | x_k = ((min_x + (k * segment_x)) - (min_x + ((k + 1) * segment_x))) * torch.rand(N_k, 1) + ( 125 | min_x + ((k + 1) * segment_x)) 126 | x_k, _ = torch.sort(x_k, dim=0) 127 | y_k = smooth_function(x_k) + 2.0 * torch.randn(N_k, 1) 128 | x_tasks.append(x_k) 129 | y_tasks.append(y_k) 130 | 131 | tasks = T * node_overlapping 132 | 133 | layer_0 = [] # recyclable GPs 134 | layer_0_dist = [] # distributed GPs (models) 135 | layer_0_dist_gpm = [] # distributed GPs (predictive) 136 | layer_0_dist_gpv = [] # distributed GPs (predictive) 137 | 138 | for k, x_k in enumerate(x_tasks): 139 | print(' ') 140 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 141 | print('LAYER-0 = ' + str(k+1) + '/' + str(T*node_overlapping)) 142 | print('LAYER-1 = ' + str(m+1) + '/' + str(layer_1_merge)) 143 | print('LAYER-2 = ' + str(j+1) + '/' + str(layer_2_merge)) 144 | print('\ -') 145 | print(' ---- TASK k=' + str(k + 1) + ' ------') 146 | print('/ -') 147 | print(' ') 148 | ###################################################### 149 | # 2. 
DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 150 | ###################################################### 151 | 152 | kernel_j = RBF() 153 | likelihood_j = Gaussian(fit_noise=True) 154 | model_j = DistGP(kernel_j, likelihood_j) 155 | GPR_Optimizer(model_j, x_k, y_tasks[k]) 156 | 157 | dis_gp_m, dis_gp_v = model_j.predictive(x_k, y_tasks[k], x_test) 158 | layer_0_dist.append(model_j) 159 | layer_0_dist_gpm.append(dis_gp_m) 160 | layer_0_dist_gpv.append(dis_gp_v) 161 | 162 | ########################### 163 | # LAYER 0 # 164 | # ________ # 165 | # ENSEMBLE # 166 | ########################### 167 | 168 | print(' ') 169 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 170 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping)) 171 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge)) 172 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge)) 173 | print('\ -') 174 | print(' ------ ENSEMBLE LAYER 0 ------') 175 | print('/ -') 176 | print(' ') 177 | 178 | ######################################################### 179 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 180 | ######################################################### 181 | # A. POE _________// 182 | # B. GPOE _________// 183 | # C. BCM _________// 184 | # D. RBCM _________// 185 | 186 | poe_model = PoeGP(models=layer_0_dist) 187 | gpoe_model = GenPoeGP(models=layer_0_dist) 188 | bcm_model = BayesianCM(models=layer_0_dist) 189 | rbcm_model = RobustBayesianCM(models=layer_0_dist) 190 | 191 | poe_m, poe_v = poe_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 192 | gpoe_m, gpoe_v = gpoe_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 193 | bcm_m, bcm_v = bcm_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 194 | rbcm_m, rbcm_v = rbcm_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 195 | 196 | layer_1_poe_gpm.append(poe_m) 197 | layer_1_poe_gpv.append(poe_v) 198 | layer_1_gpoe_gpm.append(gpoe_m) 199 | layer_1_gpoe_gpv.append(gpoe_v) 200 | layer_1_bcm_gpm.append(bcm_m) 201 | layer_1_bcm_gpv.append(bcm_v) 202 | layer_1_rbcm_gpm.append(rbcm_m) 203 | layer_1_rbcm_gpv.append(rbcm_v) 204 | 205 | ########################### 206 | # LAYER 1 # 207 | ########################### 208 | 209 | print(' ') 210 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 211 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping)) 212 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge)) 213 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge)) 214 | print('\ -') 215 | print(' ------ ENSEMBLE LAYER 1 ------') 216 | print('/ -') 217 | print(' ') 218 | 219 | ######################################################### 220 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 221 | ######################################################### 222 | # A. POE _________// 223 | # B. GPOE _________// 224 | # C. BCM _________// 225 | # D. 
RBCM _________// 226 | 227 | poe_model = PoeGP(models=layer_0_dist) 228 | gpoe_model = GenPoeGP(models=layer_0_dist) 229 | bcm_model = BayesianCM(models=layer_0_dist) 230 | rbcm_model = RobustBayesianCM(models=layer_0_dist) 231 | 232 | poe_m, poe_v = poe_model.predictive_layer(layer_1_poe_gpm, layer_1_poe_gpv, x_test) 233 | gpoe_m, gpoe_v = gpoe_model.predictive_layer(layer_1_gpoe_gpm, layer_1_gpoe_gpv, x_test) 234 | bcm_m, bcm_v = bcm_model.predictive_layer(layer_1_bcm_gpm, layer_1_bcm_gpv, x_test) 235 | rbcm_m, rbcm_v = rbcm_model.predictive_layer(layer_1_rbcm_gpm, layer_1_rbcm_gpv, x_test) 236 | 237 | layer_2_poe_gpm.append(poe_m) 238 | layer_2_poe_gpv.append(poe_v) 239 | layer_2_gpoe_gpm.append(gpoe_m) 240 | layer_2_gpoe_gpv.append(gpoe_v) 241 | layer_2_bcm_gpm.append(bcm_m) 242 | layer_2_bcm_gpv.append(bcm_v) 243 | layer_2_rbcm_gpm.append(rbcm_m) 244 | layer_2_rbcm_gpv.append(rbcm_v) 245 | 246 | ########################### 247 | # LAYER 2 # 248 | ########################### 249 | 250 | print(' ') 251 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 252 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping)) 253 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge)) 254 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge)) 255 | print('\ -') 256 | print(' ------ FINAL ENSEMBLE ------') 257 | print('/ -') 258 | print(' ') 259 | 260 | ######################################################### 261 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 262 | ######################################################### 263 | # A. POE _________// 264 | # B. GPOE _________// 265 | # C. BCM _________// 266 | # D. RBCM _________// 267 | 268 | poe_model = PoeGP(models=layer_0_dist) 269 | gpoe_model = GenPoeGP(models=layer_0_dist) 270 | bcm_model = BayesianCM(models=layer_0_dist) 271 | rbcm_model = RobustBayesianCM(models=layer_0_dist) 272 | 273 | ######################################################### 274 | # -- METRICS -------------------------------------------- 275 | ######################################################### 276 | 277 | # A. POE _________// 278 | 279 | nlpd = poe_model.nlpd_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, y_test) 280 | rmse = poe_model.rmse_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, f_test) 281 | mae = poe_model.mae_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, f_test) 282 | 283 | poe_metrics[0, trial] = nlpd 284 | poe_metrics[1, trial] = rmse 285 | poe_metrics[2, trial] = mae 286 | 287 | print('POE-NLPD: ', nlpd) 288 | print('POE-RMSE: ', rmse) 289 | print('POE-MAE: ', mae) 290 | print(' ') 291 | 292 | # B. GPOE _________// 293 | 294 | nlpd = gpoe_model.nlpd_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, y_test) 295 | rmse = gpoe_model.rmse_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, f_test) 296 | mae = gpoe_model.mae_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, f_test) 297 | 298 | gpoe_metrics[0, trial] = nlpd 299 | gpoe_metrics[1, trial] = rmse 300 | gpoe_metrics[2, trial] = mae 301 | 302 | print('GenPOE-NLPD: ', nlpd) 303 | print('GenPOE-RMSE: ', rmse) 304 | print('GenPOE-MAE: ', mae) 305 | print(' ') 306 | 307 | # C. 
BCM _________// 308 | 309 | nlpd = bcm_model.nlpd_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, y_test) 310 | rmse = bcm_model.rmse_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, f_test) 311 | mae = bcm_model.mae_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, f_test) 312 | 313 | bcm_metrics[0, trial] = nlpd 314 | bcm_metrics[1, trial] = rmse 315 | bcm_metrics[2, trial] = mae 316 | 317 | print('BCM-NLPD: ', nlpd) 318 | print('BCM-RMSE: ', rmse) 319 | print('BCM-MAE: ', mae) 320 | print(' ') 321 | 322 | # D. RBCM _________// 323 | 324 | nlpd = rbcm_model.nlpd_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, y_test) 325 | rmse = rbcm_model.rmse_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, f_test) 326 | mae = rbcm_model.mae_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, f_test) 327 | 328 | rbcm_metrics[0, trial] = nlpd 329 | rbcm_metrics[1, trial] = rmse 330 | rbcm_metrics[2, trial] = mae 331 | 332 | print('RBCM-NLPD: ', nlpd) 333 | print('RBCM-RMSE: ', rmse) 334 | print('RBCM-MAE: ', mae) 335 | print(' ') 336 | 337 | # save to csv file 338 | #np.savetxt(my_path + 'metrics/recy_metrics_' + experiment + '.csv', recy_metrics, delimiter=',') 339 | np.savetxt(my_path + 'metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',') 340 | np.savetxt(my_path + 'metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',') 341 | np.savetxt(my_path + 'metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',') 342 | np.savetxt(my_path + 'metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',') 343 | -------------------------------------------------------------------------------- /experiments/paralell.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Parallel Inference 15 | # ----------------------------------------------------------------- 16 | 17 | import torch 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | from tikzplotlib import save as tikz_save 21 | 22 | plt.rc('text', usetex=True) 23 | plt.rc('font', family='serif') 24 | 25 | # COOLORS.CO palettes 26 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 27 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 28 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 29 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 30 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 31 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 32 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 33 | 34 | color_palette = color_palette_2 35 | 36 | from kernels.rbf import RBF 37 | from likelihoods.gaussian import Gaussian 38 | from models.svgp import SVGP 39 | from models.ensemblegp import EnsembleGP 40 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel 41 | from optimization.algorithms import AlgorithmVEM 42 | from util 
import smooth_function, smooth_function_bias 43 | 44 | tasks = 5 45 | N_k = 500 46 | M_k = 15 47 | M_e = 35 48 | plot_local = True 49 | plot_ensemble = True 50 | save = True 51 | 52 | ########################### 53 | # # 54 | # DISTRIBUTED TASKS # 55 | # # 56 | ########################### 57 | 58 | min_x = 0.0 59 | max_x = 5.5 60 | segment_x = (max_x - min_x)/tasks 61 | x_tasks = [] 62 | y_tasks = [] 63 | for k in range(tasks): 64 | x_k = ((min_x+(k*segment_x))-(min_x+((k+1)*segment_x)))*torch.rand(N_k,1) + (min_x+((k+1)*segment_x)) 65 | x_k, _ = torch.sort(x_k, dim=0) 66 | y_k = smooth_function_bias(x_k) + 2.0*torch.randn(N_k,1) 67 | x_tasks.append(x_k) 68 | y_tasks.append(y_k) 69 | 70 | ########################### 71 | # # 72 | # PARALLEL INFERENCE # 73 | # # 74 | ########################### 75 | 76 | N_k_test = 400 77 | x_test = torch.linspace(min_x-0.5, max_x+0.5, N_k_test)[:, None] 78 | models = [] 79 | for k, x_k in enumerate(x_tasks): 80 | print('- -') 81 | print('----- TASK k='+str(k+1)+' ------') 82 | print('- -') 83 | kernel_k = RBF() 84 | likelihood_k = Gaussian(fit_noise=False) 85 | model_k = SVGP(kernel_k, likelihood_k, M_k) 86 | 87 | z_k_min = min_x+(k*segment_x) 88 | z_k_max = min_x+((k+1)*segment_x) 89 | #model_k.z = torch.nn.Parameter((z_k_max - z_k_min)*torch.rand(M_k, 1) + z_k_min, requires_grad=True) 90 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True) 91 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=15) 92 | 93 | vem_algorithm.ve_its = 20 94 | vem_algorithm.vm_its = 10 95 | vem_algorithm.lr_m = 1e-6 96 | vem_algorithm.lr_L = 1e-10 97 | vem_algorithm.lr_hyp = 1e-10 98 | vem_algorithm.lr_z = 1e-10 99 | 100 | vem_algorithm.fit() 101 | 102 | models.append(model_k) 103 | 104 | if plot_local: 105 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 106 | 107 | plt.figure(figsize=(12, 4)) 108 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k], lw=1.5) 109 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k, 1), color=color_palette[k], linestyle='', marker='.',markersize=5) 110 | 111 | plt.plot(x_test, gp, 'k-', linewidth=1.5) 112 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5') 113 | plt.plot(x_test, gp_upper, 'k-', linewidth=3.0) 114 | plt.plot(x_test, gp_lower, 'k-', linewidth=3.0) 115 | 116 | plt.title(r'Variational Sparse GP -- (task=' + str(k+1) + ')') 117 | plt.xlabel(r'Input, $x$') 118 | plt.ylabel(r'Output, $y$') 119 | plt.xlim(min_x - 0.5, max_x + 0.5) 120 | plt.ylim(-22.0, 22.0) 121 | 122 | if save: 123 | plt.savefig(fname='./figs/ parallel_task_'+str(k+1)+'.pdf',format='pdf') 124 | 125 | plt.show() 126 | 127 | ########################### 128 | # # 129 | # ENSEMBLE INFERENCE # 130 | # # 131 | ########################### 132 | print('- -') 133 | print('----- ENSEMBLE ------') 134 | print('- -') 135 | 136 | kernel = RBF() 137 | likelihood = Gaussian(fit_noise=False) 138 | model_e = EnsembleGP(kernel, likelihood, models, M_e) 139 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:,None], requires_grad=True) 140 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=30) 141 | 142 | vem_algorithm.ve_its = 30 143 | vem_algorithm.vm_its = 10 144 | vem_algorithm.lr_m = 1e-3 145 | vem_algorithm.lr_L = 1e-6 146 | vem_algorithm.lr_hyp = 1e-8 147 | vem_algorithm.lr_z = 1e-8 148 | 149 | vem_algorithm.fit() 150 | 151 | N_e_test = 400 152 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 153 
| 154 | if plot_ensemble: 155 | gp, gp_upper, gp_lower = model_e.predictive(x_test_ensemble) 156 | 157 | # Plot Ensemble 158 | plt.figure(figsize=(12, 4)) 159 | for k, x_k in enumerate(x_tasks): 160 | #if k%10==0: 161 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k], lw=1.5) 162 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k,1), color=color_palette[k], linestyle='', marker='.', markersize=5) 163 | 164 | plt.plot(model_e.z.detach(), -20.0 * torch.ones(M_e, 1), color='k', linestyle='', marker='x', markersize=7, markeredgewidth=1.1) 165 | plt.plot(x_test_ensemble, gp, 'k-', linewidth=1.5) 166 | #plt.fill_between(x_test_ensemble.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2, lw='0.5') 167 | plt.plot(x_test_ensemble, gp_upper, 'k-', linewidth=3.0) 168 | plt.plot(x_test_ensemble, gp_lower, 'k-', linewidth=3.0) 169 | 170 | plt.title(r'Ensemble GP Model -- (tasks='+str(tasks)+')') 171 | plt.xlabel(r'Input, $x$') 172 | plt.ylabel(r'Output, $y$') 173 | plt.xlim(min_x-0.5, max_x+0.5) 174 | plt.ylim(-22.0, 22.0) 175 | 176 | if save: 177 | plt.savefig(fname='./figs/parallel_ensemble.pdf',format='pdf') 178 | 179 | plt.show() 180 | 181 | N_e_test = 400 182 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 183 | f_test_ensemble = smooth_function(x_test_ensemble) 184 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1) 185 | 186 | nlpd = model_e.nlpd(x_test_ensemble, y_test_ensemble) 187 | rmse = model_e.rmse(x_test_ensemble, f_test_ensemble) 188 | mae = model_e.mae(x_test_ensemble, f_test_ensemble) 189 | 190 | print("NLPD: ", nlpd) 191 | print("RMSE: ", rmse) 192 | print("MAE: ", mae) -------------------------------------------------------------------------------- /experiments/solar.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Solar Dataset 15 | # ----------------------------------------------------------------- 16 | 17 | from kernels.rbf import RBF 18 | from likelihoods.gaussian import Gaussian 19 | from models.svgp import SVGP 20 | from models.ensemblegp import EnsembleGP 21 | from baselines.distgp import DistGP 22 | from baselines.poegp import PoeGP 23 | from baselines.gpoegp import GenPoeGP 24 | from baselines.bcm import BayesianCM 25 | from baselines.rbcm import RobustBayesianCM 26 | from baselines.dvigp import DVIGP 27 | from optimization.algorithms import AlgorithmVEM 28 | from optimization.algorithms import GPR_Optimizer 29 | from optimization.algorithms import AlgorithmVEM 30 | from sklearn.model_selection import train_test_split 31 | 32 | import torch 33 | import numpy as np 34 | import scipy.io as sio 35 | import matplotlib.pyplot as plt 36 | 37 | plt.rc('text', usetex=True) 38 | plt.rc('font', family='serif') 39 | 40 | # COOLORS.CO palettes 41 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 42 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 43 | color_palette_3 = 
['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 44 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 45 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 46 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 47 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 48 | 49 | palette = color_palette_4 50 | 51 | trials = 10 52 | experiment = 'solar' 53 | 54 | recy_metrics = np.zeros((3,trials)) 55 | poe_metrics = np.zeros((3,trials)) 56 | gpoe_metrics = np.zeros((3,trials)) 57 | bcm_metrics = np.zeros((3,trials)) 58 | rbcm_metrics = np.zeros((3,trials)) 59 | 60 | # Load Solar Data -- 61 | data = sio.loadmat('../data/nasa.mat') 62 | y = data['nasa'][:,2] 63 | y = np.log(y + 1) 64 | y = y[:,np.newaxis] 65 | y = (y - np.mean(y)) # mean normalization 66 | x = np.linspace(0,100, y.shape[0])[:,np.newaxis] 67 | 68 | print(y.shape) 69 | 70 | 71 | for trial in range(trials): 72 | 73 | print('TRIAL = ' + str(trial) + '/' + str(trials)) 74 | 75 | ########################### 76 | # # 77 | # DISTRIBUTED TASKS # 78 | # # 79 | ########################### 80 | 81 | tasks = 50 82 | min_x = 0.0 83 | max_x = 100.0 84 | segment_x = (max_x - min_x)/tasks 85 | x_tasks = [] # training x -- inputs 86 | y_tasks = [] # training y -- outputs 87 | 88 | x_test = torch.zeros(1,1) # test x -- inputs 89 | y_test = torch.zeros(1,1) # test y -- outputs 90 | 91 | n_training = 0 92 | n_test = 0 93 | for k in range(tasks): 94 | min_x_k = min_x + (k*segment_x) 95 | max_x_k = min_x + ((k+1)*segment_x) 96 | y_k = y[(x[:, 0] > min_x_k) & (x[:, 0] < max_x_k), :] 97 | x_k = x[(x[:, 0] > min_x_k) & (x[:, 0] < max_x_k), :] 98 | 99 | x_k_train, x_k_test, y_k_train, y_k_test = train_test_split(x_k, y_k, test_size = 0.2, random_state = 42) 100 | 101 | x_tasks.append(torch.from_numpy(x_k_train).float()) 102 | y_tasks.append(torch.from_numpy(y_k_train).float()) 103 | 104 | x_test = torch.cat((x_test, torch.from_numpy(x_k_test).float()), 0) 105 | y_test = torch.cat((y_test, torch.from_numpy(y_k_test).float()), 0) 106 | 107 | #x_k_test = x_k[::5, :] 108 | #y_k_test = y_k[::5, :] 109 | 110 | #x_tasks.append(torch.from_numpy(np.delete(x_k,np.s_[::5])[:,None]).float()) 111 | #y_tasks.append(torch.from_numpy(np.delete(y_k,np.s_[::5])[:,None]).float()) 112 | 113 | #x_test = torch.cat((x_test, torch.from_numpy(x_k_test).float()), 0) 114 | #y_test = torch.cat((y_test, torch.from_numpy(y_k_test).float()), 0) 115 | 116 | n_training += y_k_train.shape[0] 117 | n_test += y_k_test.shape[0] 118 | 119 | 120 | print('Total # of tasks: ', len(x_tasks)) 121 | print('Number # of training samples: ', n_training) 122 | print('Number # of test samples: ', n_test) 123 | 124 | ########################### 125 | # # 126 | # PARALLEL INFERENCE # 127 | # # 128 | ########################### 129 | 130 | M_k = 6 131 | models = [] # for recyclable GPs 132 | models_dist = [] # for distributed GPs 133 | x_all = [] # for distributed GPs 134 | y_all = [] # for distributed GPs 135 | for k, x_k in enumerate(x_tasks): 136 | print('- -') 137 | print('----- TASK k=' + str(k + 1) + ' ------') 138 | print('- -') 139 | ###################################################### 140 | # 1. 
RECYCLABLE GP 141 | ###################################################### 142 | kernel_k = RBF(length_scale=0.2, variance=1.0) 143 | likelihood_k = Gaussian(sigma=0.1, fit_noise=True) 144 | model_k = SVGP(kernel_k, likelihood_k, M_k) 145 | 146 | z_k_min = min_x + (k*segment_x) 147 | z_k_max = min_x + ((k+1)*segment_x) 148 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True) 149 | 150 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=20) 151 | 152 | vem_algorithm.ve_its = 20 153 | vem_algorithm.vm_its = 20 154 | vem_algorithm.lr_m = 1e-5 155 | vem_algorithm.lr_L = 1e-8 156 | vem_algorithm.lr_hyp = 1e-10 157 | vem_algorithm.lr_z = 1e-10 158 | 159 | vem_algorithm.fit() 160 | 161 | ###################################################### 162 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 163 | ###################################################### 164 | 165 | kernel_j = RBF() 166 | likelihood_j = Gaussian(fit_noise=False) 167 | model_j = DistGP(kernel_j, likelihood_j) 168 | GPR_Optimizer(model_j, x_k, y_tasks[k]) 169 | 170 | models_dist.append(model_j) 171 | x_all.append(x_k) 172 | y_all.append(y_tasks[k]) 173 | 174 | ########################### 175 | # # 176 | # ENSEMBLE INFERENCE # 177 | # # 178 | ########################### 179 | print('- -') 180 | print('----- ENSEMBLE ------') 181 | print('- -') 182 | 183 | ###################################################### 184 | # 1. RECYCLABLE GP 185 | ###################################################### 186 | 187 | M_e = 90 188 | kernel = RBF() 189 | likelihood = Gaussian(fit_noise=False) 190 | model_e = EnsembleGP(kernel, likelihood, models, M_e) 191 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:, None], requires_grad=True) 192 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=10) 193 | 194 | vem_algorithm.ve_its = 30 195 | vem_algorithm.vm_its = 10 196 | vem_algorithm.lr_m = 1e-3 197 | vem_algorithm.lr_L = 1e-6 198 | vem_algorithm.lr_hyp = 1e-8 199 | vem_algorithm.lr_z = 1e-8 200 | 201 | vem_algorithm.fit() 202 | 203 | nlpd = model_e.nlpd(x_test, y_test) 204 | rmse = model_e.rmse(x_test, y_test) 205 | mae = model_e.mae(x_test, y_test) 206 | 207 | recy_metrics[0, trial] = nlpd 208 | recy_metrics[1, trial] = rmse 209 | recy_metrics[2, trial] = mae 210 | 211 | print('Recyclable - NLPD: ', nlpd) 212 | print('Recyclable - RMSE: ', rmse) 213 | print('Recyclable - MAE: ', mae) 214 | print(' ') 215 | 216 | ###################################################### 217 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 218 | ###################################################### 219 | 220 | # A. POE _________// 221 | 222 | poe_model = PoeGP(models_dist) 223 | 224 | nlpd = poe_model.nlpd(x_all, y_all, x_test, y_test) 225 | rmse = poe_model.rmse(x_all, y_all, x_test, y_test) 226 | mae = poe_model.mae(x_all, y_all, x_test, y_test) 227 | 228 | poe_metrics[0, trial] = nlpd 229 | poe_metrics[1, trial] = rmse 230 | poe_metrics[2, trial] = mae 231 | 232 | print('POE-NLPD: ', nlpd) 233 | print('POE-RMSE: ', rmse) 234 | print('POE-MAE: ', mae) 235 | print(' ') 236 | 237 | # B. 
GPOE _________// 238 | 239 | gpoe_model = GenPoeGP(models_dist) 240 | 241 | nlpd = gpoe_model.nlpd(x_all, y_all, x_test, y_test) 242 | rmse = gpoe_model.rmse(x_all, y_all, x_test, y_test) 243 | mae = gpoe_model.mae(x_all, y_all, x_test, y_test) 244 | 245 | gpoe_metrics[0, trial] = nlpd 246 | gpoe_metrics[1, trial] = rmse 247 | gpoe_metrics[2, trial] = mae 248 | 249 | print('GenPOE-NLPD: ', nlpd) 250 | print('GenPOE-RMSE: ', rmse) 251 | print('GenPOE-MAE: ', mae) 252 | print(' ') 253 | 254 | # C. BCM _________// 255 | 256 | bcm_model = BayesianCM(models_dist) 257 | 258 | nlpd = bcm_model.nlpd(x_all, y_all, x_test, y_test) 259 | rmse = bcm_model.rmse(x_all, y_all, x_test, y_test) 260 | mae = bcm_model.mae(x_all, y_all, x_test, y_test) 261 | 262 | bcm_metrics[0, trial] = nlpd 263 | bcm_metrics[1, trial] = rmse 264 | bcm_metrics[2, trial] = mae 265 | 266 | print('BCM-NLPD: ', nlpd) 267 | print('BCM-RMSE: ', rmse) 268 | print('BCM-MAE: ', mae) 269 | print(' ') 270 | 271 | # D. RBCM _________// 272 | 273 | rbcm_model = RobustBayesianCM(models_dist) 274 | 275 | nlpd = rbcm_model.nlpd(x_all, y_all, x_test, y_test) 276 | rmse = rbcm_model.rmse(x_all, y_all, x_test, y_test) 277 | mae = rbcm_model.mae(x_all, y_all, x_test, y_test) 278 | 279 | rbcm_metrics[0, trial] = nlpd 280 | rbcm_metrics[1, trial] = rmse 281 | rbcm_metrics[2, trial] = mae 282 | 283 | print('RBCM-NLPD: ', nlpd) 284 | print('RBCM-RMSE: ', rmse) 285 | print('RBCM-MAE: ', mae) 286 | print(' ') 287 | 288 | # save to csv file 289 | np.savetxt('./metrics/recy_metrics_' + experiment + '.csv', recy_metrics, delimiter=',') 290 | np.savetxt('./metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',') 291 | np.savetxt('./metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',') 292 | np.savetxt('./metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',') 293 | np.savetxt('./metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',') -------------------------------------------------------------------------------- /extra/modular_gp_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/extra/modular_gp_logo.png -------------------------------------------------------------------------------- /kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/kernels/__init__.py -------------------------------------------------------------------------------- /kernels/coregionalization.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from util import squared_distance 15 | from kernels.kernel import Kernel 16 | from kernels.rbf import RBF 17 | 18 | class LMC(Kernel): 19 | """ 20 | Class for Linear Model of Coregionalization / Kernel 21 | """ 22 | 23 | def __init__(self, kernels, output_dim, rank=1, W=None, kappa=None, 
variance=None, length_scale=None, input_dim=None): 24 | super().__init__(input_dim) 25 | 26 | # Dimensionality of coregionalization kernel 27 | self.Q = len(kernels) 28 | self.output_dim = output_dim 29 | self.rank = rank 30 | if self.rank > output_dim: 31 | print("Warning: Unusual choice of rank, rank should be less than output dim.") 32 | 33 | # Coregionalization kernel / mixing hyper-parameters 34 | if W is None: 35 | self.W = torch.nn.Parameter(torch.randn(self.output_dim, self.Q), requires_grad=True) 36 | else: 37 | assert W.shape == (self.output_dim, self.Q) 38 | self.W = torch.nn.Parameter(W, requires_grad=True)  # user-provided mixing matrix, one column per latent function 39 | # Registration of coregionalization parameters 40 | self.register_parameter('coregionalization_W', self.W) 41 | 42 | # Independent kernels 43 | self.kernels = kernels 44 | 45 | def B_coefficients(self): 46 | B_coeff = [] 47 | for q in range(self.Q): 48 | B_q = torch.mm(self.W[:,q:q+1], self.W[:,q:q+1].t()) 49 | B_coeff.append(B_q) 50 | return B_coeff 51 | 52 | def Kff(self, X, k): 53 | """ 54 | Builds the covariance matrix Kff = cov[f_k(x), f_k(x)] of a Multi-output GP 55 | :param X: Input data 56 | :param k: Output function index 57 | """ 58 | N,_ = X.shape 59 | Kff = torch.zeros(N,N) 60 | B = self.B_coefficients() 61 | for q, B_q in enumerate(B): 62 | Kff += B_q[k,k] * self.kernels[q].K(X, X) 63 | 64 | return Kff 65 | 66 | def Kfu(self, X, Z, k): 67 | """ 68 | Builds the cross-covariance cov[f_k(x), u(z)] of a Multi-output GP 69 | :param X: Input data 70 | :param Z: Inducing points (M, D, Q) 71 | :param k: Output function index 72 | """ 73 | N, _ = X.shape 74 | M, Xdim, _ = Z.shape 75 | 76 | B = self.B_coefficients() 77 | Kfu = torch.empty(N, M, self.Q) 78 | for q, B_q in enumerate(B): 79 | Kfu[:,:,q] = self.W[k,q] * self.kernels[q].K(X, Z[:,:,q]) 80 | 81 | return Kfu 82 | 83 | -------------------------------------------------------------------------------- /kernels/kernel.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | 15 | import torch 16 | import numpy as np 17 | from util import squared_distance 18 | 19 | class Kernel(torch.nn.Module): 20 | """ 21 | Base class for kernels 22 | """ 23 | def __init__(self, input_dim=None): 24 | super(Kernel, self).__init__() 25 | 26 | # Input dimension -- x 27 | if input_dim is None: 28 | input_dim = 1 29 | else: 30 | input_dim = int(input_dim) 31 | 32 | self.input_dim = input_dim -------------------------------------------------------------------------------- /kernels/rbf.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | import numpy as np 16 | from
kernels.stationary import Stationary 17 | 18 | class RBF(Stationary): 19 | """ 20 | The Radial Basis Function (RBF) or Squared Exponential / Gaussian Kernel 21 | """ 22 | 23 | def K(self, X, X2=None): 24 | variance = self.variance.abs().clamp(min=0.0, max=5.0) 25 | r2 = torch.clamp(self.squared_dist(X, X2),min=0.0, max=np.inf) 26 | K = variance*torch.exp(-r2 / 2.0) 27 | 28 | # Assure that is PSD 29 | if X2 is None: 30 | try: 31 | _ = torch.cholesky(K) 32 | except RuntimeError: 33 | print('Jitter added') 34 | jitter = 1e-5 35 | idx = torch.arange(K.shape[-1]) 36 | Kprime = K.clone() 37 | Kprime[idx, idx] += jitter 38 | K = Kprime 39 | 40 | return K -------------------------------------------------------------------------------- /kernels/stationary.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from util import squared_distance 16 | from kernels.kernel import Kernel 17 | 18 | class Stationary(Kernel): 19 | """ 20 | Class for Stationary Kernel 21 | """ 22 | 23 | def __init__(self, variance=None, length_scale=None, input_dim=None, ARD=False): 24 | super().__init__(input_dim) 25 | 26 | if input_dim is None: 27 | self.input_dim = 1 28 | else: 29 | self.input_dim = input_dim 30 | 31 | self.ARD = ARD # Automatic relevance determination 32 | # Length-scale/smoothness of the kernel -- l 33 | if self.ARD: 34 | if length_scale is None: 35 | length_scale = 0.1 * torch.ones(self.input_dim) 36 | else: 37 | if length_scale is None: 38 | length_scale = 0.1 39 | 40 | # Variance/amplitude of the kernel - /sigma 41 | if variance is None: 42 | variance = 2.0 43 | 44 | self.length_scale = torch.nn.Parameter(length_scale*torch.ones(1), requires_grad=True) 45 | self.variance = torch.nn.Parameter(variance*torch.ones(1), requires_grad=True) 46 | self.register_parameter('length_scale', self.length_scale) 47 | self.register_parameter('variance', self.variance) 48 | 49 | def squared_dist(self, X, X2): 50 | """ 51 | Returns the SCALED squared distance between X and X2. 
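    The inputs are scaled element-wise by the length-scale before the distance is computed,
    i.e. r2(x, x') = ||x / l - x' / l||^2, which RBF.K then maps to k(x, x') = variance * exp(-r2 / 2).
    If X2 is None, the pairwise distances between the rows of X and themselves are returned.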
52 | """ 53 | length_scale = self.length_scale.abs().clamp(min=0.0, max=10.0) 54 | 55 | if not self.ARD: 56 | if X2 is None: 57 | dist = squared_distance(X/length_scale) 58 | else: 59 | dist = squared_distance(X/length_scale, X2/length_scale) 60 | else: 61 | if X2 is None: 62 | dist = squared_distance(X / length_scale) 63 | else: 64 | dist = squared_distance(X / length_scale, X2 / length_scale) 65 | 66 | return dist 67 | 68 | def Kdiag(self, X): 69 | variance = torch.abs(self.variance) 70 | return variance.expand(X.size(0)) -------------------------------------------------------------------------------- /likelihoods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/likelihoods/__init__.py -------------------------------------------------------------------------------- /likelihoods/bernoulli.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | from likelihoods.likelihood import Likelihood 16 | from torch.distributions.normal import Normal 17 | from torch.distributions.bernoulli import Bernoulli as Ber 18 | 19 | class Bernoulli(Likelihood): 20 | """ 21 | Class for Gaussian Likelihood 22 | """ 23 | def __init__(self): 24 | super(Bernoulli, self).__init__() 25 | 26 | 27 | def pdf(self, f, y): 28 | 29 | sigmoid = torch.nn.Sigmoid() 30 | p = sigmoid(f)#.flatten() 31 | bernoulli = Ber(probs=p) 32 | pdf = torch.exp(bernoulli.log_prob(y)) 33 | return pdf 34 | 35 | def logpdf(self, f, y): 36 | sigmoid = torch.nn.Sigmoid() 37 | p = sigmoid(f).flatten() 38 | bernoulli = Ber(probs=p) 39 | logpdf = bernoulli.log_prob(y) 40 | return logpdf 41 | 42 | def variational_expectation(self, y, m, v): 43 | # Gauss-Hermite Quadrature 44 | gh_p, gh_w = self.gh_points() 45 | gh_w = torch.div(gh_w, np.sqrt(np.pi)) 46 | 47 | m, v, y = m.flatten(), v.flatten(), y.flatten() 48 | f = gh_p[None, :] * torch.sqrt(2. 
* v[:, None]) + m[:, None] 49 | y = y[:,None].repeat(1,f.size(1)) 50 | 51 | logp = self.logpdf(f.view(-1), y.view(-1)) 52 | logp = logp.view(f.size()).double() 53 | gh_w = gh_w[:, None] 54 | 55 | var_exp = logp.mm(gh_w) 56 | return var_exp 57 | 58 | def log_predictive(self, y_test, mu_gp, v_gp, num_samples=1000): 59 | N_test = y_test.size(0) 60 | # function samples: 61 | normal = Normal(loc=mu_gp.flatten(), scale=torch.sqrt(v_gp).flatten()) 62 | f_samples = torch.reshape(normal.sample(sample_shape=(1,num_samples))[0,:,:], (-1,)) 63 | 64 | # monte-carlo: 65 | logpdf = self.logpdf(f_samples, y_test.repeat(num_samples,1).flatten()) 66 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0) 67 | return -log_pred 68 | 69 | -------------------------------------------------------------------------------- /likelihoods/gaussian.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | from likelihoods.likelihood import Likelihood 16 | from torch.distributions.normal import Normal 17 | 18 | class Gaussian(Likelihood): 19 | """ 20 | Class for Gaussian Likelihood 21 | """ 22 | def __init__(self, sigma=None, fit_noise=False): 23 | super(Gaussian, self).__init__() 24 | 25 | if sigma is None: 26 | sigma=1.0 27 | 28 | self.sigma = torch.nn.Parameter(sigma*torch.ones(1), requires_grad=fit_noise) 29 | 30 | 31 | def pdf(self, f, y): 32 | normal = Normal(loc=f, scale=self.sigma) 33 | pdf = torch.exp(normal.log_prob(y)) 34 | return pdf 35 | 36 | def logpdf(self, f, y): 37 | normal = Normal(loc=f, scale=self.sigma) 38 | logpdf = normal.log_prob(y) 39 | return logpdf 40 | 41 | def variational_expectation(self, y, m, v): 42 | # Variational Expectation of log-likelihood -- Analytical 43 | lik_variance = self.sigma.pow(2) 44 | expectation = - np.log(2*np.pi) - torch.log(lik_variance) \ 45 | - (y.pow(2) + m.pow(2) + v - (2*m*y)).div(lik_variance) 46 | 47 | return 0.5*expectation 48 | 49 | def log_predictive(self, y_test, mu_gp, v_gp, num_samples=1000): 50 | # function samples: 51 | normal = Normal(loc=mu_gp.flatten(), scale=torch.sqrt(v_gp).flatten()) 52 | f_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:] 53 | 54 | # monte-carlo: 55 | logpdf = self.logpdf(f_samples, y_test.flatten()) 56 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0) 57 | return log_pred -------------------------------------------------------------------------------- /likelihoods/hetgaussian.py: -------------------------------------------------------------------------------- 1 | 2 | # ----------------------------------------------------------------- 3 | # This script belongs to the ModularGP repo 4 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 5 | # Copyright (c) 2021 Pablo Moreno-Munoz 6 | # ----------------------------------------------------------------- 7 | # 8 | # 9 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 10 | # Section for Cognitive Systems 11 | # Technical University of Denmark (DTU) 12 | # October 2021 13 | 14 | 15 | import torch 16 | import numpy 
as np 17 | from likelihoods.likelihood import Likelihood 18 | from torch.distributions.normal import Normal 19 | from util import safe_exp, safe_square 20 | 21 | class HetGaussian(Likelihood): 22 | """ 23 | Class for Heteroscedastic Gaussian Likelihood 24 | -- 25 | -- Adaptation to Pytorch+GP framework 26 | -- Based on M. Lázaro-Gredilla et al. "Variational Heteroscedastic Gaussian Process Regression" @ ICML 2011 27 | -- Reference: https://icml.cc/Conferences/2011/papers/456_icmlpaper.pdf 28 | """ 29 | def __init__(self): 30 | super(HetGaussian, self).__init__() 31 | 32 | def pdf(self, f, g, y): 33 | normal = Normal(loc=f, scale=safe_exp(g)) 34 | pdf = safe_exp(normal.log_prob(y)) 35 | return pdf 36 | 37 | def logpdf(self, f, g, y): 38 | normal = Normal(loc=f, scale=safe_exp(g)) 39 | logpdf = normal.log_prob(y) 40 | return logpdf 41 | 42 | def variational_expectation(self, y, m_f, v_f, m_g, v_g): 43 | # Variational Expectation of log-likelihood -- Analytical 44 | precision = torch.clamp(safe_exp(-m_g + (0.5*v_g)), min=-1e9, max=1e9) 45 | #squares = torch.clamp(safe_square(y) + safe_square(m_f) + v_f - (2*m_f*y), min=-1e9, max=1e9) 46 | squares = torch.clamp(y**2 + m_f**2 + v_f - (2 * m_f * y), min=-1e9, max=1e9) 47 | expectation = -np.log(2*np.pi) - m_g - (precision*squares) 48 | return 0.5*expectation 49 | 50 | def log_predictive(self, y_test, mu_f_gp, v_f_gp, mu_g_gp, v_g_gp, num_samples=1000): 51 | # function samples f: 52 | normal = Normal(loc=mu_f_gp.flatten(), scale=torch.sqrt(v_f_gp).flatten()) 53 | f_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:] 54 | 55 | # function samples g: 56 | normal = Normal(loc=mu_g_gp.flatten(), scale=torch.sqrt(v_g_gp).flatten()) 57 | g_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:] 58 | 59 | # monte-carlo: 60 | logpdf = self.logpdf(f_samples, g_samples, y_test.flatten()) 61 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0) 62 | return log_pred -------------------------------------------------------------------------------- /likelihoods/likelihood.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | 16 | class Likelihood(torch.nn.Module): 17 | """ 18 | Base class for likelihoods 19 | """ 20 | def __init__(self): 21 | super(Likelihood, self).__init__() 22 | 23 | def gh_points(self, T=20): 24 | # Gaussian-Hermite Quadrature points 25 | gh_p, gh_w = np.polynomial.hermite.hermgauss(T) 26 | gh_p, gh_w = torch.from_numpy(gh_p), torch.from_numpy(gh_w) 27 | return gh_p, gh_w 28 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/models/__init__.py -------------------------------------------------------------------------------- /models/chainedgp.py: -------------------------------------------------------------------------------- 1 | # 
----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from torch.distributions import MultivariateNormal as Normal 16 | from likelihoods.hetgaussian import HetGaussian 17 | from torch.distributions import kl_divergence 18 | 19 | import numpy as np 20 | from GPy.inference.latent_function_inference import LatentFunctionInference 21 | from GPy.inference.latent_function_inference.posterior import Posterior 22 | 23 | 24 | class ChainedGP(torch.nn.Module): 25 | """ 26 | -- Chained Gaussian Process with Heteroscedastic Gaussian Likelihood -- 27 | -- 28 | -- Adaptation to Pytorch+GP framework 29 | -- Based on A. Saul et al. "Chained Gaussian Processes" @ AISTATS 2016 30 | -- Reference: http://proceedings.mlr.press/v51/saul16.pdf 31 | """ 32 | def __init__(self, kernel_f, kernel_g, M, input_dim=None, batch_rate=1.0): 33 | super(ChainedGP, self).__init__() 34 | 35 | if input_dim is None: 36 | input_dim = 1 37 | 38 | # Dimensions -- 39 | self.M = M # num. inducing 40 | self.input_dim = int(input_dim) # dimension of x 41 | self.batch_rate = batch_rate # rate of mini-batch/dataset 42 | 43 | # GP Elements -- 44 | self.likelihood = HetGaussian() # type of likelihood 45 | self.kernel_f = kernel_f # type of kernel for f 46 | self.kernel_g = kernel_g # type of kernel for g 47 | 48 | self.logZ = 0.0 49 | 50 | if self.input_dim > 1: 51 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False) 52 | else: 53 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 54 | 55 | # Variational distribution f -- 56 | self.q_m_f = torch.nn.Parameter(0.5*torch.randn(M,1), requires_grad=True) # variational: mean parameter 57 | self.q_L_f = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 58 | 59 | # Variational distribution g -- 60 | self.q_m_g = torch.nn.Parameter(0.5*torch.randn(M,1), requires_grad=True) # variational: mean parameter 61 | self.q_L_g = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 62 | 63 | def forward(self, x, y): 64 | 65 | # Variational parameters f -- 66 | q_m_f = self.q_m_f 67 | q_L_f = torch.tril(self.q_L_f) 68 | q_S_f = torch.mm(q_L_f, q_L_f.t()) 69 | 70 | # Variational parameters g -- 71 | q_m_g = self.q_m_g 72 | q_L_g = torch.tril(self.q_L_g) 73 | q_S_g = torch.mm(q_L_g, q_L_g.t()) 74 | 75 | # Prior parameters (uses kernel) -- 76 | Kuu_f = self.kernel_f.K(self.z) 77 | Kuu_g = self.kernel_g.K(self.z) 78 | 79 | # Distributions -- q(u), p(u) 80 | q_u_f = Normal(q_m_f.flatten(), q_S_f) 81 | p_u_f = Normal(torch.zeros(self.M), Kuu_f) 82 | 83 | q_u_g = Normal(q_m_g.flatten(), q_S_g) 84 | p_u_g = Normal(torch.zeros(self.M), Kuu_g) 85 | 86 | # Calculus of q(f) -- 87 | Kff = self.kernel_f.K(x,x) 88 | Kfu = self.kernel_f.K(x, self.z) 89 | Kuf = torch.transpose(Kfu,0,1) 90 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu_f) # is pseudo-inverse? 
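# NOTE: torch.solve(B, A) is deprecated in newer PyTorch releases; the same inverse can be
# obtained with torch.linalg.solve(Kuu_f, torch.eye(self.M)), or more stably via
# torch.cholesky_solve on a Cholesky factor of Kuu_f.
# The block below computes the sparse-GP marginal q(f) in the standard way (and q(g) analogously further down):
#   m_f = Kfu Kuu^{-1} q_m,   v_f = diag( Kff + Kfu Kuu^{-1} (q_S - Kuu) Kuu^{-1} Kuf ).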
91 | 92 | A = Kfu.mm(iKuu) 93 | AT = iKuu.mm(Kuf) 94 | 95 | m_f = A.mm(q_m_f) 96 | v_f = torch.diag(Kff + A.mm(q_S_f - Kuu_f).mm(AT)) 97 | 98 | # Calculus of q(g) -- 99 | Kff = self.kernel_g.K(x,x) 100 | Kfu = self.kernel_g.K(x, self.z) 101 | Kuf = torch.transpose(Kfu,0,1) 102 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu_g) # is pseudo-inverse? 103 | 104 | A = Kfu.mm(iKuu) 105 | AT = iKuu.mm(Kuf) 106 | 107 | m_g = A.mm(q_m_g) 108 | v_g = torch.diag(Kff + A.mm(q_S_g - Kuu_g).mm(AT)) 109 | 110 | # Expectation term -- 111 | expectation = self.likelihood.variational_expectation(y, m_f, v_f, m_g, v_g) 112 | 113 | # KL divergence -- 114 | kl = kl_divergence(q_u_f, p_u_f) + kl_divergence(q_u_g, p_u_g) 115 | 116 | # Lower bound (ELBO) -- 117 | elbo = self.batch_rate*expectation.sum() - kl 118 | return -elbo 119 | 120 | def predictive(self, x_new, lik_noise=False): 121 | # Matrices f 122 | q_m_f = self.q_m_f.detach().numpy() 123 | q_L_f = torch.tril(self.q_L_f) 124 | q_S_f = torch.mm(q_L_f, q_L_f.t()).detach().numpy() 125 | Kuu_f = self.kernel_f.K(self.z, self.z).detach().numpy() 126 | 127 | # Matrices g 128 | q_m_g = self.q_m_g.detach().numpy() 129 | q_L_g = torch.tril(self.q_L_g) 130 | q_S_g = torch.mm(q_L_g, q_L_g.t()).detach().numpy() 131 | Kuu_g = self.kernel_g.K(self.z, self.z).detach().numpy() 132 | 133 | # GP function f ------ 134 | posterior = Posterior(mean=q_m_f, cov=q_S_f, K=Kuu_f, prior_mean=np.zeros(q_m_f.shape)) 135 | Kx = self.kernel_f.K(self.z, x_new).detach().numpy() 136 | Kxx = self.kernel_f.K(x_new, x_new).detach().numpy() 137 | 138 | # GP Predictive Posterior - mean + variance 139 | gp_mu_f = np.dot(Kx.T, posterior.woodbury_vector) 140 | Kxx = np.diag(Kxx) 141 | gp_var_f = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 142 | 143 | gp_f = gp_mu_f 144 | gp_v_f = gp_var_f 145 | 146 | # GP function g ------ 147 | posterior = Posterior(mean=q_m_g, cov=q_S_g, K=Kuu_g, prior_mean=np.zeros(q_m_g.shape)) 148 | Kx = self.kernel_g.K(self.z, x_new).detach().numpy() 149 | Kxx = self.kernel_g.K(x_new, x_new).detach().numpy() 150 | 151 | # GP Predictive Posterior - mean + variance 152 | gp_mu_g = np.dot(Kx.T, posterior.woodbury_vector) 153 | Kxx = np.diag(Kxx) 154 | gp_var_g = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 155 | 156 | gp_g = gp_mu_g 157 | gp_v_g = gp_var_g 158 | 159 | return gp_f, gp_v_f, gp_g, gp_v_g 160 | 161 | def rmse(self, x_new, f_new): 162 | f_gp,_,_,_ = self.predictive(x_new) 163 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 164 | return rmse 165 | 166 | def mae(self, x_new, f_new): 167 | f_gp,_,_,_ = self.predictive(x_new) 168 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 169 | return mae 170 | 171 | def nlpd(self, x_new, y_new): 172 | f_gp, v_f_gp, g_gp, v_g_gp = self.predictive(x_new) 173 | f_gp = torch.from_numpy(f_gp) 174 | v_f_gp = torch.from_numpy(v_f_gp) 175 | g_gp = torch.from_numpy(g_gp) 176 | v_g_gp = torch.from_numpy(v_g_gp) 177 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_f_gp, g_gp, v_g_gp)).detach().numpy() 178 | return nlpd 179 | 180 | def evidence(self, x, y, N_samples=None): 181 | # Approximation CI 182 | if N_samples is None: 183 | N_samples = 1000 184 | 185 | N,_ = x.shape 186 | v_f = torch.zeros(N) 187 | for i in range(N): 188 | v_f[i] = self.kernel.K(x[i:i+1,:],x[i:i+1,:]) 189 | 190 | m_f = torch.zeros(v_f.shape) 191 | p_f = Normal(m_f, torch.diag(v_f)) 192 | f_samples = p_f.sample([N_samples]).t() # 
N x N_samples 193 | mc_pdf = self.likelihood.pdf(f_samples, torch.tile(y, (1,N_samples))) 194 | 195 | mc_expectations = 1/N_samples * torch.sum(torch.clamp(mc_pdf, min=1e-100),1) 196 | print(mc_expectations) 197 | logZ = torch.sum(torch.log(mc_expectations)) 198 | 199 | self.logZ = logZ 200 | return logZ 201 | 202 | 203 | -------------------------------------------------------------------------------- /models/ensemblegp.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from torch.distributions import MultivariateNormal as Normal 16 | from torch.distributions import kl_divergence 17 | from GPy.inference.latent_function_inference.posterior import Posterior 18 | import numpy as np 19 | 20 | class EnsembleGP(torch.nn.Module): 21 | """ 22 | -- Ensemble Variational Inference for Gaussian Processes -- 23 | """ 24 | def __init__(self, kernel, likelihood, models, M, input_dim=None): 25 | super(EnsembleGP, self).__init__() 26 | 27 | if input_dim is None: 28 | input_dim = 1 29 | 30 | # Dimensions -- 31 | self.M = M # num. inducing 32 | self.input_dim = int(input_dim) # dimension of x 33 | 34 | # Ensemble GP Elements -- 35 | self.likelihood = likelihood 36 | self.kernel = kernel 37 | 38 | if self.input_dim > 1: 39 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False) 40 | else: 41 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 42 | 43 | # Adjacent GP Models 44 | self.models = models # is a list 45 | 46 | # Ensemble Variational distribution -- 47 | self.q_m = torch.nn.Parameter(torch.randn(M, 1), requires_grad=True) # variational: mean parameter 48 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 49 | 50 | def ensemble(self): 51 | # GP prior 52 | Kuu = self.kernel.K(self.z, self.z) 53 | iKuu, _ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse? 54 | 55 | q_m = self.q_m 56 | q_L = torch.tril(self.q_L) 57 | q_S = torch.mm(q_L, q_L.t()) 58 | 59 | ensemble_m = [] 60 | ensemble_S = [] 61 | 62 | # Ensemble GP Distributions 63 | for model_k in self.models: 64 | Kkk = self.kernel.K(model_k.z, model_k.z) 65 | Kuk = self.kernel.K(self.z, model_k.z) 66 | Kku = torch.transpose(Kuk,0,1) 67 | 68 | A = Kku.mm(iKuu) 69 | AT = iKuu.mm(Kuk) 70 | 71 | m_k = Kku.mm(iKuu).mm(q_m) 72 | S_k = Kkk + A.mm(q_S - Kuu).mm(AT) 73 | 74 | ensemble_m.append(m_k) 75 | ensemble_S.append(S_k) 76 | 77 | return ensemble_m, ensemble_S 78 | 79 | def expectation(self): 80 | E = 0.0 81 | ensemble_m, ensemble_S = self.ensemble() 82 | 83 | # Expectation of k ensembles -- 84 | for k,model_k in enumerate(self.models): 85 | # Ensemble GP -- q_e() 86 | m_e = ensemble_m[k] 87 | S_e = ensemble_S[k] 88 | 89 | # Past GP variational distribution -- q_k() 90 | m_k = model_k.q_m 91 | L_k = torch.tril(model_k.q_L) 92 | S_k = torch.mm(L_k, L_k.t()) 93 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse? 
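# The two terms below are (twice) the Gaussian expectations taken under the ensemble
# marginal q_e(u_k) = N(m_e, S_e):
#   E[log q_k(u_k)] = -0.5 * ( tr(S_k^{-1} S_e) + (m_e - m_k)^T S_k^{-1} (m_e - m_k) + logdet(2*pi*S_k) )
#   E[log p_k(u_k)] = -0.5 * ( tr(Kkk^{-1} S_e) + m_e^T Kkk^{-1} m_e + logdet(2*pi*Kkk) )
# The factor 0.5 is applied when the terms are accumulated into E further down.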
94 | 95 | # Past GP prior -- p_k() 96 | z_k = model_k.z 97 | Kkk = model_k.kernel.K(z_k, z_k) 98 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse? 99 | 100 | # Expectation on terms -- E[log_p()] and E[log_q()] 101 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k) 102 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk) 103 | 104 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]] 105 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ 106 | 107 | return E 108 | 109 | def divergence(self, p, q): 110 | kl = kl_divergence(q,p) 111 | return kl 112 | 113 | def forward(self): 114 | 115 | # Variational parameters -- 116 | q_m = self.q_m 117 | q_L = torch.tril(self.q_L) 118 | q_S = torch.mm(q_L, q_L.t()) 119 | 120 | # Prior parameters (uses kernel) -- 121 | Kuu = self.kernel.K(self.z, self.z) 122 | 123 | # Distributions -- q(u), p(u) 124 | q_u = Normal(q_m.flatten(), q_S) 125 | p_u = Normal(torch.zeros(self.M), Kuu) 126 | 127 | # Expectation -- 128 | expectation = self.expectation() 129 | 130 | # KL divergence -- 131 | kl = self.divergence(q_u, p_u) 132 | 133 | # Calls ELBO 134 | elbo = expectation - kl 135 | return -elbo 136 | 137 | def predictive(self, x_new): 138 | # Matrices 139 | q_m = self.q_m.detach().numpy() 140 | q_L = torch.tril(self.q_L) 141 | q_S = torch.mm(q_L, q_L.t()).detach().numpy() 142 | Kuu = self.kernel.K(self.z, self.z).detach().numpy() 143 | 144 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape)) 145 | Kx = self.kernel.K(self.z, x_new).detach().numpy() 146 | Kxx = self.kernel.K(x_new, x_new).detach().numpy() 147 | 148 | # GP Predictive Posterior - mean + variance 149 | gp_mu = np.dot(Kx.T, posterior.woodbury_vector) 150 | Kxx = np.diag(Kxx) 151 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 152 | 153 | gp = gp_mu 154 | gp_upper = gp_mu + 2*np.sqrt(gp_var) #+ 2*self.likelihood.sigma.detach().numpy() 155 | gp_lower = gp_mu - 2*np.sqrt(gp_var) #- 2*self.likelihood.sigma.detach().numpy() 156 | 157 | return gp, gp_upper, gp_lower 158 | 159 | def rmse(self, x_new, f_new): 160 | f_gp,_,_ = self.predictive(x_new) 161 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 162 | return rmse 163 | 164 | def mae(self, x_new, f_new): 165 | f_gp,_,_ = self.predictive(x_new) 166 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 167 | return mae 168 | 169 | def nlpd(self, x_new, y_new): 170 | f_gp, u_gp, _ = self.predictive(x_new) 171 | f_gp = torch.from_numpy(f_gp) 172 | u_gp = torch.from_numpy(u_gp) 173 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 174 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 175 | return nlpd 176 | 177 | -------------------------------------------------------------------------------- /models/hetmoensemble.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from torch.distributions 
import MultivariateNormal as Normal 15 | from torch.distributions import kl_divergence 16 | from kernels.coregionalization import LMC 17 | from GPy.inference.latent_function_inference.posterior import Posterior 18 | import numpy as np 19 | 20 | class HetMultiOutputEnsembleGP(torch.nn.Module): 21 | """ 22 | -- Heterogeneous Multi Output Ensemble for Gaussian Processes -- 23 | -- Accepts one channel x,y of data. -- 24 | """ 25 | 26 | def __init__(self, models, likelihood, kernels, Q, M, input_dim=None, batch_rate=1.0): 27 | super(HetMultiOutputEnsembleGP, self).__init__() 28 | 29 | if input_dim is None: 30 | input_dim = 1 31 | self.batch_rate = batch_rate # rate of mini-batch/dataset 32 | 33 | # Dimensions -- 34 | self.M = M # num. inducing 35 | self.K = len(models) # num. models 36 | self.input_dim = int(input_dim) # dimension of x 37 | 38 | # Multi-output GP Ensemble Elements -- 39 | self.Q = Q 40 | self.likelihood = likelihood 41 | self.D = self.K + 1 # the number of modules + data channel 42 | 43 | # Kernels -- 44 | self.kernels = torch.nn.ModuleList() 45 | for q in range(self.Q): 46 | self.kernels.append(kernels[q]) 47 | self.coregionalization = LMC(self.kernels, self.D) # is a list 48 | 49 | if self.input_dim > 1: 50 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False) 51 | else: 52 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False) 53 | 54 | # Adjacent GP Models 55 | self.models = models # is a list 56 | 57 | # Ensemble Variational distribution -- 58 | self.q_m = torch.nn.Parameter(torch.randn(M, Q), requires_grad=True) # variational: mean parameter 59 | self.q_L = torch.nn.Parameter(torch.tile(torch.eye(M)[:,:,None], (1, 1, self.Q)), requires_grad=True) # variational: covariance 60 | 61 | 62 | def ensemble(self): 63 | # MOGP prior + Variational parameters 64 | q_m = self.q_m 65 | q_S = torch.zeros(self.M, self.M, self.Q) 66 | Kvv = torch.zeros(self.M, self.M, self.Q) 67 | iKvv = torch.zeros(self.M, self.M, self.Q) 68 | for q in range(self.Q): 69 | Kvv_q = self.kernels[q].K(self.z[:,:,q], self.z[:,:,q]) 70 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 71 | Kvv[:,:,q] = Kvv_q 72 | iKvv[:,:,q] = iKvv_q 73 | 74 | q_L = torch.tril(self.q_L[:,:,q]) 75 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 76 | 77 | ensemble_m = [] 78 | ensemble_S = [] 79 | 80 | # Ensemble MOGP Distributions 81 | for k, model_k in enumerate(self.models): 82 | 83 | Kuu = self.coregionalization.Kff(model_k.z, k) 84 | Kuv = self.coregionalization.Kfu(model_k.z, self.z, k) 85 | 86 | m_k = 0.0 87 | S_k = Kuu 88 | 89 | # TODO: Make the following faster 90 | for q in range(self.Q): 91 | 92 | A = Kuv[:,:,q].mm(iKvv[:,:,q]) 93 | AT = iKvv[:,:,q].mm(Kuv[:,:,q].t()) 94 | 95 | m_k += A.mm(q_m[:,q:q+1]) 96 | S_k += A.mm(q_S[:,:,q]).mm(AT) - A.mm(Kuv[:,:,q].t()) 97 | 98 | ensemble_m.append(m_k) 99 | ensemble_S.append(S_k) 100 | 101 | return ensemble_m, ensemble_S 102 | 103 | 104 | def expectation(self, x, y): 105 | E = 0.0 106 | ensemble_m, ensemble_S = self.ensemble() 107 | 108 | # Expectation of k ensembles -- 109 | for k,model_k in enumerate(self.models): 110 | # Ensemble GP -- q_e() 111 | m_e = ensemble_m[k] 112 | S_e = ensemble_S[k] 113 | 114 | # Past GP variational distribution -- q_k() 115 | m_k = model_k.q_m 116 | L_k = torch.tril(model_k.q_L) 117 | S_k = torch.mm(L_k, L_k.t()) 118 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse? 
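# Each module k contributes E_{q_e}[log q_k(u_k) - log p_k(u_k)] + log Z_k to the bound,
# where q_e(u_k) = N(m_e, S_e) is the ensemble marginal evaluated at that module's inducing
# inputs (computed in ensemble() through the LMC coregionalization) and log Z_k is the
# module's stored evidence estimate (model_k.logZ, set by its evidence() method).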
119 | 120 | # Past GP prior -- p_k() 121 | z_k = model_k.z 122 | Kkk = model_k.kernel.K(z_k, z_k) 123 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse? 124 | 125 | # Expectation on terms -- E[log_p()] and E[log_q()] 126 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k) 127 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk) 128 | 129 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]] 130 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ 131 | 132 | # Expectation of data channel -- 133 | q_m = self.q_m 134 | q_S = torch.zeros(self.M, self.M, self.Q) 135 | Kuu = torch.zeros(self.M, self.M, self.Q) 136 | iKuu = torch.zeros(self.M, self.M, self.Q) 137 | 138 | for q in range(self.Q): 139 | # MOGP latent functions prior 140 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 141 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse? 142 | Kuu[:, :, q] = Kuu_q 143 | iKuu[:, :, q] = iKuu_q 144 | 145 | # Variational parameters + Gaussian integration 146 | q_L = torch.tril(self.q_L[:, :, q]) 147 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 148 | Kff = self.coregionalization.Kff(x, self.D-1) 149 | Kfu = self.coregionalization.Kfu(x, self.z, self.D-1) 150 | 151 | m_f = 0.0 152 | S_f = Kff 153 | 154 | for q in range(self.Q): 155 | A = Kfu[:, :, q].mm(iKuu[:, :, q]) 156 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t()) 157 | 158 | m_f += A.mm(q_m[:, q:q + 1]) 159 | S_f += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t()) 160 | 161 | v_f = torch.diag(S_f) 162 | expectation_y = self.likelihood.variational_expectation(y, m_f, v_f) 163 | 164 | return E, expectation_y 165 | 166 | def divergence(self, p_v, q_v): 167 | kl = 0.0 168 | for q in range(self.Q): 169 | kl += kl_divergence(q_v[q], p_v[q]) 170 | return kl 171 | 172 | def forward(self, x, y): 173 | 174 | q_u = [] 175 | p_u = [] 176 | q_m = self.q_m 177 | q_S = torch.zeros(self.M, self.M, self.Q) 178 | Kuu = torch.zeros(self.M, self.M, self.Q) 179 | for q in range(self.Q): 180 | 181 | # Variational parameters -- 182 | q_L = torch.tril(self.q_L[:,:,q]) 183 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 184 | 185 | # Prior parameters (uses kernel) -- 186 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 187 | Kuu[:, :, q] = Kuu_q 188 | 189 | # Distributions -- q(u), p(u) 190 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q])) 191 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q])) 192 | 193 | # Expectation -- 194 | expectation, exp_y = self.expectation(x, y) 195 | expectation_y = self.batch_rate * exp_y.sum() 196 | 197 | # KL divergence -- 198 | kl = self.divergence(q_u, p_u) 199 | 200 | # Calls ELBO 201 | elbo = expectation + expectation_y - kl 202 | return -elbo 203 | 204 | def predictive(self, xnew, k): 205 | # MOGP prior + Variational parameters 206 | q_m = self.q_m 207 | q_S = torch.zeros(self.M, self.M, self.Q) 208 | Kvv = torch.zeros(self.M, self.M, self.Q) 209 | iKvv = torch.zeros(self.M, self.M, self.Q) 210 | 211 | # Posterior distribution on new input data 212 | Kuu = self.coregionalization.Kff(xnew, k) 213 | Kuv = self.coregionalization.Kfu(xnew, self.z, k) 214 | 215 | m_k = 0.0 216 | S_k = Kuu 217 | for q in range(self.Q): 218 | # MOGP latent functions prior 219 | Kvv_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 220 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 
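# Accumulated over the Q latent functions below, the LMC predictive at xnew is
#   m_*(x) = sum_q Kfu_q Kvv_q^{-1} m_q,
#   S_*(x) = Kff + sum_q Kfu_q Kvv_q^{-1} (S_q - Kvv_q) Kvv_q^{-1} Kuf_q,
# and the returned bands are mean +/- 2*sqrt(diag(S_*)).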
221 | Kvv[:, :, q] = Kvv_q 222 | iKvv[:, :, q] = iKvv_q 223 | 224 | # Variational parameters + Gaussian integration 225 | q_L = torch.tril(self.q_L[:, :, q]) 226 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 227 | 228 | A = Kuv[:, :, q].mm(iKvv[:, :, q]) 229 | AT = iKvv[:, :, q].mm(Kuv[:, :, q].t()) 230 | 231 | m_k += A.mm(q_m[:, q:q + 1]) 232 | S_k += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kuv[:, :, q].t()) 233 | 234 | m_k = m_k.detach().numpy() 235 | S_k = S_k.detach().numpy() 236 | 237 | gp_mu = m_k.flatten() 238 | gp_var = np.diagonal(S_k) 239 | 240 | gp = gp_mu 241 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy() 242 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy() 243 | 244 | return gp, gp_upper, gp_lower 245 | 246 | def rmse(self, x_new, f_new, k): 247 | f_gp,_,_ = self.predictive(x_new, k) 248 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 249 | return rmse 250 | 251 | def mae(self, x_new, f_new, k): 252 | f_gp,_,_ = self.predictive(x_new, k) 253 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 254 | return mae 255 | 256 | def nlpd(self, x_new, y_new, k): 257 | f_gp, u_gp, _ = self.predictive(x_new, k) 258 | f_gp = torch.from_numpy(f_gp) 259 | u_gp = torch.from_numpy(u_gp) 260 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 261 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 262 | return nlpd -------------------------------------------------------------------------------- /models/moensemble.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from torch.distributions import MultivariateNormal as Normal 16 | from torch.distributions import kl_divergence 17 | from kernels.coregionalization import LMC 18 | from GPy.inference.latent_function_inference.posterior import Posterior 19 | import numpy as np 20 | 21 | class MultiOutputEnsembleGP(torch.nn.Module): 22 | """ 23 | -- Multi Output Ensemble for Gaussian Processes -- 24 | """ 25 | 26 | def __init__(self, models, kernels, Q, M, input_dim=None): 27 | super(MultiOutputEnsembleGP, self).__init__() 28 | 29 | if input_dim is None: 30 | input_dim = 1 31 | 32 | # Dimensions -- 33 | self.M = M # num. inducing 34 | self.K = len(models) # num. 
models 35 | self.input_dim = int(input_dim) # dimension of x 36 | 37 | # Multi-output GP Ensemble Elements -- 38 | self.Q = Q 39 | 40 | # Kernels -- 41 | self.kernels = torch.nn.ModuleList() 42 | for q in range(self.Q): 43 | self.kernels.append(kernels[q]) 44 | self.coregionalization = LMC(self.kernels, self.K) # is a list 45 | 46 | if self.input_dim > 1: 47 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False) 48 | else: 49 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False) 50 | 51 | # Adjacent GP Models 52 | self.models = models # is a list 53 | 54 | # Ensemble Variational distribution -- 55 | self.q_m = torch.nn.Parameter(2*torch.randn(M, Q), requires_grad=True) # variational: mean parameter 56 | self.q_L = torch.nn.Parameter(0.5*torch.tile(torch.eye(M)[:,:,None], (1, 1, self.Q)), requires_grad=True) # variational: covariance 57 | 58 | 59 | def ensemble(self): 60 | # MOGP prior + Variational parameters 61 | q_m = self.q_m 62 | q_S = torch.zeros(self.M, self.M, self.Q) 63 | Kvv = torch.zeros(self.M, self.M, self.Q) 64 | iKvv = torch.zeros(self.M, self.M, self.Q) 65 | for q in range(self.Q): 66 | Kvv_q = self.kernels[q].K(self.z[:,:,q], self.z[:,:,q]) 67 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 68 | Kvv[:,:,q] = Kvv_q 69 | iKvv[:,:,q] = iKvv_q 70 | 71 | q_L = torch.tril(self.q_L[:,:,q]) 72 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 73 | 74 | ensemble_m = [] 75 | ensemble_S = [] 76 | 77 | # Ensemble MOGP Distributions 78 | for k, model_k in enumerate(self.models): 79 | 80 | Kuu = self.coregionalization.Kff(model_k.z, k) 81 | Kuv = self.coregionalization.Kfu(model_k.z, self.z, k) 82 | 83 | m_k = 0.0 84 | S_k = Kuu 85 | 86 | for q in range(self.Q): 87 | 88 | A = Kuv[:,:,q].mm(iKvv[:,:,q]) 89 | AT = iKvv[:,:,q].mm(Kuv[:,:,q].t()) 90 | 91 | m_k += A.mm(q_m[:,q:q+1]) 92 | S_k += A.mm(q_S[:,:,q]).mm(AT) - A.mm(Kuv[:,:,q].t()) 93 | 94 | ensemble_m.append(m_k) 95 | ensemble_S.append(S_k) 96 | 97 | return ensemble_m, ensemble_S 98 | 99 | 100 | def expectation(self): 101 | E = 0.0 102 | ensemble_m, ensemble_S = self.ensemble() 103 | 104 | # Expectation of k ensembles -- 105 | for k,model_k in enumerate(self.models): 106 | # Ensemble GP -- q_e() 107 | m_e = ensemble_m[k] 108 | S_e = ensemble_S[k] 109 | 110 | # Past GP variational distribution -- q_k() 111 | m_k = model_k.q_m 112 | L_k = torch.tril(model_k.q_L) 113 | S_k = torch.mm(L_k, L_k.t()) 114 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse? 115 | 116 | # Past GP prior -- p_k() 117 | z_k = model_k.z 118 | Kkk = model_k.kernel.K(z_k, z_k) 119 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse? 
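# Numerically, logdet(2*pi*S_k) below equals M_k*log(2*pi) + logdet(S_k); writing it that
# way (or using torch.slogdet) avoids scaling the whole matrix first and can be slightly
# more stable when S_k is ill-conditioned.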
120 | 121 | # Expectation on terms -- E[log_p()] and E[log_q()] 122 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k) 123 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk) 124 | 125 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]] 126 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ 127 | 128 | return E 129 | 130 | def divergence(self, p_v, q_v): 131 | kl = 0.0 132 | for q in range(self.Q): 133 | kl += kl_divergence(q_v[q], p_v[q]) 134 | return kl 135 | 136 | def forward(self): 137 | 138 | q_u = [] 139 | p_u = [] 140 | q_m = self.q_m 141 | q_S = torch.zeros(self.M, self.M, self.Q) 142 | Kuu = torch.zeros(self.M, self.M, self.Q) 143 | for q in range(self.Q): 144 | 145 | # Variational parameters -- 146 | q_L = torch.tril(self.q_L[:,:,q]) 147 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 148 | 149 | # Prior parameters (uses kernel) -- 150 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 151 | Kuu[:, :, q] = Kuu_q 152 | 153 | # Distributions -- q(u), p(u) 154 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q])) 155 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q])) 156 | 157 | # Expectation -- 158 | expectation = self.expectation() 159 | 160 | # KL divergence -- 161 | kl = self.divergence(q_u, p_u) 162 | 163 | # Calls ELBO 164 | elbo = expectation - kl 165 | return -elbo 166 | 167 | def predictive(self, xnew, k): 168 | # MOGP prior + Variational parameters 169 | q_m = self.q_m 170 | q_S = torch.zeros(self.M, self.M, self.Q) 171 | Kvv = torch.zeros(self.M, self.M, self.Q) 172 | iKvv = torch.zeros(self.M, self.M, self.Q) 173 | 174 | # Posterior distribution on new input data 175 | Kuu = self.coregionalization.Kff(xnew, k) 176 | Kuv = self.coregionalization.Kfu(xnew, self.z, k) 177 | 178 | m_k = 0.0 179 | S_k = Kuu 180 | for q in range(self.Q): 181 | # MOGP latent functions prior 182 | Kvv_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 183 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 
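# predictive(xnew, k) mirrors ensemble(): the same per-q projection is applied below, but
# with the test inputs xnew in place of module k's inducing inputs, and the result is
# detached to numpy as (mean, mean + 2*sqrt(var), mean - 2*sqrt(var)).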
184 | Kvv[:, :, q] = Kvv_q 185 | iKvv[:, :, q] = iKvv_q 186 | 187 | # Variational parameters + Gaussian integration 188 | q_L = torch.tril(self.q_L[:, :, q]) 189 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 190 | 191 | A = Kuv[:, :, q].mm(iKvv[:, :, q]) 192 | AT = iKvv[:, :, q].mm(Kuv[:, :, q].t()) 193 | 194 | m_k += A.mm(q_m[:, q:q + 1]) 195 | S_k += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kuv[:, :, q].t()) 196 | 197 | m_k = m_k.detach().numpy() 198 | S_k = S_k.detach().numpy() 199 | 200 | gp_mu = m_k.flatten() 201 | gp_var = np.diagonal(S_k) 202 | 203 | gp = gp_mu 204 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy() 205 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy() 206 | 207 | return gp, gp_upper, gp_lower 208 | 209 | def rmse(self, x_new, f_new, k): 210 | f_gp,_,_ = self.predictive(x_new, k) 211 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 212 | return rmse 213 | 214 | def mae(self, x_new, f_new, k): 215 | f_gp,_,_ = self.predictive(x_new, k) 216 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 217 | return mae 218 | 219 | def nlpd(self, likelihood, x_new, y_new, k): 220 | f_gp, u_gp, _ = self.predictive(x_new, k) 221 | f_gp = torch.from_numpy(f_gp) 222 | u_gp = torch.from_numpy(u_gp) 223 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 224 | nlpd = - torch.mean(likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 225 | return nlpd -------------------------------------------------------------------------------- /models/svgp.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from torch.distributions import MultivariateNormal as Normal 15 | from torch.distributions import kl_divergence 16 | 17 | import numpy as np 18 | from GPy.inference.latent_function_inference import LatentFunctionInference 19 | from GPy.inference.latent_function_inference.posterior import Posterior 20 | 21 | 22 | class SVGP(torch.nn.Module): 23 | """ 24 | -- Sparse Variational Gaussian Process -- 25 | -- 26 | -- Adaptation to Pytorch + GP framework 27 | -- Based on Hensman et al. "Scalable Variational Gaussian Process Classification" AISTATS 2015 28 | -- Reference: http://proceedings.mlr.press/v38/hensman15.pdf 29 | """ 30 | def __init__(self, kernel, likelihood, M, input_dim=None, batch_rate=1.0): 31 | super(SVGP, self).__init__() 32 | 33 | if input_dim is None: 34 | input_dim = 1 35 | 36 | # Dimensions -- 37 | self.M = M #num. 
inducing 38 | self.input_dim = int(input_dim) #dimension of x 39 | self.batch_rate = batch_rate #rate of mini-batch/dataset 40 | 41 | # GP Elements -- 42 | self.likelihood = likelihood #type of likelihood 43 | self.kernel = kernel #type of kernel 44 | 45 | self.logZ = 0.0 46 | 47 | if self.input_dim > 1: 48 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False) 49 | else: 50 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 51 | 52 | # Variational distribution -- 53 | self.q_m = torch.nn.Parameter(torch.randn(M,1), requires_grad=True) # variational: mean parameter 54 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 55 | 56 | def forward(self, x, y): 57 | 58 | # Variational parameters -- 59 | q_m = self.q_m 60 | q_L = torch.tril(self.q_L) 61 | q_S = torch.mm(q_L, q_L.t()) 62 | 63 | # Prior parameters (uses kernel) -- 64 | Kuu = self.kernel.K(self.z) 65 | 66 | # Distributions -- q(u), p(u) 67 | q_u = Normal(q_m.flatten(), q_S) 68 | p_u = Normal(torch.zeros(self.M), Kuu) 69 | 70 | # Calculus of q(f) -- 71 | Kff = self.kernel.K(x,x) 72 | Kfu = self.kernel.K(x, self.z) 73 | Kuf = torch.transpose(Kfu,0,1) 74 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse? 75 | 76 | A = Kfu.mm(iKuu) 77 | AT = iKuu.mm(Kuf) 78 | 79 | m_f = A.mm(q_m) 80 | v_f = torch.diag(Kff + A.mm(q_S - Kuu).mm(AT)) 81 | 82 | # Expectation term -- 83 | expectation = self.likelihood.variational_expectation(y, m_f, v_f) 84 | 85 | # KL divergence -- 86 | kl = kl_divergence(q_u, p_u) 87 | 88 | # Lower bound (ELBO) -- 89 | elbo = self.batch_rate*expectation.sum() - kl 90 | return -elbo 91 | 92 | def predictive(self, x_new, lik_noise=False): 93 | # Matrices 94 | q_m = self.q_m.detach().numpy() 95 | q_L = torch.tril(self.q_L) 96 | q_S = torch.mm(q_L, q_L.t()).detach().numpy() 97 | Kuu = self.kernel.K(self.z, self.z).detach().numpy() 98 | 99 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape)) 100 | Kx = self.kernel.K(self.z, x_new).detach().numpy() 101 | Kxx = self.kernel.K(x_new, x_new).detach().numpy() 102 | 103 | # GP Predictive Posterior - mean + variance 104 | gp_mu = np.dot(Kx.T, posterior.woodbury_vector) 105 | Kxx = np.diag(Kxx) 106 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 107 | 108 | gp = gp_mu 109 | if lik_noise: 110 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) + 2 * self.likelihood.sigma.detach().numpy() 111 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) - 2 * self.likelihood.sigma.detach().numpy() 112 | else: 113 | gp_upper = gp_mu + 2*np.sqrt(gp_var) 114 | gp_lower = gp_mu - 2*np.sqrt(gp_var) 115 | 116 | return gp, gp_upper, gp_lower 117 | 118 | def rmse(self, x_new, f_new): 119 | f_gp,_,_ = self.predictive(x_new) 120 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 121 | return rmse 122 | 123 | def mae(self, x_new, f_new): 124 | f_gp,_,_ = self.predictive(x_new) 125 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 126 | return mae 127 | 128 | def nlpd(self, x_new, y_new): 129 | f_gp, u_gp, _ = self.predictive(x_new) 130 | f_gp = torch.from_numpy(f_gp) 131 | u_gp = torch.from_numpy(u_gp) 132 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 133 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 134 | return nlpd 135 | 136 | def evidence(self, x, y, N_samples=None): 137 | # Approximation CI 138 | if N_samples is None: 139 | N_samples = 1000 
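# Monte-Carlo approximation of the evidence under a factorised GP prior:
#   log Z ~= sum_n log( (1/S) * sum_s p(y_n | f_n^(s)) ),   f_n^(s) ~ N(0, k(x_n, x_n)),
# i.e. only the diagonal prior variances k(x_n, x_n) are used below. The result is stored
# in self.logZ and later consumed by the ensemble models.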
140 | 141 | N,_ = x.shape 142 | v_f = torch.zeros(N) 143 | for i in range(N): 144 | v_f[i] = self.kernel.K(x[i:i+1,:],x[i:i+1,:]) 145 | #v_f = torch.diag(self.kernel.K(x,x), 0) 146 | m_f = torch.zeros(v_f.shape) 147 | p_f = Normal(m_f, torch.diag(v_f)) 148 | f_samples = p_f.sample([N_samples]).t() # N x N_samples 149 | mc_pdf = self.likelihood.pdf(f_samples, torch.tile(y, (1,N_samples))) 150 | 151 | mc_expectations = 1/N_samples * torch.sum(torch.clamp(mc_pdf, min=1e-100),1) 152 | print(mc_expectations) 153 | logZ = torch.sum(torch.log(mc_expectations)) 154 | 155 | self.logZ = logZ 156 | return logZ 157 | 158 | 159 | -------------------------------------------------------------------------------- /models/svmogp.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from torch.distributions import MultivariateNormal as Normal 15 | from torch.distributions import kl_divergence 16 | from kernels.coregionalization import LMC 17 | 18 | import numpy as np 19 | from GPy.inference.latent_function_inference import LatentFunctionInference 20 | from GPy.inference.latent_function_inference.posterior import Posterior 21 | 22 | 23 | class SVMOGP(torch.nn.Module): 24 | """ 25 | -- Sparse Variational Multi-output Gaussian Process -- 26 | -- 27 | -- Adaptation to Pytorch + GP framework -- 28 | -- Based on M. A. Álvarez and N. Lawrence, "Sparse convolved Gaussian processes for multi-output regression" NIPS'08 29 | -- Reference: http://papers.neurips.cc/paper/3553-sparse-convolved-gaussian-processes-for-multi-output-regression.pdf 30 | """ 31 | def __init__(self, kernels, likelihoods, Q, M, input_dim=None, batch_rates=None): 32 | super(SVMOGP, self).__init__() 33 | 34 | if input_dim is None: 35 | input_dim = 1 36 | 37 | 38 | # Dimensions -- 39 | self.M = M # num. inducing 40 | self.Q = Q # num. latent functions 41 | self.input_dim = int(input_dim) # dimension of x 42 | 43 | # Likelihoods -- 44 | self.likelihoods = likelihoods # list of likelihoods 45 | self.D = len(self.likelihoods) # num. 
output channels 46 | 47 | if batch_rates is None: 48 | self.batch_rates = self.D*[1.0] 49 | else: 50 | self.batch_rates = batch_rates 51 | 52 | # Kernels -- 53 | self.kernels = torch.nn.ModuleList() 54 | for q in range(self.Q): 55 | self.kernels.append(kernels[q]) 56 | self.coregionalization = LMC(self.kernels, self.D) # is a list 57 | 58 | # Inducing points -- 59 | if self.input_dim > 1: 60 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False) 61 | else: 62 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False) 63 | 64 | 65 | # Variational distributions -- 66 | self.q_m = torch.nn.Parameter(2*torch.randn(M, Q), requires_grad=True) # variational: mean parameter 67 | self.q_L = torch.nn.Parameter(torch.tile(torch.eye(M)[:, :, None], (1, 1, self.Q)), requires_grad=True) # variational: covariance 68 | 69 | def expectation(self, x, y): 70 | # Check length of input+output lists 71 | assert len(x) == self.D 72 | assert len(y) == self.D 73 | 74 | # MOGP prior + Variational parameters 75 | q_m = self.q_m 76 | q_S = torch.zeros(self.M, self.M, self.Q) 77 | Kuu = torch.zeros(self.M, self.M, self.Q) 78 | iKuu = torch.zeros(self.M, self.M, self.Q) 79 | 80 | for q in range(self.Q): 81 | # MOGP latent functions prior 82 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 83 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse? 84 | Kuu[:, :, q] = Kuu_q 85 | iKuu[:, :, q] = iKuu_q 86 | 87 | # Variational parameters + Gaussian integration 88 | q_L = torch.tril(self.q_L[:, :, q]) 89 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 90 | 91 | # Expectation values (NxD) 92 | expectation = [] 93 | for d in range(self.D): 94 | Kff = self.coregionalization.Kff(x[d], d) 95 | Kfu = self.coregionalization.Kfu(x[d], self.z, d) 96 | 97 | m_f = 0.0 98 | S_f = Kff 99 | 100 | for q in range(self.Q): 101 | A = Kfu[:, :, q].mm(iKuu[:, :, q]) 102 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t()) 103 | 104 | m_f += A.mm(q_m[:, q:q + 1]) 105 | S_f += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t()) 106 | 107 | v_f = torch.diag(S_f) 108 | expectation.append(self.likelihoods[d].variational_expectation(y[d], m_f, v_f)) 109 | 110 | return expectation 111 | 112 | def divergence(self, p_u, q_u): 113 | kl = 0.0 114 | for q in range(self.Q): 115 | kl += kl_divergence(q_u[q], p_u[q]) 116 | return kl 117 | 118 | def forward(self, x, y): 119 | 120 | # Empty variables for filling in 1:Q 121 | q_u = [] 122 | p_u = [] 123 | q_m = self.q_m 124 | q_S = torch.zeros(self.M, self.M, self.Q) 125 | Kuu = torch.zeros(self.M, self.M, self.Q) 126 | for q in range(self.Q): 127 | 128 | # Variational parameters -- 129 | q_L = torch.tril(self.q_L[:,:,q]) 130 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 131 | 132 | # Prior parameters (uses kernel) -- 133 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 134 | Kuu[:, :, q] = Kuu_q 135 | 136 | # Distributions -- q(u), p(u) 137 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q])) 138 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q])) 139 | 140 | # Expectation term -- 141 | expectation = 0.0 142 | expectation_mo = self.expectation(x, y) 143 | for d, exp in enumerate(expectation_mo): 144 | expectation += self.batch_rates[d] * exp.sum() 145 | 146 | # KL divergence -- 147 | kl = self.divergence(q_u, p_u) 148 | 149 | # Lower bound (ELBO) -- 150 | elbo = expectation - kl 151 | return -elbo 152 | 153 | def predictive(self, xnew, d): 154 | # MOGP prior + Variational parameters 155 
| q_m = self.q_m 156 | q_S = torch.zeros(self.M, self.M, self.Q) 157 | Kuu = torch.zeros(self.M, self.M, self.Q) 158 | iKuu = torch.zeros(self.M, self.M, self.Q) 159 | 160 | # Posterior distribution on new input data 161 | Kff = self.coregionalization.Kff(xnew, d) 162 | Kfu = self.coregionalization.Kfu(xnew, self.z, d) 163 | 164 | m_pred = 0.0 165 | S_pred = Kff 166 | for q in range(self.Q): 167 | # MOGP latent functions prior 168 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 169 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse? 170 | Kuu[:, :, q] = Kuu_q 171 | iKuu[:, :, q] = iKuu_q 172 | 173 | # Variational parameters + Gaussian integration 174 | q_L = torch.tril(self.q_L[:, :, q]) 175 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 176 | 177 | A = Kfu[:, :, q].mm(iKuu[:, :, q]) 178 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t()) 179 | 180 | m_pred += A.mm(q_m[:, q:q + 1]) 181 | S_pred += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t()) 182 | 183 | # Detach and numpy easier for plotting. 184 | m_pred = m_pred.detach().numpy() 185 | S_pred = S_pred.detach().numpy() 186 | 187 | gp_mu = m_pred.flatten() 188 | gp_var = np.diagonal(S_pred) 189 | 190 | gp = gp_mu 191 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy() 192 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy() 193 | 194 | return gp, gp_upper, gp_lower 195 | 196 | def rmse(self, x_new, f_new, d): 197 | f_gp,_,_ = self.predictive(x_new, d) 198 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 199 | return rmse 200 | 201 | def mae(self, x_new, f_new, d): 202 | f_gp,_,_ = self.predictive(x_new, d) 203 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 204 | return mae 205 | 206 | def nlpd(self, x_new, y_new, d): 207 | f_gp, u_gp, _ = self.predictive(x_new, d) 208 | f_gp = torch.from_numpy(f_gp) 209 | u_gp = torch.from_numpy(u_gp) 210 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 211 | nlpd = - torch.mean(self.likelihoods[d].log_predictive(y_new, f_gp, v_gp)).detach().numpy() 212 | return nlpd 213 | -------------------------------------------------------------------------------- /optimization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/optimization/__init__.py -------------------------------------------------------------------------------- /optimization/algorithms.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | 17 | from likelihoods.gaussian import Gaussian 18 | from likelihoods.bernoulli import Bernoulli 19 | 20 | class AlgorithmVEM(): 21 | def __init__(self, model, x=None, y=None, config='svgp', iters=20): 22 | super(AlgorithmVEM, self).__init__() 23 | 24 | self.model = model 25 | if x is not None: 26 | self.x = x 27 | if y is not None: 28 | self.y = y 29 | self.iters = iters 30 | 31 | if config 
== 'svgp' or config == 'ensemble': 32 | self.config = config 33 | else: 34 | raise ValueError('Not valid model type for Algorithm VEM, choose \'svgp\' or \'ensemble\'') 35 | 36 | if self.config == 'svgp': 37 | # Learning rates per param. 38 | self.lr_m = 1e-6 39 | self.lr_L = 1e-12 40 | self.lr_hyp = 1e-10 41 | self.lr_z = 1e-10 42 | 43 | # VE + VM iterations. 44 | self.ve_its = 20 45 | self.vm_its = 10 46 | self.z_its = 10 47 | 48 | elif self.config == 'ensemble': 49 | # Learning rates per param. 50 | self.lr_m = 1e-3 51 | self.lr_L = 1e-6 52 | self.lr_hyp = 1e-8 53 | self.lr_z = 1e-6 54 | 55 | # VE + VM iterations. 56 | self.ve_its = 30 57 | self.vm_its = 10 58 | self.z_its = 10 59 | 60 | def fit(self, opt='sgd', plot=False): 61 | if opt == 'sgd': 62 | ve_optimizer = torch.optim.SGD([{'params':self.model.q_m, 'lr':self.lr_m},{'params':self.model.q_L,'lr':self.lr_L}], lr=1e-12, momentum=0.9) 63 | 64 | if isinstance(self.model, Gaussian): 65 | vm_optimizer = torch.optim.SGD([{'params':self.model.kernel.parameters(), 'lr':self.lr_hyp},{'params':self.model.likelihood.sigma,'lr':self.lr_hyp}], lr=1e-12, momentum=0.9) 66 | else: 67 | vm_optimizer = torch.optim.SGD([{'params': self.model.kernel.parameters(), 'lr': self.lr_hyp}], lr=1e-12, momentum=0.9) 68 | 69 | z_optimizer = torch.optim.SGD([{'params':self.model.z, 'lr':self.lr_z}], lr=1e-10, momentum=0.9) 70 | 71 | elbo_its = np.empty((self.iters, 1)) 72 | for em_it in range(self.iters): 73 | 74 | # VE STEP 75 | for it in range(self.ve_its): 76 | if self.config == 'svgp': 77 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO 78 | elif self.config == 'ensemble': 79 | elbo_it = self.model() # Forward pass -> computes ELBO 80 | 81 | ve_optimizer.zero_grad() 82 | elbo_it.backward() # Backward pass <- computes gradients 83 | ve_optimizer.step() 84 | 85 | # Overfitting avoidance 86 | if self.config == 'ensemble': 87 | if self.model().item() < 10.0: 88 | break 89 | 90 | # VM STEP 91 | # 1. hyper-parameters 92 | for it in range(self.vm_its): 93 | if self.config == 'svgp': 94 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO 95 | elif self.config == 'ensemble': 96 | elbo_it = self.model() # Forward pass -> computes ELBO 97 | 98 | vm_optimizer.zero_grad() 99 | elbo_it.backward() # Backward pass <- computes gradients 100 | vm_optimizer.step() 101 | 102 | # Overfitting avoidance 103 | if self.config == 'ensemble': 104 | if self.model().item() < 10.0: 105 | break 106 | 107 | # 2. 
inducing-points 108 | for it in range(self.z_its): 109 | if self.config == 'svgp': 110 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO 111 | elif self.config == 'ensemble': 112 | elbo_it = self.model() # Forward pass -> computes ELBO 113 | 114 | z_optimizer.zero_grad() 115 | elbo_it.backward() # Backward pass <- computes gradients 116 | z_optimizer.step() 117 | 118 | # Overfitting avoidance 119 | if self.config == 'ensemble': 120 | if self.model().item() < 10.0: 121 | break 122 | 123 | print('Variational EM step (it=' + str(em_it) + ')') 124 | if self.config == 'svgp': 125 | print(' \__ elbo =', self.model(self.x, self.y).item()) 126 | elbo_its[em_it] = - self.model(self.x, self.y).item() 127 | elif self.config == 'ensemble': 128 | print(' \__ elbo =', self.model().item()) 129 | elbo_its[em_it] = - self.model().item() 130 | 131 | # Overfitting avoidance 132 | if self.model().item() < 10.0: 133 | break 134 | 135 | elif opt == 'lbfgs': 136 | optim_param= torch.optim.LBFGS([self.model.q_m, self.model.q_L], lr=self.lr_m, max_iter=self.ve_its) 137 | optim_hyper = torch.optim.LBFGS(list(self.model.kernel.parameters()) + [self.model.likelihood.sigma], lr=self.lr_hyp, max_iter=self.vm_its) 138 | optim_z = torch.optim.LBFGS([self.model.z], lr=self.lr_z, max_iter=self.vm_its) 139 | 140 | elbo_its = np.empty((self.iters, 1)) 141 | for em_it in range(self.iters): 142 | 143 | # VE STEP 144 | def closure(): 145 | optim_param.zero_grad() 146 | if self.config == 'svgp': 147 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO 148 | elif self.config == 'ensemble': 149 | elbo_it = self.model() # Forward pass -> computes ELBO 150 | 151 | elbo_it.backward() 152 | return elbo_it 153 | 154 | optim_param.step(closure) 155 | if self.config == 'svgp': 156 | print(' param >>> elbo =', self.model(self.x, self.y).item()) 157 | elif self.config == 'ensemble': 158 | print(' param >>> elbo =', self.model().item()) 159 | 160 | # VM STEP 161 | # 1. hyper-parameters 162 | def closure(): 163 | optim_hyper.zero_grad() 164 | if self.config == 'svgp': 165 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO 166 | elif self.config == 'ensemble': 167 | elbo_it = self.model() # Forward pass -> computes ELBO 168 | 169 | elbo_it.backward() 170 | return elbo_it 171 | 172 | optim_hyper.step(closure) 173 | if self.config == 'svgp': 174 | print(' hyper >>> elbo =', self.model(self.x, self.y).item()) 175 | elif self.config == 'ensemble': 176 | print(' hyper >>> elbo =', self.model().item()) 177 | 178 | # 2. 
inducing-points 179 | def closure(): 180 | optim_z.zero_grad() 181 | if self.config == 'svgp': 182 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO 183 | elif self.config == 'ensemble': 184 | elbo_it = self.model() # Forward pass -> computes ELBO 185 | 186 | elbo_it.backward() 187 | return elbo_it 188 | 189 | optim_z.step(closure) 190 | if self.config == 'svgp': 191 | print(' z pts >>> elbo =', self.model(self.x, self.y).item()) 192 | elif self.config == 'ensemble': 193 | print(' z pts >>> elbo =', self.model().item()) 194 | 195 | 196 | print('Variational EM step (it=' + str(em_it) + ')') 197 | if self.config == 'svgp': 198 | print(' \__ elbo =', self.model(self.x, self.y).item()) 199 | elbo_its[em_it] = - self.model(self.x, self.y).item() 200 | elif self.config == 'ensemble': 201 | print(' \__ elbo =', self.model().item()) 202 | elbo_its[em_it] = - self.model().item() 203 | 204 | else: 205 | print('Not valid optimizer') 206 | 207 | if plot: 208 | plt.figure() 209 | plt.plot(elbo_its, 'k-') 210 | plt.title('Ensemble GP Inference (ELBO)') 211 | plt.xlabel('Iterations') 212 | plt.show() 213 | 214 | def GPR_Optimizer(model, x, y, its=50, lr=1e-2): 215 | optimizer = torch.optim.LBFGS(model.parameters(), lr=lr, max_iter=10) 216 | elbo_its = np.empty((its, 1)) 217 | for it in range(its): 218 | def closure(): 219 | optimizer.zero_grad() 220 | elbo_opt = model(x, y) 221 | elbo_opt.backward() 222 | return elbo_opt 223 | 224 | optimizer.step(closure) 225 | 226 | print('Optimization step (it=' + str(it) + ')') 227 | print(' \__ log_marginal =', model(x, y).item()) 228 | elbo_its[it] = -model(x, y).item() 229 | 230 | 231 | def vem_algorithm(model, x, y, em_iters=10, optimizer='sgd',plot=False): 232 | if optimizer=='sgd': 233 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-6},{'params':model.q_L,'lr':1e-12}], lr=1e-12, momentum=0.9) 234 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-10, momentum=0.9) 235 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-10}], lr=1e-10, momentum=0.9) 236 | 237 | VE_iters = 20 238 | VM_iters = 10 239 | Z_iters = 10 240 | 241 | elbo_its = np.empty((em_iters, 1)) 242 | for em_it in range(em_iters): 243 | 244 | # VE STEP 245 | for it in range(VE_iters): 246 | elbo_it = model(x,y) # Forward pass -> computes ELBO 247 | ve_optimizer.zero_grad() 248 | elbo_it.backward() # Backward pass <- computes gradients 249 | ve_optimizer.step() 250 | 251 | # VM STEP 252 | # 1. hyper-parameters 253 | for it in range(VM_iters): 254 | elbo_it = model(x,y) # Forward pass -> computes ELBO 255 | vm_optimizer.zero_grad() 256 | elbo_it.backward() # Backward pass <- computes gradients 257 | vm_optimizer.step() 258 | 259 | # 2. 
inducing-points
260 |             for it in range(Z_iters):
261 |                 elbo_it = model(x,y)   # Forward pass -> computes ELBO
262 |                 z_optimizer.zero_grad()
263 |                 elbo_it.backward()      # Backward pass <- computes gradients
264 |                 z_optimizer.step()
265 | 
266 |             print('Variational EM step (it=' + str(em_it) + ')')
267 |             print(' \__ elbo =', model(x, y).item())
268 |             elbo_its[em_it] = -model(x, y).item()
269 | 
270 | 
271 |     elif optimizer=='lbfgs':
272 |         ve_optimizer = torch.optim.LBFGS([model.q_m, model.q_L], max_iter=50)
273 |         vm_optimizer = torch.optim.LBFGS(model.kernel.parameters(), lr=1e-3, max_iter=10)
274 | 
275 | 
276 |         elbo_its = np.empty((em_iters,1))
277 |         for em_it in range(em_iters):
278 |             # VE STEP
279 |             for name, param in model.kernel.named_parameters():
280 |                 param.requires_grad = False
281 | 
282 |             def closure():
283 |                 ve_optimizer.zero_grad()
284 |                 elbo_opt = model(x, y)
285 |                 #print('ELBO:', elbo_opt.item())
286 |                 elbo_opt.backward()
287 |                 return elbo_opt
288 | 
289 |             ve_optimizer.step(closure)
290 | 
291 |             # VM STEP
292 |             for name, param in model.kernel.named_parameters():
293 |                 param.requires_grad = True
294 | 
295 |             def closure():
296 |                 vm_optimizer.zero_grad()
297 |                 elbo_opt = model(x, y)
298 |                 #print('ELBO:', elbo_opt.item())
299 |                 elbo_opt.backward()
300 |                 return elbo_opt
301 | 
302 |             vm_optimizer.step(closure)
303 | 
304 |             print('Variational EM step (it=' + str(em_it) + ')')
305 |             print(' \__ elbo =', model(x, y).item())
306 |             elbo_its[em_it] = -model(x, y).item()
307 | 
308 |     if plot:
309 |         plt.figure()
310 |         plt.plot(elbo_its, 'k-')
311 |         plt.title('Sparse GP Regression (ELBO)')
312 |         plt.xlabel('Iterations')
313 |         plt.show()
314 | 
315 | def ensemble_vem(model, em_iters=20, optimizer='sgd',plot=False):
316 |     if optimizer=='sgd':
317 |         ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9)
318 |         vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9)
319 |         z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-6}], lr=1e-8, momentum=0.9)
320 | 
321 |         VE_iters = 30
322 |         VM_iters = 10
323 |         Z_iters = 10
324 | 
325 |         elbo_its = np.empty((em_iters, 1))
326 |         for em_it in range(em_iters):
327 |             # VE STEP
328 |             # 1. Variational parameters
329 |             for it in range(VE_iters):
330 |                 elbo_it = model()        # Forward pass -> computes ELBO
331 |                 ve_optimizer.zero_grad()
332 |                 elbo_it.backward()       # Backward pass <- computes gradients
333 |                 ve_optimizer.step()
334 | 
335 |             # VM STEP
336 |             # 1. hyper-parameters
337 |             for it in range(VM_iters):
338 |                 elbo_it = model()        # Forward pass -> computes ELBO
339 |                 vm_optimizer.zero_grad()
340 |                 elbo_it.backward()       # Backward pass <- computes gradients
341 |                 vm_optimizer.step()
342 | 
343 |             # 2. 
inducing-points 344 | for it in range(Z_iters): 345 | elbo_it = model() # Forward pass -> computes ELBO 346 | z_optimizer.zero_grad() 347 | elbo_it.backward() # Backward pass <- computes gradients 348 | z_optimizer.step() 349 | 350 | print('Variational EM step (it=' + str(em_it) + ')') 351 | print(' \__ elbo =', model().item()) 352 | elbo_its[em_it] = -model().item() 353 | 354 | if -model().item() > 0.0: 355 | break 356 | 357 | if plot: 358 | plt.figure() 359 | plt.plot(elbo_its, 'k-') 360 | plt.title('Ensemble GP Inference (ELBO)') 361 | plt.xlabel('Iterations') 362 | plt.show() 363 | 364 | 365 | def ensemble_vem_parallel(model, em_iters=30, optimizer='sgd',plot=False): 366 | if optimizer=='sgd': 367 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9) 368 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9) 369 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-8}], lr=1e-8, momentum=0.9) 370 | 371 | VE_iters = 30 372 | VM_iters = 10 373 | Z_iters = 10 374 | 375 | elbo_its = np.zeros((em_iters, 1)) 376 | for em_it in range(em_iters): 377 | # VE STEP 378 | # 1. Variational parameters 379 | for it in range(VE_iters): 380 | elbo_it = model() # Forward pass -> computes ELBO 381 | ve_optimizer.zero_grad() 382 | elbo_it.backward() # Backward pass <- computes gradients 383 | ve_optimizer.step() 384 | 385 | # VM STEP 386 | # 1. hyper-parameters 387 | for it in range(VM_iters): 388 | elbo_it = model() # Forward pass -> computes ELBO 389 | vm_optimizer.zero_grad() 390 | elbo_it.backward() # Backward pass <- computes gradients 391 | vm_optimizer.step() 392 | 393 | # 2. inducing-points 394 | for it in range(Z_iters): 395 | elbo_it = model() # Forward pass -> computes ELBO 396 | z_optimizer.zero_grad() 397 | elbo_it.backward() # Backward pass <- computes gradients 398 | z_optimizer.step() 399 | 400 | print('Variational EM step (it=' + str(em_it) + ')') 401 | print(' \__ elbo =', model().item()) 402 | elbo_its[em_it] = -model().item() 403 | 404 | if -model().item() > 0.0: 405 | break 406 | 407 | if plot: 408 | plt.figure() 409 | plt.plot(elbo_its, 'k-') 410 | plt.title('Ensemble GP Inference (ELBO)') 411 | plt.xlabel('Iterations') 412 | plt.show() 413 | 414 | def vem_algorithm_infographic(model, x, y, em_iters=10, plot=False): 415 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-5},{'params':model.q_L,'lr':1e-8}], lr=1e-12, momentum=0.9) 416 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-10, momentum=0.9) 417 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-10}], lr=1e-10, momentum=0.9) 418 | 419 | VE_iters = 20 420 | VM_iters = 20 421 | Z_iters = 10 422 | 423 | elbo_its = np.empty((em_iters, 1)) 424 | for em_it in range(em_iters): 425 | 426 | # VE STEP 427 | for it in range(VE_iters): 428 | elbo_it = model(x,y) # Forward pass -> computes ELBO 429 | ve_optimizer.zero_grad() 430 | elbo_it.backward() # Backward pass <- computes gradients 431 | ve_optimizer.step() 432 | 433 | # VM STEP 434 | # 1. hyper-parameters 435 | for it in range(VM_iters): 436 | elbo_it = model(x,y) # Forward pass -> computes ELBO 437 | vm_optimizer.zero_grad() 438 | elbo_it.backward() # Backward pass <- computes gradients 439 | vm_optimizer.step() 440 | 441 | # 2. 
inducing-points 442 | for it in range(Z_iters): 443 | elbo_it = model(x,y) # Forward pass -> computes ELBO 444 | z_optimizer.zero_grad() 445 | elbo_it.backward() # Backward pass <- computes gradients 446 | z_optimizer.step() 447 | 448 | print('Variational EM step (it=' + str(em_it) + ')') 449 | print(' \__ elbo =', model(x, y).item()) 450 | elbo_its[em_it] = -model(x, y).item() 451 | 452 | 453 | def ensemble_vem_infographic(model, em_iters=30, optimizer='sgd',plot=False): 454 | if optimizer=='sgd': 455 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9) 456 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9) 457 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-8}], lr=1e-8, momentum=0.9) 458 | 459 | VE_iters = 30 460 | VM_iters = 20 461 | Z_iters = 10 462 | 463 | elbo_its = np.zeros((em_iters, 1)) 464 | for em_it in range(em_iters): 465 | # VE STEP 466 | # 1. Variational parameters 467 | for it in range(VE_iters): 468 | elbo_it = model() # Forward pass -> computes ELBO 469 | ve_optimizer.zero_grad() 470 | elbo_it.backward() # Backward pass <- computes gradients 471 | ve_optimizer.step() 472 | 473 | # VM STEP 474 | # 1. hyper-parameters 475 | for it in range(VM_iters): 476 | elbo_it = model() # Forward pass -> computes ELBO 477 | vm_optimizer.zero_grad() 478 | elbo_it.backward() # Backward pass <- computes gradients 479 | vm_optimizer.step() 480 | 481 | # 2. inducing-points 482 | for it in range(Z_iters): 483 | elbo_it = model() # Forward pass -> computes ELBO 484 | z_optimizer.zero_grad() 485 | elbo_it.backward() # Backward pass <- computes gradients 486 | z_optimizer.step() 487 | 488 | print('Variational EM step (it=' + str(em_it) + ')') 489 | print(' \__ elbo =', model().item()) 490 | elbo_its[em_it] = -model().item() 491 | 492 | if -model().item() > 0.0: 493 | break 494 | 495 | def moensemble_vem(model, em_iters=20, optimizer='sgd',plot=False): 496 | if optimizer=='sgd': 497 | ve_optimizer = torch.optim.SGD([{'params': model.q_m, 'lr': 1e-3}, 498 | {'params': model.q_L,'lr': 1e-6}], lr=1e-6, momentum=0.9) 499 | vm_optimizer = torch.optim.SGD([{'params': model.kernels.parameters(), 'lr': 1e-8}, 500 | {'params': model.coregionalization.W, 'lr': 1e-6}], lr=1e-8, momentum=0.9) 501 | z_optimizer = torch.optim.SGD([{'params': model.z, 'lr':1e-7}], lr=1e-8, momentum=0.9) 502 | 503 | VE_iters = 30 504 | VM_iters = 20 505 | Z_iters = 5 506 | 507 | elbo_its = np.empty((em_iters, 1)) 508 | for em_it in range(em_iters): 509 | # VE STEP 510 | # 1. Variational parameters 511 | for it in range(VE_iters): 512 | elbo_it = model() # Forward pass -> computes ELBO 513 | ve_optimizer.zero_grad() 514 | elbo_it.backward() # Backward pass <- computes gradients 515 | ve_optimizer.step() 516 | 517 | # VM STEP 518 | # 1. hyper-parameters 519 | for it in range(VM_iters): 520 | elbo_it = model() # Forward pass -> computes ELBO 521 | vm_optimizer.zero_grad() 522 | elbo_it.backward() # Backward pass <- computes gradients 523 | vm_optimizer.step() 524 | 525 | # 2. 
inducing-points 526 | for it in range(Z_iters): 527 | elbo_it = model() # Forward pass -> computes ELBO 528 | z_optimizer.zero_grad() 529 | elbo_it.backward() # Backward pass <- computes gradients 530 | z_optimizer.step() 531 | 532 | print('Variational EM step (it=' + str(em_it) + ')') 533 | print(' \__ elbo =', model().item()) 534 | elbo_its[em_it] = -model().item() 535 | 536 | if -model().item() > 0.0: 537 | break 538 | 539 | if plot: 540 | plt.figure() 541 | plt.plot(elbo_its, 'k-') 542 | plt.title('Ensemble GP Inference (ELBO)') 543 | plt.xlabel('Iterations') 544 | plt.show() 545 | 546 | class AlgorithmMOVEM(): 547 | def __init__(self, model, iters=20, plot=False): 548 | super(AlgorithmMOVEM, self).__init__() 549 | 550 | self.model = model 551 | self.iters = iters 552 | 553 | # Learning rates per param. 554 | self.lr_m = 1e-3 555 | self.lr_L = 1e-6 556 | self.lr_B = 1e-6 557 | self.lr_hyp = 1e-8 558 | self.lr_z = 1e-7 559 | 560 | # VE + VM iterations. 561 | self.ve_iters = 30 562 | self.vm_iters = 20 563 | self.z_iters = 10 564 | 565 | def fit(self, plot=False): 566 | 567 | ve_optimizer = torch.optim.SGD([{'params': self.model.q_m, 'lr': self.lr_m}, 568 | {'params': self.model.q_L,'lr': self.lr_L}], lr=1e-6, momentum=0.9) 569 | vm_optimizer = torch.optim.SGD([{'params': self.model.kernels.parameters(), 'lr': self.lr_hyp}, 570 | {'params': self.model.coregionalization.W, 'lr': self.lr_B}], lr=1e-8, momentum=0.9) 571 | z_optimizer = torch.optim.SGD([{'params': self.model.z, 'lr': self.lr_z}], lr=1e-8, momentum=0.9) 572 | 573 | elbo_its = np.empty((self.iters, 1)) 574 | for em_it in range(self.iters): 575 | # VE STEP 576 | # 1. Variational parameters 577 | for it in range(self.ve_iters): 578 | elbo_it = self.model() # Forward pass -> computes ELBO 579 | ve_optimizer.zero_grad() 580 | elbo_it.backward() # Backward pass <- computes gradients 581 | ve_optimizer.step() 582 | 583 | # VM STEP 584 | # 1. hyper-parameters 585 | for it in range(self.vm_iters): 586 | elbo_it = self.model() # Forward pass -> computes ELBO 587 | vm_optimizer.zero_grad() 588 | elbo_it.backward() # Backward pass <- computes gradients 589 | vm_optimizer.step() 590 | 591 | # 2. 
inducing-points 592 | for it in range(self.z_iters): 593 | elbo_it = self.model() # Forward pass -> computes ELBO 594 | z_optimizer.zero_grad() 595 | elbo_it.backward() # Backward pass <- computes gradients 596 | z_optimizer.step() 597 | 598 | print('Variational EM step (it=' + str(em_it) + ')') 599 | print(' \__ elbo =', self.model().item()) 600 | elbo_its[em_it] = -self.model().item() 601 | 602 | if -self.model().item() > 0.0: 603 | break 604 | 605 | if plot: 606 | plt.figure() 607 | plt.plot(elbo_its, 'k-') 608 | plt.title('Ensemble GP Inference (ELBO)') 609 | plt.xlabel('Iterations') 610 | plt.show() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==1.4.1 2 | anyio==3.2.1 3 | appnope==0.1.2 4 | argon2-cffi==20.1.0 5 | async-generator==1.10 6 | attrs==21.2.0 7 | Babel==2.9.1 8 | backcall==0.2.0 9 | bleach==3.3.0 10 | certifi==2020.6.20 11 | cffi==1.14.5 12 | chardet==4.0.0 13 | click==8.0.1 14 | cloudpickle==2.0.0 15 | cycler==0.10.0 16 | Cython==0.29.23 17 | databricks-cli==0.15.0 18 | decorator==5.0.7 19 | defusedxml==0.7.1 20 | docker==5.0.2 21 | entrypoints==0.3 22 | Flask==2.0.1 23 | gitdb==4.0.7 24 | GitPython==3.1.23 25 | GPy==1.10.0 26 | gpytorch==1.5.1 27 | greenlet==1.1.1 28 | gunicorn==20.1.0 29 | idna==2.10 30 | importlib-metadata==4.0.1 31 | ipykernel==5.5.5 32 | ipython==7.24.1 33 | ipython-genutils==0.2.0 34 | itsdangerous==2.0.1 35 | jedi==0.18.0 36 | Jinja2==3.0.1 37 | joblib==1.0.1 38 | json5==0.9.6 39 | jsonschema==3.2.0 40 | jupyter-client==6.1.12 41 | jupyter-core==4.7.1 42 | jupyter-server==1.9.0 43 | jupyterlab==3.0.16 44 | jupyterlab-pygments==0.1.2 45 | jupyterlab-server==2.6.0 46 | kiwisolver==1.3.1 47 | Mako==1.1.5 48 | MarkupSafe==2.0.1 49 | matplotlib==3.4.2 50 | matplotlib-inline==0.1.2 51 | matplotlib2tikz==0.7.6 52 | mistune==0.8.4 53 | mlflow==1.20.2 54 | nbclassic==0.3.1 55 | nbclient==0.5.3 56 | nbconvert==6.1.0 57 | nbformat==5.1.3 58 | nest-asyncio==1.5.1 59 | networkx @ file:///tmp/build/80754af9/networkx_1627459939258/work 60 | notebook==6.4.0 61 | numpy==1.20.3 62 | opt-einsum==3.3.0 63 | packaging==20.9 64 | pandas==1.2.4 65 | pandocfilters==1.4.3 66 | paramz==0.9.5 67 | parso==0.8.2 68 | pexpect==4.8.0 69 | pickleshare==0.7.5 70 | Pillow==8.2.0 71 | prometheus-client==0.11.0 72 | prometheus-flask-exporter==0.18.2 73 | prompt-toolkit==3.0.19 74 | protobuf==3.17.3 75 | ptyprocess==0.7.0 76 | pycparser==2.20 77 | Pygments==2.9.0 78 | pyparsing==2.4.7 79 | pyreadstat==1.1.2 80 | pyro-api==0.1.2 81 | pyro-ppl==1.7.0 82 | pyrsistent==0.17.3 83 | python-dateutil==2.8.1 84 | python-editor==1.0.4 85 | pytz==2021.1 86 | PyYAML==5.4.1 87 | pyzmq==22.1.0 88 | querystring-parser==1.2.4 89 | requests==2.25.1 90 | requests-unixsocket==0.2.0 91 | scikit-learn==0.24.2 92 | scipy==1.6.3 93 | Send2Trash==1.7.1 94 | six==1.16.0 95 | sklearn==0.0 96 | smmap==4.0.0 97 | sniffio==1.2.0 98 | SQLAlchemy==1.4.23 99 | sqlparse==0.4.2 100 | tabulate==0.8.9 101 | terminado==0.10.1 102 | testpath==0.5.0 103 | threadpoolctl==2.1.0 104 | tikzplotlib==0.9.8 105 | torch==1.9.0 106 | torchplot==0.2.0 107 | torchvision==0.9.1 108 | tornado==6.1 109 | tqdm==4.62.2 110 | traitlets==5.0.5 111 | typing-extensions==3.10.0.0 112 | urllib3==1.26.5 113 | wcwidth==0.2.5 114 | webencodings==0.5.1 115 | websocket-client==1.1.0 116 | Werkzeug==2.0.1 117 | zipp==3.4.1 118 | 
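The pinned versions above would typically be installed into a clean environment with `pip install -r requirements.txt`. Referring back to optimization/algorithms.py: the VEM routines only assume the model exposes `q_m`, `q_L`, `z`, a `kernel` with `parameters()`, and a forward pass returning the scalar objective being minimised (the negative ELBO). A minimal sketch of that interface, using a hypothetical stand-in module (this is not the repository's SVGP class, and the import assumes the repo root is on the Python path):

    import torch
    from optimization.algorithms import vem_algorithm

    class ToyModel(torch.nn.Module):
        """Hypothetical stand-in exposing the attributes vem_algorithm expects."""
        def __init__(self, m=5):
            super().__init__()
            self.q_m = torch.nn.Parameter(torch.zeros(m, 1))      # variational mean
            self.q_L = torch.nn.Parameter(torch.eye(m))           # variational covariance factor
            self.z = torch.nn.Parameter(torch.linspace(0., 1., m)[:, None])  # inducing inputs
            self.kernel = torch.nn.Linear(1, 1)                   # placeholder hyper-parameters

        def forward(self, x, y):
            # Scalar objective to minimise; a real model would return the negative ELBO.
            return ((y - x @ self.kernel.weight.t() - self.q_m.mean()) ** 2).sum()

    x = torch.rand(50, 1)
    y = 2.0 * x + 0.1 * torch.randn(50, 1)
    vem_algorithm(ToyModel(), x, y, em_iters=2, optimizer='sgd')

Any object with this interface can be handed to the SGD path, which alternates the VE updates (variational parameters) with the VM updates (hyper-parameters, then inducing points) exactly as in the listing above.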
-------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | # squared distance is based on the gptorch code 2 | # by Steven Atkinson (steven@atkinson.mn) 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | import os 16 | import torch 17 | import numpy as np 18 | import pandas as pd 19 | from torch.utils.data import Dataset, DataLoader 20 | 21 | _lim_val = np.finfo(np.float64).max 22 | _lim_val_exp = np.log(_lim_val) 23 | _lim_val_square = np.sqrt(_lim_val) 24 | #_lim_val_cube = cbrt(_lim_val) 25 | _lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf) 26 | _lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf) 27 | _lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf) 28 | 29 | 30 | def safe_exp(f): 31 | clamp_f = torch.clamp(f, min=-np.inf, max=_lim_val_exp) 32 | return torch.exp(clamp_f) 33 | 34 | def safe_square(f): 35 | f = torch.clamp(f, min=-np.inf, max=_lim_val_square) 36 | return f**2 37 | 38 | def safe_cube(f): 39 | f = torch.clamp(f, min=-np.inf, max=_lim_val_cube) 40 | return f**3 41 | 42 | def safe_quad(f): 43 | f = torch.clamp(f, min=-np.inf, max=_lim_val_quad) 44 | return f**4 45 | 46 | def true_function(x): 47 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \ 48 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) + \ 49 | 5*torch.cos(7*np.pi*x + 2.4*np.pi) 50 | return y 51 | 52 | def smooth_function(x): 53 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \ 54 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) 55 | return y 56 | 57 | def smooth_function_bias(x): 58 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \ 59 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) + \ 60 | 3.0*x - 7.5 61 | return y 62 | 63 | 64 | def true_u_functions(x_list, Q): 65 | u_functions = [] 66 | amplitude = (1.5 - 0.5) * torch.rand(Q, 3) + 0.5 67 | freq = (3 - 1) * torch.rand(Q, 3) + 1 68 | shift = 2 * torch.rand(Q, 3) 69 | for x in x_list: 70 | u_function = torch.empty(x.shape[0], Q) 71 | for q in range(Q): 72 | u_function[:,q,None] = 3.0 * amplitude[q, 0] * np.cos(freq[q, 0] * np.pi * x + shift[q, 0] * np.pi) - \ 73 | 2.0 * amplitude[q, 1] * np.sin(2 * freq[q, 1] * np.pi * x + shift[q, 1] * np.pi) + \ 74 | amplitude[q, 2] * np.cos(4 * freq[q, 2] * np.pi * x + shift[q, 2] * np.pi) 75 | u_functions.append(u_function) 76 | return u_functions 77 | 78 | 79 | def true_f_functions(x_list, Q): 80 | K = len(x_list) 81 | W = 0.5 * torch.randn(K, Q) 82 | f_functions = [] 83 | u_functions = true_u_functions(x_list, Q) 84 | for k, u_function in enumerate(u_functions): 85 | Nk = u_function.shape[0] 86 | f_function = torch.zeros(Nk, 1) 87 | for q in range(Q): 88 | f_function += torch.tile(W[k:k+1, q:q+1], (Nk, 1)) * u_function[:, q:q+1] 89 | 90 | f_functions.append(f_function) 91 | 92 | return f_functions 93 | 94 | 95 | def squared_distance(x1, x2=None): 96 | """ 97 | Given points x1 [n1 x d1] and x2 [n2 x d2], return a [n1 x n2] matrix with 98 | the pairwise squared distances between the points. 
99 |     Entry (i, j) is sum_{k=1}^{d} (x1[i, k] - x2[j, k])^2
100 |     """
101 |     if x2 is None:
102 |         return squared_distance(x1, x1)
103 | 
104 |     x1s = x1.pow(2).sum(1, keepdim=True)
105 |     x2s = x2.pow(2).sum(1, keepdim=True)
106 | 
107 |     r2 = x1s + x2s.t() - 2.0 * x1 @ x2.t()
108 | 
109 |     # Clamp away small negative entries caused by floating-point round-off.
110 |     # The correction term is detached so it adds no gradient of its own;
111 |     # gradients still flow through r2 itself.
112 |     return r2 - (torch.clamp(r2, max=0.0)).detach()
113 | 
114 | 
115 | class DataGP(Dataset):
116 |     def __init__(self, x, y):
117 |         # Accept numpy arrays or torch tensors for both inputs.
118 |         self.x = torch.from_numpy(x) if not torch.is_tensor(x) else x
119 |         self.y = torch.from_numpy(y) if not torch.is_tensor(y) else y
120 | 
121 | 
122 |     def __len__(self):
123 |         return len(self.x)
124 | 
125 |     def __getitem__(self, item):
126 |         return self.x[item], self.y[item]
127 | 
128 | 
129 | class DataMOGP(Dataset):
130 |     def __init__(self, x, y):
131 |         self.x = x  # x is a list of inputs, one per output/task
132 |         self.y = y  # y is a list of targets, one per output/task
133 | 
134 |     def __len__(self):
135 |         return min(len(x_d) for x_d in self.x)
136 | 
137 |     def __getitem__(self, item):
138 |         x_tuple = tuple(x_d[item] for x_d in self.x)
139 |         y_tuple = tuple(y_d[item] for y_d in self.y)
140 |         return x_tuple, y_tuple
141 | 
142 | 
143 | 
144 | 
--------------------------------------------------------------------------------
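As a closing usage sketch (the toy data here is an illustrative assumption, and the `util` import assumes the repo root is on the Python path): DataGP wraps numpy arrays or tensors so a standard torch DataLoader can mini-batch them, and squared_distance returns the non-negative pairwise squared-distance matrix.

    import numpy as np
    import torch
    from torch.utils.data import DataLoader
    from util import DataGP, smooth_function, squared_distance

    # Hypothetical toy data: noisy observations of the smooth test function.
    x = np.random.rand(100, 1)
    y = smooth_function(torch.from_numpy(x)).numpy() + 0.1 * np.random.randn(100, 1)

    dataset = DataGP(x, y)                                    # numpy in, tensors out
    loader = DataLoader(dataset, batch_size=20, shuffle=True)

    for xb, yb in loader:
        r2 = squared_distance(xb, xb)                         # [20 x 20], entries >= 0
        print(xb.shape, yb.shape, float(r2.max()))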