├── LICENSE
├── README.md
├── __init__.py
├── baselines
│   ├── __init__.py
│   ├── bcm.py
│   ├── distgp.py
│   ├── dvigp.py
│   ├── gpoegp.py
│   ├── poegp.py
│   └── rbcm.py
├── data
│   ├── banana.mat
│   └── nasa.mat
├── experiments
│   ├── __init__.py
│   ├── banana.py
│   ├── baseline.py
│   ├── dvigp_nlpd.py
│   ├── image.py
│   ├── million.py
│   ├── million_rbcm.py
│   ├── paralell.py
│   └── solar.py
├── extra
│   └── modular_gp_logo.png
├── kernels
│   ├── __init__.py
│   ├── coregionalization.py
│   ├── kernel.py
│   ├── rbf.py
│   └── stationary.py
├── likelihoods
│   ├── __init__.py
│   ├── bernoulli.py
│   ├── gaussian.py
│   ├── hetgaussian.py
│   └── likelihood.py
├── models
│   ├── __init__.py
│   ├── chainedgp.py
│   ├── ensemblegp.py
│   ├── hetmoensemble.py
│   ├── moensemble.py
│   ├── svgp.py
│   └── svmogp.py
├── optimization
│   ├── __init__.py
│   └── algorithms.py
├── requirements.txt
└── util.py
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Modular Gaussian Processes for Transfer Learning
2 |
3 |
4 |
5 | ## 🧩 Introduction
6 |
7 | This repository contains the implementation of our paper [Modular Gaussian Processes for Transfer Learning](https://arxiv.org/abs/2110.13515), accepted at the 35th Conference on Neural Information Processing Systems (NeurIPS 2021). The entire code is written in Python and is based on the [PyTorch](https://pytorch.org/) framework.
8 |
9 | ### 🧩 Idea
10 |
11 | Here you may find a new framework for transfer learning based on *modular Gaussian processes* (GPs). The underlying idea is to avoid revisiting samples once a model has been trained and well fitted, so that the model can be repurposed in combination with other or new data. We build *dictionaries* of modules (models), where each module contains only parameters and hyperparameters, but no observations. We can then build *meta-models* (GP models) from different combinations of modules without reusing the old data. A minimal sketch of this workflow is shown below.
12 |
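A minimal sketch of the intended workflow, using the classes from this repository (`SVGP`, `EnsembleGP`, `AlgorithmVEM`). The toy data, numbers of inducing points and iteration counts are illustrative only; see the scripts in `experiments/` for complete, tested configurations.

```python
import torch
from kernels.rbf import RBF
from likelihoods.gaussian import Gaussian
from models.svgp import SVGP
from models.ensemblegp import EnsembleGP
from optimization.algorithms import AlgorithmVEM

# 1. Train independent GP modules on local data (no samples are shared).
modules = []
for k in range(3):
    x_k = k + torch.rand(100, 1)                           # toy local inputs
    y_k = torch.sin(3.0 * x_k) + 0.1 * torch.randn(100, 1)  # toy local outputs
    model_k = SVGP(RBF(), Gaussian(fit_noise=False), 5)      # 5 inducing points
    AlgorithmVEM(model_k, x_k, y_k, iters=10).fit()
    modules.append(model_k)   # only parameters/hyperparameters are stored

# 2. Build a meta-model from the stored modules, without revisiting (x_k, y_k).
meta_model = EnsembleGP(RBF(), Gaussian(fit_noise=False), modules, 15)
AlgorithmVEM(meta_model, config='ensemble', iters=10).fit()
```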
13 | ## 🧩 Citation
14 |
15 | If you use this code, please include the following citation:
16 | ```
17 | @inproceedings{MorenoArtesAlvarez21,
18 | title = {Modular {G}aussian Processes for Transfer Learning},
19 | author = {Moreno-Mu\~noz, Pablo and Art\'es-Rodr\'iguez, Antonio and \'Alvarez, Mauricio A},
20 | booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
21 | year = {2021}
22 | }
23 | ```
24 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/__init__.py
--------------------------------------------------------------------------------
/baselines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/baselines/__init__.py
--------------------------------------------------------------------------------
/baselines/bcm.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 |
15 | class BayesianCM(torch.nn.Module):
16 | """
17 | -- Bayesian Committee Machine --
18 | --
19 | -- Adaptation to Pytorch + GP framework
20 | -- V. Tresp "A Bayesian Committee Machine"
21 | -- Reference: https://www.dbs.ifi.lmu.de/~tresp/papers/bcm6.pdf
22 | """
23 |
24 | def __init__(self, models, input_dim=1.0):
25 | super(BayesianCM, self).__init__()
26 |
27 | self.input_dim = int(input_dim) # dimension of x
28 |
29 | # Adjacent Local GP Models
30 | self.models = models # is a list
31 |
32 | def forward(self):
33 | return 1.0
34 |
35 | def predictive(self, x, y, x_new):
36 | # x is a list of x_k (distributed)
37 | # y is a list of y_k (distributed)
38 |
39 | K = len(self.models)
40 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None]
41 | correction = (1-K)/prior_v
42 |
43 | gp_m = torch.zeros(x_new.size())
44 | gp_v = torch.zeros(x_new.size())
45 |
46 | for k, model_k in enumerate(self.models):
47 | m_k, v_k = model_k.predictive(x[k], y[k], x_new)
48 |
49 | gp_m += m_k/v_k
50 | gp_v += 1.0/v_k
51 |
52 | gp_v = 1.0/(gp_v + correction)
53 | gp_m = gp_v*gp_m
54 |
55 | return gp_m, gp_v
56 |
57 | def rmse(self, x, y, x_new, f_new):
58 | f_gp,_ = self.predictive(x, y, x_new)
59 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
60 | return rmse
61 |
62 | def mae(self, x, y, x_new, f_new):
63 | f_gp,_ = self.predictive(x, y, x_new)
64 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
65 | return mae
66 |
67 | def nlpd(self, x, y, x_new, y_new):
68 | f_gp, v_gp = self.predictive(x, y, x_new)
69 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
70 | return nlpd
71 |
72 | # FOR HIERARCHICAL SETTINGS
73 |
74 | def predictive_layer(self, gps_m, gps_v, x_new):
75 | # gps_m is a list of gp_m (distributed)
76 | # gps_v is a list of gp_v (distributed)
77 |
78 | K = len(gps_m)
79 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None]
80 | prior_v += 1e-1
81 | correction = (1 - K)/prior_v
82 |
83 | gp_m = torch.zeros(x_new.size())
84 | gp_v = torch.zeros(x_new.size())
85 |
86 | for k, m_k in enumerate(gps_m):
87 | v_k = gps_v[k]
88 |
89 | gp_m += m_k / v_k
90 | gp_v += 1.0 / v_k
91 |
92 | gp_v = 1.0 / (gp_v + correction)
93 | gp_m = gp_v * gp_m
94 |
95 | return gp_m, gp_v
96 |
97 | def rmse_layer(self, gps_m, gps_v, x_new, f_new):
98 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
99 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
100 | return rmse
101 |
102 | def mae_layer(self, gps_m, gps_v, x_new, f_new):
103 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
104 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
105 | return mae
106 |
107 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new):
108 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new)
109 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
110 | return nlpd
--------------------------------------------------------------------------------
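For reference, `BayesianCM.predictive` above implements the standard BCM combination rule (Tresp, 2000): each of the $K$ local posteriors contributes its natural parameters and the GP prior is discounted $K-1$ times,

$$
\sigma_*^{-2} = \sum_{k=1}^{K} \sigma_k^{-2}(x_*) + (1 - K)\,\sigma_{**}^{-2}(x_*), \qquad
\mu_* = \sigma_*^{2} \sum_{k=1}^{K} \sigma_k^{-2}(x_*)\,\mu_k(x_*),
$$

where $\mu_k, \sigma_k^2$ are the $k$-th expert's predictive mean and variance and $\sigma_{**}^2$ is the prior variance (`prior_v` in the code).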
/baselines/distgp.py:
--------------------------------------------------------------------------------
1 | # Implementation of the "Distributed GP"
2 | # by Deisenroth & Ng, ICML 2015
3 | # -----------------------------------------------------------------
4 | # This script belongs to the ModularGP repo
5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
6 | # Copyright (c) 2021 Pablo Moreno-Munoz
7 | # -----------------------------------------------------------------
8 | #
9 | #
10 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
11 | # Section for Cognitive Systems
12 | # Technical University of Denmark (DTU)
13 | # October 2021
14 |
15 | import torch
16 |
17 | class DistGP(torch.nn.Module):
18 | """
19 | -- Distributed Gaussian Process Regression--
20 | --
21 | -- Adaptation to Pytorch + GP framework
22 | -- M. P. Deisenroth and J. W. Ng, "Distributed Gaussian Processes"
23 | -- Reference: http://proceedings.mlr.press/v37/deisenroth15.pdf
24 | """
25 |
26 | def __init__(self, kernel, likelihood, input_dim=None):
27 | super(DistGP, self).__init__()
28 |
29 | if input_dim is None:
30 | input_dim = 1
31 |
32 | self.input_dim = int(input_dim) #dimension of x
33 |
34 | # GP Elements --
35 | self.likelihood = likelihood #type of likelihood
36 | self.kernel = kernel #type of kernel
37 |
38 |
39 | def forward(self, x, y):
40 | identity = torch.eye(y.size(0))
41 | s_n = torch.pow(self.likelihood.sigma, 2.0)
42 |
43 | K = self.kernel.K(x,x)
44 | KI = K + torch.mul(s_n,identity)
45 | iKI, _ = torch.solve(torch.eye(KI.size(0)), KI)
46 | yiKIy = y.t().mm(iKI).mm(y)
47 |
48 | log_marginal = -0.5*yiKIy - 0.5*torch.logdet(KI)
49 | return -log_marginal
50 |
51 | def predictive(self, x, y, x_new):
52 |
53 | Kx = self.kernel.K(x, x_new)
54 | Kxx = self.kernel.K(x_new, x_new)
55 |
56 | identity = torch.eye(y.size(0))
57 | s_n = torch.pow(self.likelihood.sigma, 2.0)
58 |
59 | K = self.kernel.K(x, x)
60 | KI = K + torch.mul(s_n, identity)
61 | iKI, _ = torch.solve(torch.eye(KI.size(0)), KI)
62 |
63 | gp_m = Kx.t().mm(iKI).mm(y)
64 | gp_v = torch.diagonal(Kxx - Kx.t().mm(iKI).mm(Kx), 0)[:,None]
65 |
66 | return gp_m, gp_v
67 |
68 | def rmse(self, x, y, x_new, f_new):
69 | f_gp,_ = self.predictive(x, y, x_new)
70 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
71 | return rmse
72 |
73 | def mae(self, x, y, x_new, f_new):
74 | f_gp,_ = self.predictive(x, y, x_new)
75 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
76 | return mae
77 |
 78 |     def nlpd(self, x, y, x_new, y_new):
 79 |         # predictive() already returns the posterior mean and variance,
 80 |         # so the variance is used directly in the log-predictive
 81 |         # (the upper/lower-bound conversion used in dvigp.py is not needed here).
 82 |         f_gp, v_gp = self.predictive(x, y, x_new)
 83 |         nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
 84 |         return nlpd
85 |
--------------------------------------------------------------------------------
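For reference, `DistGP.forward` returns the negative log marginal likelihood of exact GP regression (the constant $-\tfrac{N}{2}\log 2\pi$ term is omitted, which does not affect optimisation), and `DistGP.predictive` returns the usual posterior mean and marginal variance:

$$
\log p(\mathbf{y}) = -\tfrac{1}{2}\,\mathbf{y}^\top (K + \sigma_n^2 I)^{-1}\mathbf{y} \;-\; \tfrac{1}{2}\log\lvert K + \sigma_n^2 I\rvert \;+\; \mathrm{const},
$$

$$
\mu_* = K_*^\top (K + \sigma_n^2 I)^{-1}\mathbf{y}, \qquad
\sigma_*^2 = \operatorname{diag}\!\left(K_{**} - K_*^\top (K + \sigma_n^2 I)^{-1} K_*\right).
$$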
/baselines/dvigp.py:
--------------------------------------------------------------------------------
1 | # Implementation of the "Distributed Variational Inference in GPs"
2 | # by Y. Gal and M. van der Wilk
3 | #
4 | # Little adaptation without the LVM assumption
5 | # for testing and comparison. Simulates a distributed environment.
6 | # -----------------------------------------------------------------
7 | # This script belongs to the ModularGP repo
8 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
9 | # Copyright (c) 2021 Pablo Moreno-Munoz
10 | # -----------------------------------------------------------------
11 | #
12 | #
13 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
14 | # Section for Cognitive Systems
15 | # Technical University of Denmark (DTU)
16 | # October 2021
17 |
18 | import torch
19 | import numpy as np
20 | from torch.distributions import MultivariateNormal as Normal
21 | from torch.distributions import kl_divergence
22 |
23 | from GPy.inference.latent_function_inference.posterior import Posterior
24 |
25 | class DVIGP(torch.nn.Module):
26 | """
27 | -- Distributed Variational Inference in Gaussian Processes --
28 | --
29 | -- Adaptation to Pytorch + GP framework
30 | -- Y. Gal et al. "Distributed Variational Inference in Sparse Gaussian
31 | Process Regression and Latent Variable Models" NIPS 2014
32 | """
33 | def __init__(self, kernel, likelihood, M, nodes=1, input_dim=None):
34 | super(DVIGP, self).__init__()
35 |
36 | if input_dim is None:
37 | input_dim = 1
38 |
39 | # Nodes to distribute the computational load --
40 | self.nodes = int(nodes)
41 |
42 | # Dimensions --
43 | self.M = M #num. inducing
44 | self.input_dim = int(input_dim) #dimension of x
45 |
46 | # GP Elements --
47 | self.likelihood = likelihood #type of likelihood
48 | self.kernel = kernel #type of kernel
49 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False)
50 |
51 | # Variational distribution --
52 | self.q_m = torch.nn.Parameter(torch.randn(M,1), requires_grad=True) # variational: mean parameter
53 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance
54 |
55 | def forward(self, x, y):
56 | x_nodes, y_nodes = self.data_to_nodes(x,y)
57 |
58 | # Variational parameters --
59 | q_m = self.q_m
60 | q_L = torch.tril(self.q_L)
61 | q_S = torch.mm(q_L, q_L.t())
62 |
63 | # Prior parameters (uses kernel) --
64 | Kuu = self.kernel.K(self.z)
65 |         iKuu, _ = torch.solve(torch.eye(self.M), Kuu)  # inverse of Kuu via a linear solve
66 |
67 | # Distributions -- q(u), p(u)
68 | q_u = Normal(q_m.flatten(), q_S)
69 | p_u = Normal(torch.zeros(self.M), Kuu)
70 |
71 | global_params = {'q_m': q_m, 'q_L': q_L, 'q_S': q_S, 'Kuu': Kuu, 'iKuu': iKuu}
72 |
73 | # Distributed Expectations
74 | expectation = 0.0
75 | for k, y_k in enumerate(y_nodes):
76 | x_k = x_nodes[k]
77 | expectation_node = self.forward_node(x_k, y_k, global_params)
78 | expectation += expectation_node.sum()
79 |
80 | # KL divergence --
81 | kl = kl_divergence(q_u, p_u)
82 |
83 | # Lower bound (ELBO) --
84 | elbo = expectation - kl
85 |
86 | return -elbo
87 |
88 | def forward_node(self, x_node, y_node, global_params):
89 | q_m = global_params['q_m']
90 |         q_L = global_params['q_L']
91 | q_S = global_params['q_S']
92 | Kuu = global_params['Kuu']
93 | iKuu = global_params['iKuu']
94 |
95 | Kff = self.kernel.K(x_node, x_node)
96 | Kfu = self.kernel.K(x_node, self.z)
97 | Kuf = torch.transpose(Kfu, 0, 1)
98 |
99 | A = Kfu.mm(iKuu)
100 | AT = iKuu.mm(Kuf)
101 |
102 | m_f = A.mm(q_m)
103 | v_f = torch.diag(Kff + A.mm(q_S - Kuu).mm(AT))
104 |
105 | # Expectation term of node --
106 | expectation = self.likelihood.variational_expectation(y_node, m_f, v_f)
107 |
108 | return expectation
109 |
110 | def data_to_nodes(self, x, y):
111 | x_nodes = []
112 | y_nodes = []
113 |
114 | N = y.size(0)
115 |         size_node = int(np.floor(N / self.nodes))  # np.int is removed in recent NumPy versions
116 | for k in range(self.nodes):
117 | if k < self.nodes - 1:
118 | x_nodes.append(x[(0+(k*size_node)):(0+((k+1)*size_node)), :])
119 | y_nodes.append(y[(0+(k*size_node)):(0+((k+1)*size_node)), :])
120 | else:
121 | x_nodes.append(x[(0+(k*size_node)):, :])
122 | y_nodes.append(y[(0+(k*size_node)):, :])
123 |
124 | return x_nodes, y_nodes
125 |
126 | def predictive(self, x_new):
127 | # Matrices
128 | q_m = self.q_m.detach().numpy()
129 | q_L = torch.tril(self.q_L)
130 | q_S = torch.mm(q_L, q_L.t()).detach().numpy()
131 | Kuu = self.kernel.K(self.z, self.z).detach().numpy()
132 |
133 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape))
134 | Kx = self.kernel.K(self.z, x_new).detach().numpy()
135 | Kxx = self.kernel.K(x_new, x_new).detach().numpy()
136 |
137 | # GP Predictive Posterior - mean + variance
138 | gp_mu = np.dot(Kx.T, posterior.woodbury_vector)
139 | Kxx = np.diag(Kxx)
140 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T
141 |
142 | gp = gp_mu
143 | gp_upper = gp_mu + 2*np.sqrt(gp_var) #+ 2 * self.likelihood.sigma.detach().numpy()
144 | gp_lower = gp_mu - 2*np.sqrt(gp_var) #- 2 * self.likelihood.sigma.detach().numpy()
145 |
146 | return gp, gp_upper, gp_lower
147 |
148 | def rmse(self, x_new, f_new):
149 | f_gp,_,_ = self.predictive(x_new)
150 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
151 | return rmse
152 |
153 | def mae(self, x_new, f_new):
154 | f_gp,_,_ = self.predictive(x_new)
155 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
156 | return mae
157 |
158 | def nlpd(self, x_new, y_new):
159 | f_gp, u_gp, _ = self.predictive(x_new)
160 | f_gp = torch.from_numpy(f_gp)
161 | u_gp = torch.from_numpy(u_gp)
162 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0)
163 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
164 | return nlpd
165 |
166 |
--------------------------------------------------------------------------------
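The bound maximised by `DVIGP.forward` is the standard sparse variational ELBO, with the expected log-likelihood term accumulated over the data nodes:

$$
\mathcal{L} = \sum_{k=1}^{\text{nodes}} \sum_{i \in \mathcal{I}_k} \mathbb{E}_{q(f_i)}\!\left[\log p(y_i \mid f_i)\right] \;-\; \mathrm{KL}\!\left(q(\mathbf{u})\,\|\,p(\mathbf{u})\right),
$$

where $q(f) = \int p(f \mid \mathbf{u})\, q(\mathbf{u})\, d\mathbf{u}$ has mean $K_{fu}K_{uu}^{-1}\mathbf{m}$ and marginal variance $\operatorname{diag}\!\left(K_{ff} + K_{fu}K_{uu}^{-1}(S - K_{uu})K_{uu}^{-1}K_{uf}\right)$, as computed per node in `forward_node`.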
/baselines/gpoegp.py:
--------------------------------------------------------------------------------
1 | # Implementation of the "Generalised Product of GP Experts"
2 | # by Cao & Fleet, 2014
3 | # -----------------------------------------------------------------
4 | # This script belongs to the ModularGP repo
5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
6 | # Copyright (c) 2021 Pablo Moreno-Munoz
7 | # -----------------------------------------------------------------
8 | #
9 | #
10 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
11 | # Section for Cognitive Systems
12 | # Technical University of Denmark (DTU)
13 | # October 2021
14 |
15 | import torch
16 |
17 | class GenPoeGP(torch.nn.Module):
18 | """
19 | -- Generalised Product of GP Experts --
20 | --
21 | -- Adaptation to Pytorch + GP framework
22 | -- Y. Cao and D. J. Fleet, "Generalized Product of Experts (...)"
23 | -- Reference: https://arxiv.org/abs/1410.7827
24 | """
25 |
26 | def __init__(self, models, input_dim=1.0):
27 | super(GenPoeGP, self).__init__()
28 |
29 | self.input_dim = int(input_dim) # dimension of x
30 |
31 | # Adjacent Local GP Models
32 | self.models = models # is a list
33 |
34 | def forward(self):
35 | return 1.0
36 |
37 | def predictive(self, x, y, x_new):
38 | # x is a list of x_k (distributed)
39 | # y is a list of y_k (distributed)
40 |
41 | K = len(self.models)
42 | beta_k = 1/K
43 |
44 | gp_m = torch.zeros(x_new.size())
45 | gp_v = torch.zeros(x_new.size())
46 |
47 | for k, model_k in enumerate(self.models):
48 | m_k, v_k = model_k.predictive(x[k], y[k], x_new)
49 |
50 | gp_m += (beta_k*m_k)/v_k
51 | gp_v += beta_k/v_k
52 |
53 | gp_v = 1.0/gp_v
54 | gp_m = gp_v*gp_m
55 |
56 | return gp_m, gp_v
57 |
58 |
59 | def rmse(self, x, y, x_new, f_new):
60 | f_gp,_ = self.predictive(x, y, x_new)
61 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
62 | return rmse
63 |
64 | def mae(self, x, y, x_new, f_new):
65 | f_gp,_ = self.predictive(x, y, x_new)
66 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
67 | return mae
68 |
69 | def nlpd(self, x, y, x_new, y_new):
70 | f_gp, v_gp = self.predictive(x, y, x_new)
71 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
72 | return nlpd
73 |
74 | # FOR HIERARCHICAL SETTINGS
75 |
76 | def predictive_layer(self, gps_m, gps_v, x_new):
77 | # gps_m is a list of gp_m (distributed)
78 | # gps_v is a list of gp_v (distributed)
79 |
80 | K = len(gps_m)
81 | beta_k = 1/K
82 |
83 | gp_m = torch.zeros(x_new.size())
84 | gp_v = torch.zeros(x_new.size())
85 |
86 | for k, m_k in enumerate(gps_m):
87 | v_k = gps_v[k]
88 |
89 | gp_m += (beta_k*m_k)/v_k
90 | gp_v += beta_k/v_k
91 |
92 | gp_v = 1.0/gp_v
93 | gp_m = gp_v*gp_m
94 |
95 | return gp_m, gp_v
96 |
97 | def rmse_layer(self, gps_m, gps_v, x_new, f_new):
98 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
99 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
100 | return rmse
101 |
102 | def mae_layer(self, gps_m, gps_v, x_new, f_new):
103 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
104 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
105 | return mae
106 |
107 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new):
108 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new)
109 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
110 | return nlpd
111 |
--------------------------------------------------------------------------------
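The generalised product-of-experts prediction computed above weights each expert by $\beta_k$ (fixed to $\beta_k = 1/K$ in this code):

$$
\sigma_*^{-2} = \sum_{k=1}^{K} \beta_k\, \sigma_k^{-2}(x_*), \qquad
\mu_* = \sigma_*^{2} \sum_{k=1}^{K} \beta_k\, \sigma_k^{-2}(x_*)\,\mu_k(x_*).
$$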
/baselines/poegp.py:
--------------------------------------------------------------------------------
1 | # Implementation of the "Product of GP Experts"
2 | # by Ng & Deisenroth, 2014
3 | # -----------------------------------------------------------------
4 | # This script belongs to the ModularGP repo
5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
6 | # Copyright (c) 2021 Pablo Moreno-Munoz
7 | # -----------------------------------------------------------------
8 | #
9 | #
10 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
11 | # Section for Cognitive Systems
12 | # Technical University of Denmark (DTU)
13 | # October 2021
14 |
15 |
16 | import torch
17 |
18 | class PoeGP(torch.nn.Module):
19 | """
20 | -- Product of GP Experts --
21 | --
22 | -- Adaptation to Pytorch + GP framework
23 |     -- J. W. Ng and M. P. Deisenroth, "Hierarchical Mixture-of-Experts Model for (...)", 2014
24 | -- Reference: http://arxiv.org/abs/1412.3078
25 | """
26 | def __init__(self, models, input_dim=1.0):
27 | super(PoeGP, self).__init__()
28 |
29 | self.input_dim = int(input_dim) # dimension of x
30 |
31 | # Adjacent Local GP Models
32 | self.models = models # is a list
33 |
34 | def forward(self):
35 | return 1.0
36 |
37 | def predictive(self, x, y, x_new):
38 | # x is a list of x_k (distributed)
39 | # y is a list of y_k (distributed)
40 |
41 | gp_m = torch.zeros(x_new.size())
42 | gp_v = torch.zeros(x_new.size())
43 |
44 | for k, model_k in enumerate(self.models):
45 | m_k, v_k = model_k.predictive(x[k], y[k], x_new)
46 |
47 | gp_m += m_k/v_k
48 | gp_v += 1.0/v_k
49 |
50 | gp_v = 1.0/gp_v
51 | gp_m = gp_v*gp_m
52 |
53 | return gp_m, gp_v
54 |
55 | def rmse(self, x, y, x_new, f_new):
56 | f_gp,_ = self.predictive(x, y, x_new)
57 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
58 | return rmse
59 |
60 | def mae(self, x, y, x_new, f_new):
61 | f_gp,_ = self.predictive(x, y, x_new)
62 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
63 | return mae
64 |
65 | def nlpd(self, x, y, x_new, y_new):
66 | f_gp, v_gp = self.predictive(x, y, x_new)
67 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
68 | return nlpd
69 |
70 | # FOR HIERARCHICAL SETTINGS
71 |
72 | def predictive_layer(self, gps_m, gps_v, x_new):
73 | # gps_m is a list of gp_m (distributed)
74 | # gps_v is a list of gp_v (distributed)
75 |
76 | gp_m = torch.zeros(x_new.size())
77 | gp_v = torch.zeros(x_new.size())
78 |
79 | for k, m_k in enumerate(gps_m):
80 | v_k = gps_v[k]
81 |
82 | gp_m += m_k / v_k
83 | gp_v += 1.0 / v_k
84 |
85 | gp_v = 1.0 / gp_v
86 | gp_m = gp_v * gp_m
87 |
88 | return gp_m, gp_v
89 |
90 | def rmse_layer(self, gps_m, gps_v, x_new, f_new):
91 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
92 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
93 | return rmse
94 |
95 | def mae_layer(self, gps_m, gps_v, x_new, f_new):
96 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
97 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
98 | return mae
99 |
100 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new):
101 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new)
102 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
103 | return nlpd
--------------------------------------------------------------------------------
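`PoeGP.predictive` implements the plain product-of-experts combination, i.e. the $\beta_k = 1$ special case of the generalised rule above:

$$
\sigma_*^{-2} = \sum_{k=1}^{K} \sigma_k^{-2}(x_*), \qquad
\mu_* = \sigma_*^{2} \sum_{k=1}^{K} \sigma_k^{-2}(x_*)\,\mu_k(x_*).
$$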
/baselines/rbcm.py:
--------------------------------------------------------------------------------
1 | # Implementation of the "Robust Bayesian Committee Machine"
2 | # by Deisenroth & Ng, ICML 2015
3 | # -----------------------------------------------------------------
4 | # This script belongs to the ModularGP repo
5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
6 | # Copyright (c) 2021 Pablo Moreno-Munoz
7 | # -----------------------------------------------------------------
8 | #
9 | #
10 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
11 | # Section for Cognitive Systems
12 | # Technical University of Denmark (DTU)
13 | # October 2021
14 |
15 | import torch
16 |
17 | class RobustBayesianCM(torch.nn.Module):
18 | """
19 | -- Robust Bayesian Committee Machine --
20 | --
21 | -- Adaptation to Pytorch + GP framework
22 | -- M. P. Deisenroth and J. W. Ng, "Distributed Gaussian Processes"
23 | -- Reference: http://proceedings.mlr.press/v37/deisenroth15.pdf
24 | """
25 | def __init__(self, models, input_dim=1.0):
26 | super(RobustBayesianCM, self).__init__()
27 |
28 | self.input_dim = int(input_dim) # dimension of x
29 |
30 | # Adjacent Local GP Models
31 | self.models = models # is a list
32 |
33 | def forward(self):
34 | return 1.0
35 |
36 | def predictive(self, x, y, x_new):
37 | # x is a list of x_k (distributed)
38 | # y is a list of y_k (distributed)
39 |
40 | K = len(self.models)
41 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None]
42 | log_prior_v = torch.log(prior_v)
43 |
44 | gp_m = torch.zeros(x_new.size())
45 | gp_v = torch.zeros(x_new.size())
46 | correction = torch.zeros(x_new.size())
47 |
48 | for k, model_k in enumerate(self.models):
49 | m_k, v_k = model_k.predictive(x[k], y[k], x_new)
50 |
51 | beta_k = 0.5*(log_prior_v - torch.log(v_k))
52 | correction += beta_k
53 |
54 | gp_m += m_k/v_k
55 | gp_v += 1.0/v_k
56 |
57 | correction = (1-correction)/prior_v
58 | gp_v = 1.0/(gp_v + correction)
59 | gp_m = gp_v*gp_m
60 |
61 | return gp_m, gp_v
62 |
63 | def rmse(self, x, y, x_new, f_new):
64 | f_gp,_ = self.predictive(x, y, x_new)
65 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
66 | return rmse
67 |
68 | def mae(self, x, y, x_new, f_new):
69 | f_gp,_ = self.predictive(x, y, x_new)
70 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
71 | return mae
72 |
73 | def nlpd(self, x, y, x_new, y_new):
74 | f_gp, v_gp = self.predictive(x, y, x_new)
75 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
76 | return nlpd
77 |
78 | # FOR HIERARCHICAL SETTINGS
79 |
80 | def predictive_layer(self, gps_m, gps_v, x_new):
81 | # gps_m is a list of gp_m (distributed)
82 | # gps_v is a list of gp_v (distributed)
83 |
84 | K = len(gps_m)
85 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None]
86 | log_prior_v = torch.log(prior_v)
87 | log_prior_v[torch.isnan(log_prior_v)] = 1e-3
88 |
89 | gp_m = torch.zeros(x_new.size())
90 | gp_v = torch.zeros(x_new.size())
91 | correction = torch.zeros(x_new.size())
92 |
93 | for k, m_k in enumerate(gps_m):
94 | v_k = gps_v[k]
95 |
96 | log_v_k = torch.log(v_k)
97 | log_v_k[torch.isnan(log_v_k)] = 1e-3
98 |
99 | beta_k = 0.5 * (log_prior_v - log_v_k)
100 | correction += beta_k
101 |
102 | gp_m += m_k / v_k
103 | gp_v += 1.0 / v_k
104 |
105 | correction = (1 - correction) / prior_v
106 | gp_v = 1.0 / (gp_v + correction)
107 | gp_m = gp_v * gp_m
108 |
109 | return gp_m, gp_v
110 |
111 | def rmse_layer(self, gps_m, gps_v, x_new, f_new):
112 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
113 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
114 | return rmse
115 |
116 | def mae_layer(self, gps_m, gps_v, x_new, f_new):
117 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new)
118 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
119 | return mae
120 |
121 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new):
122 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new)
123 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
124 | return nlpd
--------------------------------------------------------------------------------
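For reference, the robust BCM of Deisenroth & Ng (2015) uses differential-entropy weights $\beta_k = \tfrac{1}{2}\!\left(\log \sigma_{**}^2(x_*) - \log \sigma_k^2(x_*)\right)$ (computed above for the correction term) and combines the experts as

$$
\sigma_*^{-2} = \sum_{k=1}^{K} \beta_k\, \sigma_k^{-2}(x_*) + \Bigl(1 - \sum_{k=1}^{K}\beta_k\Bigr)\sigma_{**}^{-2}(x_*), \qquad
\mu_* = \sigma_*^{2} \sum_{k=1}^{K} \beta_k\, \sigma_k^{-2}(x_*)\,\mu_k(x_*),
$$

where $\sigma_{**}^2$ is the prior variance (`prior_v`).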
/data/banana.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/data/banana.mat
--------------------------------------------------------------------------------
/data/nasa.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/data/nasa.mat
--------------------------------------------------------------------------------
/experiments/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/experiments/__init__.py
--------------------------------------------------------------------------------
/experiments/banana.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | # -----------------------------------------------------------------
14 | # Experiment -- Banana Classification
15 | # -----------------------------------------------------------------
16 |
17 | from kernels.rbf import RBF
18 | from likelihoods.gaussian import Gaussian
19 | from likelihoods.bernoulli import Bernoulli
20 | from models.svgp import SVGP
21 | from models.ensemblegp import EnsembleGP
22 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel
23 | from optimization.algorithms import AlgorithmVEM
24 | from sklearn.model_selection import train_test_split
25 |
26 | import torch
27 | import numpy as np
28 | import scipy.io as sio
29 | import matplotlib.pyplot as plt
30 | from matplotlib2tikz import save as tikz_save
31 |
32 | plt.rc('text', usetex=True)
33 | plt.rc('font', family='serif')
34 |
35 | # COOLORS.CO palettes
36 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e']
37 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031']
38 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56']
39 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e']
40 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559']
41 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2']
42 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54']
43 |
44 | color_palette = color_palette_6
45 | color_0 = color_palette_6[0]
46 | color_1 = color_palette_6[4]
47 |
48 | # Load Data --
49 | data = sio.loadmat('../data/banana.mat')
50 | y_banana = data['banana_Y']
51 | x_banana = data['banana_X']
52 |
53 | trials = 10
54 | nlpd_metrics = np.zeros((1,trials))
55 |
56 | plot_local = False
57 | plot_ensemble = False
58 | save = False
59 |
60 | for trial in range(trials):
61 | print('TRIAL = ' + str(trial) + '/' + str(trials))
62 | x, x_test, y, y_test = train_test_split(x_banana, y_banana, test_size=0.33, random_state=42)
63 |
64 | x_test = torch.from_numpy(x_test).float()
65 | y_test = torch.from_numpy(y_test).float()
66 |
67 | # Sorting wrt first input dimension
68 | y = y[x[:,0].argsort()]
69 | x = x[x[:,0].argsort()]
70 |
71 | # plot limits
72 | max_x = x[:,0].max()
73 | max_y = x[:,1].max()
74 | min_x = x[:,0].min()
75 | min_y = x[:,1].min()
76 |
77 | # Division into 4 regions
78 | x_1 = torch.from_numpy(x[(x[:,0]<0.0) & (x[:,1]<0.0),:]).float()
79 | y_1 = torch.from_numpy(y[(x[:,0]<0.0) & (x[:,1]<0.0),:]).float()
80 |
81 | x_2 = torch.from_numpy(x[(x[:,0]>0.0) & (x[:,1]<0.0),:]).float()
82 | y_2 = torch.from_numpy(y[(x[:,0]>0.0) & (x[:,1]<0.0),:]).float()
83 |
84 | x_3 = torch.from_numpy(x[(x[:,0]>0.0) & (x[:,1]>0.0),:]).float()
85 | y_3 = torch.from_numpy(y[(x[:,0]>0.0) & (x[:,1]>0.0),:]).float()
86 |
87 | x_4 = torch.from_numpy(x[(x[:,0]<0.0) & (x[:,1]>0.0),:]).float()
88 | y_4 = torch.from_numpy(y[(x[:,0]<0.0) & (x[:,1]>0.0),:]).float()
89 |
90 | # All tasks
91 | x_tasks = [x_1, x_2, x_3, x_4]
92 | y_tasks = [y_1, y_2, y_3, y_4]
93 |
94 | K = len(x_tasks)
95 | sigmoid = torch.nn.Sigmoid()
96 |
97 | M_k = 3 # inducing points per side
98 | N_test = 80 # test points per side
99 |
100 | ###########################
101 | # #
102 | # DISTRIBUTED TASKS #
103 | # #
104 | ###########################
105 |
106 | models = []
107 | for k, x_k in enumerate(x_tasks):
108 |
109 | print('- -')
110 | print('----- TASK k=' + str(k + 1) + ' ------')
111 | print('- -')
112 |
113 | y_k = y_tasks[k]
114 | kernel_k = RBF()
115 | likelihood_k = Bernoulli()
116 | model_k = SVGP(kernel_k, likelihood_k, M_k**2, input_dim=2)
117 |
118 | # initial grid of inducing-points
119 | mx = torch.mean(x_k[:, 0])
120 | my = torch.mean(x_k[:, 1])
121 | vx = torch.var(x_k[:, 0])
122 | vy = torch.var(x_k[:, 1])
123 |
124 | zy = np.linspace(my - 3*vy, my + 3*vy, M_k)
125 | zx = np.linspace(mx - 3*vx, mx + 3*vx, M_k)
126 | ZX, ZY = np.meshgrid(zx, zy)
127 | ZX = ZX.reshape(M_k ** 2, 1)
128 | ZY = ZY.reshape(M_k ** 2, 1)
129 | Z = np.hstack((ZX, ZY))
130 | z_k = torch.from_numpy(Z).float()
131 |
132 | model_k.z = torch.nn.Parameter(z_k, requires_grad=True)
133 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_k, iters=7)
134 |
135 | vem_algorithm.ve_its = 20
136 | vem_algorithm.vm_its = 10
137 | vem_algorithm.lr_m = 1e-3
138 | vem_algorithm.lr_L = 1e-6
139 | vem_algorithm.lr_hyp = 1e-6
140 | vem_algorithm.lr_z = 1e-4
141 |
142 | vem_algorithm.fit()
143 | models.append(model_k)
144 |
145 | # NLPD -- Metrics
146 | nlpd = model_k.nlpd(x_test, y_test)
147 |
148 | print('Local Model ('+str(k+1)+')- NLPD: ', nlpd)
149 | print(' ')
150 |
151 | if plot_local:
152 |
153 | min_tx = x[:,0].min() - 0.15
154 | min_ty = x[:,1].min() - 0.15
155 | max_tx = x[:,0].max() + 0.15
156 | max_ty = x[:,1].max() + 0.15
157 |
158 | ty = np.linspace(min_ty, max_ty, N_test)
159 | tx = np.linspace(min_tx, max_tx, N_test)
160 | TX_grid, TY_grid = np.meshgrid(tx, ty)
161 | TX = TX_grid.reshape(N_test ** 2, 1)
162 | TY = TY_grid.reshape(N_test ** 2, 1)
163 | X_test = np.hstack((TX, TY))
164 | x_test = torch.from_numpy(X_test).float()
165 |
166 | gp, gp_upper, gp_lower = model_k.predictive(x_test)
167 | gp = sigmoid(torch.from_numpy(gp))
168 |
169 | # Plot
170 | plt.figure(figsize=(7, 6))
171 | ax = plt.axes()
172 | plt.plot(x_k[y_k[:, 0] == 0, 0], x_k[y_k[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5)
173 | plt.plot(x_k[y_k[:, 0] == 1, 0], x_k[y_k[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5)
174 | plt.plot(model_k.z[:,0].detach(), model_k.z[:,1].detach(), 'kx', ms=10.0, mew=2.0)
175 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k',
176 | levels=[0.25, 0.5, 0.75], zorder=10)
177 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f')
178 |
179 | plt.title(r'Banana Recyclable GP - '+ str(k + 1) )
180 | plt.xlabel(r'$x_1$ input')
181 | plt.ylabel(r'$x_2$ input')
182 | plt.xlim(-2.5, 2.5)
183 | plt.ylim(-2.5, 2.5)
184 |
185 | if save:
186 | plt.savefig(fname='./figs/banana/banana_task_' + str(k + 1) + '.pdf', format='pdf')
187 |
188 | #plt.show()
189 | plt.close()
190 |
191 | ###########################
192 | # #
193 | # ENSEMBLE INFERENCE #
194 | # #
195 | ###########################
196 |
197 | print('- -')
198 | print('----- ENSEMBLE ------')
199 | print('- -')
200 |
201 | M_e = 5
202 | kernel = RBF()
203 | likelihood = Bernoulli()
204 | model_e = EnsembleGP(kernel, likelihood, models, M_e**2, input_dim=2)
205 |
206 | # initial grid of inducing-points
207 | mx = np.mean(x[:, 0])
208 | my = np.mean(x[:, 1])
209 | vx = np.var(x[:, 0])
210 | vy = np.var(x[:, 1])
211 |
212 | zy = np.linspace(my - 1.5*vy, my + 1.5*vy, M_e)
213 | zx = np.linspace(mx - 1.5*vx, mx + 1.5*vx, M_e)
214 | ZX, ZY = np.meshgrid(zx, zy)
215 | ZX = ZX.reshape(M_e ** 2, 1)
216 | ZY = ZY.reshape(M_e ** 2, 1)
217 | Z = np.hstack((ZX, ZY))
218 | z_e = torch.from_numpy(Z).float()
219 |
220 | model_e.z = torch.nn.Parameter(z_e, requires_grad=True)
221 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=20)
222 |
223 | vem_algorithm.ve_its = 20
224 | vem_algorithm.vm_its = 10
225 | vem_algorithm.lr_m = 1e-3
226 | vem_algorithm.lr_L = 1e-5
227 | vem_algorithm.lr_hyp = 1e-6
228 | vem_algorithm.lr_z = 1e-5
229 |
230 | vem_algorithm.fit()
231 |
232 | # NLPD -- Metrics
233 | nlpd = model_e.nlpd(x_test, y_test)
234 |
235 | nlpd_metrics[0, trial] = nlpd
236 |
237 | print('Banana Ensemble NLPD: ', nlpd)
238 | print(' ')
239 |
240 | if plot_ensemble:
241 |
242 | min_tx = x[:,0].min() - 0.15
243 | min_ty = x[:,1].min() - 0.15
244 | max_tx = x[:,0].max() + 0.15
245 | max_ty = x[:,1].max() + 0.15
246 |
247 | ty = np.linspace(min_ty, max_ty, N_test)
248 | tx = np.linspace(min_tx, max_tx, N_test)
249 | TX_grid, TY_grid = np.meshgrid(tx, ty)
250 | TX = TX_grid.reshape(N_test ** 2, 1)
251 | TY = TY_grid.reshape(N_test ** 2, 1)
252 | X_test = np.hstack((TX, TY))
253 | x_test = torch.from_numpy(X_test).float()
254 |
255 | gp, _, _ = model_e.predictive(x_test)
256 | gp = sigmoid(torch.from_numpy(gp))
257 |
258 | # Plot
259 | plt.figure(figsize=(7, 6))
260 | ax = plt.axes()
261 | plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5)
262 | plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5)
263 | plt.plot(model_e.z[:,0].detach(), model_e.z[:,1].detach(), 'kx', ms=10.0, mew=2.0)
264 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k',
265 | levels=[0.25, 0.5, 0.75], zorder=10)
266 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f')
267 |
268 | plt.title(r'Banana GP Ensemble')
269 | plt.xlabel(r'$x_1$ input')
270 | plt.ylabel(r'$x_2$ input')
271 | plt.xlim(-2.5, 2.5)
272 | plt.ylim(-2.5, 2.5)
273 |
274 | if save:
275 | plt.savefig(fname='./figs/banana/banana_task_ensemble.pdf', format='pdf')
276 |
277 | plt.show()
278 | #plt.close()
279 |
280 |
--------------------------------------------------------------------------------
/experiments/baseline.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 | #
13 | # -----------------------------------------------------------------
14 | # Experiment -- Baselines
15 | # -----------------------------------------------------------------
16 |
17 | import torch
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 |
21 | plt.rc('text', usetex=True)
22 | plt.rc('font', family='serif')
23 |
24 | # COOLORS.CO palettes
25 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e']
26 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031']
27 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56']
28 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e']
29 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559']
30 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2']
31 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54']
32 |
33 | color_palette = color_palette_2
34 |
35 | from kernels.rbf import RBF
36 | from likelihoods.gaussian import Gaussian
37 | from models.svgp import SVGP
38 | from models.ensemblegp import EnsembleGP
39 | from baselines.distgp import DistGP
40 | from baselines.poegp import PoeGP
41 | from baselines.gpoegp import GenPoeGP
42 | from baselines.bcm import BayesianCM
43 | from baselines.rbcm import RobustBayesianCM
44 | from baselines.dvigp import DVIGP
45 | from optimization.algorithms import AlgorithmVEM
46 | from optimization.algorithms import GPR_Optimizer
47 | from util import smooth_function
48 |
49 | #experiment = '10k'
50 | experiment = '100k'
51 | #experiment = '1m'
52 |
53 | if experiment == '10k':
54 | node_overlapping = 1
55 | N_k = 200
56 | trials = 10
57 | elif experiment == '100k':
58 | node_overlapping = 5
59 | N_k = 400
60 | trials = 10
61 | elif experiment == '1m':
62 | node_overlapping = 100
63 | N_k = 800
64 | trials = 10
65 | else:
66 | raise ValueError('Experiment indicator not valid! Must be {10k, 100k or 1m}')
67 |
68 | M_k = 3
69 | M_e = 35
70 | plot_local = True
71 | plot_ensemble = True
72 | save = True
73 |
74 | recy_metrics = np.zeros((3,trials))
75 | poe_metrics = np.zeros((3,trials))
76 | gpoe_metrics = np.zeros((3,trials))
77 | bcm_metrics = np.zeros((3,trials))
78 | rbcm_metrics = np.zeros((3,trials))
79 |
80 | for trial in range(trials):
81 |
82 | tasks = 50
83 | T = 50
84 |
85 | print('TRIAL = '+str(trial)+'/'+str(trials))
86 |
87 | ###########################
88 | # #
89 | # DISTRIBUTED TASKS #
90 | # #
91 | ###########################
92 |
93 | min_x = 0.0
94 | max_x = T * 0.1
95 | segment_x = (max_x - min_x) / tasks
96 | x_tasks = []
97 | y_tasks = []
98 | for n in range(node_overlapping):
99 | for k in range(T):
100 | x_k = ((min_x + (k * segment_x)) - (min_x + ((k + 1) * segment_x))) * torch.rand(N_k, 1) + (
101 | min_x + ((k + 1) * segment_x))
102 | x_k, _ = torch.sort(x_k, dim=0)
103 | y_k = smooth_function(x_k) + 2.0 * torch.randn(N_k, 1)
104 | x_tasks.append(x_k)
105 | y_tasks.append(y_k)
106 |
107 | tasks = T * node_overlapping
108 |
109 | print('# of tasks: ', tasks)
110 |
111 | ###########################
112 | # #
113 | # PARALLEL INFERENCE #
114 | # #
115 | ###########################
116 |
117 | N_k_test = 400
118 | x_test = torch.linspace(min_x-0.5, max_x+0.5, N_k_test)[:, None]
119 | models = [] # for recyclable GPs
120 | models_dist = [] # for distributed GPs
121 | x_all = [] # for distributed GPs
122 | y_all = [] # for distributed GPs
123 | for k, x_k in enumerate(x_tasks):
124 | print('- -')
125 | print('----- TASK k='+str(k+1)+' ------')
126 | print('- -')
127 | ######################################################
128 | # 1. RECYCLABLE GP
129 | ######################################################
130 | kernel_k = RBF()
131 | likelihood_k = Gaussian(fit_noise=False)
132 | model_k = SVGP(kernel_k, likelihood_k, M_k)
133 |
134 | z_k_min = min_x + ((k%T)*segment_x)
135 | z_k_max = min_x + (((k%T)+1)*segment_x)
136 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True)
137 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=15)
138 |
139 | vem_algorithm.ve_its = 20
140 | vem_algorithm.vm_its = 10
141 | vem_algorithm.lr_m = 1e-6
142 | vem_algorithm.lr_L = 1e-10
143 | vem_algorithm.lr_hyp = 1e-10
144 | vem_algorithm.lr_z = 1e-10
145 |
146 | vem_algorithm.fit()
147 | models.append(model_k)
148 |
149 | ######################################################
150 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
151 | ######################################################
152 |
153 | kernel_j = RBF()
154 | likelihood_j = Gaussian(fit_noise=True)
155 | model_j = DistGP(kernel_j, likelihood_j)
156 | GPR_Optimizer(model_j, x_k, y_tasks[k])
157 |
158 | models_dist.append(model_j)
159 | x_all.append(x_k)
160 | y_all.append(y_tasks[k])
161 |
162 | if plot_local:
163 | gp, gp_upper, gp_lower = model_k.predictive(x_test)
164 | disgp_m, disgp_v = model_j.predictive(x_k, y_tasks[k], x_test)
165 |
166 | disgp = disgp_m.detach().numpy()
167 | disgp_upper = (disgp_m + 2 * torch.sqrt(disgp_v)).detach().numpy() + 2 * model_j.likelihood.sigma.detach().numpy()
168 | disgp_lower = (disgp_m - 2 * torch.sqrt(disgp_v)).detach().numpy() - 2 * model_j.likelihood.sigma.detach().numpy()
169 |
170 | plt.figure(figsize=(12, 4))
171 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k%len(color_palette)], markersize=2.5, markeredgewidth=0.75)
172 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k, 1), color=color_palette[k%len(color_palette)], linestyle='', marker='.',markersize=5)
173 |
174 | plt.plot(x_test, gp, 'k-', linewidth=1.5)
175 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5')
176 | plt.plot(x_test, gp_upper, 'k-', linewidth=2.5)
177 | plt.plot(x_test, gp_lower, 'k-', linewidth=2.5)
178 |
179 | plt.plot(x_test, disgp, 'b-', linewidth=1.5)
180 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5')
181 | plt.plot(x_test, disgp_upper, 'b-', linewidth=2.5)
182 | plt.plot(x_test, disgp_lower, 'b-', linewidth=2.5)
183 |
184 | plt.title(r'Variational Sparse GP -- (task=' + str(k+1) + ')')
185 | plt.xlabel(r'Input, $x$')
186 | plt.ylabel(r'Output, $y$')
187 | plt.xlim(min_x - 0.5, max_x + 0.5)
188 | plt.ylim(-22.0, 22.0)
189 |
190 | if save:
191 | plt.savefig(fname='./figs/baseline/distributed_task_'+str(k+1)+'.pdf',format='pdf')
192 |
193 | plt.close()
194 | #plt.show()
195 |
196 | ###########################
197 | # #
198 | # ENSEMBLE INFERENCE #
199 | # #
200 | ###########################
201 | print('- -')
202 | print('----- ENSEMBLE ------')
203 | print('- -')
204 |
205 | # TEST DATA FOR EVALUATION
206 | N_e_test = 400
207 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None]
208 | f_test_ensemble = smooth_function(x_test_ensemble)
209 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1)
210 |
211 | ######################################################
212 | # 1. RECYCLABLE GP
213 | ######################################################
214 |
215 | kernel = RBF()
216 | likelihood = Gaussian(fit_noise=False)
217 | model_e = EnsembleGP(kernel, likelihood, models, M_e)
218 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:,None], requires_grad=True)
219 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=10)
220 |
221 | vem_algorithm.ve_its = 30
222 | vem_algorithm.vm_its = 10
223 | vem_algorithm.lr_m = 1e-3
224 | vem_algorithm.lr_L = 1e-6
225 | vem_algorithm.lr_hyp = 1e-8
226 | vem_algorithm.lr_z = 1e-8
227 |
228 | vem_algorithm.fit()
229 |
230 | nlpd = model_e.nlpd(x_test_ensemble, y_test_ensemble)
231 | rmse = model_e.rmse(x_test_ensemble, f_test_ensemble)
232 | mae = model_e.mae(x_test_ensemble, f_test_ensemble)
233 |
234 | recy_metrics[0, trial] = nlpd
235 | recy_metrics[1, trial] = rmse
236 | recy_metrics[2, trial] = mae
237 |
238 | print('Recyclable - NLPD: ', nlpd)
239 | print('Recyclable - RMSE: ', rmse)
240 | print('Recyclable - MAE: ', mae)
241 | print(' ')
242 |
243 | ######################################################
244 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
245 | ######################################################
246 |
247 | # A. POE _________//
248 |
249 | poe_model = PoeGP(models_dist)
250 |
251 | nlpd = poe_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble)
252 | rmse = poe_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble)
253 | mae = poe_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble)
254 |
255 | poe_metrics[0, trial] = nlpd
256 | poe_metrics[1, trial] = rmse
257 | poe_metrics[2, trial] = mae
258 |
259 | print('POE-NLPD: ', nlpd)
260 | print('POE-RMSE: ', rmse)
261 | print('POE-MAE: ', mae)
262 | print(' ')
263 |
264 | # B. GPOE _________//
265 |
266 | gpoe_model = GenPoeGP(models_dist)
267 |
268 | nlpd = gpoe_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble)
269 | rmse = gpoe_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble)
270 | mae = gpoe_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble)
271 |
272 | gpoe_metrics[0, trial] = nlpd
273 | gpoe_metrics[1, trial] = rmse
274 | gpoe_metrics[2, trial] = mae
275 |
276 | print('GenPOE-NLPD: ', nlpd)
277 | print('GenPOE-RMSE: ', rmse)
278 | print('GenPOE-MAE: ', mae)
279 | print(' ')
280 |
281 | # C. BCM _________//
282 |
283 | bcm_model = BayesianCM(models_dist)
284 |
285 | nlpd = bcm_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble)
286 | rmse = bcm_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble)
287 | mae = bcm_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble)
288 |
289 | bcm_metrics[0, trial] = nlpd
290 | bcm_metrics[1, trial] = rmse
291 | bcm_metrics[2, trial] = mae
292 |
293 | print('BCM-NLPD: ', nlpd)
294 | print('BCM-RMSE: ', rmse)
295 | print('BCM-MAE: ', mae)
296 | print(' ')
297 |
298 | # D. RBCM _________//
299 |
300 | rbcm_model = RobustBayesianCM(models_dist)
301 |
302 | nlpd = rbcm_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble)
303 | rmse = rbcm_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble)
304 | mae = rbcm_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble)
305 |
306 | rbcm_metrics[0, trial] = nlpd
307 | rbcm_metrics[1, trial] = rmse
308 | rbcm_metrics[2, trial] = mae
309 |
310 | print('RBCM-NLPD: ', nlpd)
311 | print('RBCM-RMSE: ', rmse)
312 | print('RBCM-MAE: ', mae)
313 | print(' ')
314 |
315 | # save to csv file
316 | np.savetxt('./metrics/recy_metrics_'+ experiment +'.csv', recy_metrics, delimiter=',')
317 | np.savetxt('./metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',')
318 | np.savetxt('./metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',')
319 | np.savetxt('./metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',')
320 | np.savetxt('./metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',')
321 |
322 | if plot_ensemble:
323 | gp, gp_upper, gp_lower = model_e.predictive(x_test_ensemble)
324 |
325 | poe_m, poe_v = poe_model.predictive(x_all, y_all, x_test_ensemble)
326 | gpoe_m, gpoe_v = gpoe_model.predictive(x_all, y_all, x_test_ensemble)
327 |         bcm_m, bcm_v = bcm_model.predictive(x_all, y_all, x_test_ensemble)
328 | rbcm_m, rbcm_v = rbcm_model.predictive(x_all, y_all, x_test_ensemble)
329 |
330 | # Plot Ensemble
331 | plt.figure(figsize=(12, 4))
332 | for k in range(50):
333 | #if k%10==0:
334 | plt.plot(x_tasks[k], y_tasks[k], ls='-', color=color_palette[k%len(color_palette)], markersize=2.5, markeredgewidth=0.75)
335 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k,1), color=color_palette[k%len(color_palette)], linestyle='', marker='.', markersize=5)
336 |
337 | plt.plot(model_e.z.detach(), -20.0 * torch.ones(M_e, 1), color='r', linestyle='', marker='x', markersize=5, markeredgewidth=1.0)
338 | plt.plot(x_test_ensemble, gp, 'k-', linewidth=1.5)
339 | plt.plot(x_test_ensemble, gp_upper, 'k-', linewidth=2.5)
340 | plt.plot(x_test_ensemble, gp_lower, 'k-', linewidth=2.5)
341 |
342 | poe = poe_m.detach().numpy()
343 | poe_upper = (poe_m + 2 * torch.sqrt(poe_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy()
344 | poe_lower = (poe_m - 2 * torch.sqrt(poe_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy()
345 |
346 |         plt.plot(x_test_ensemble, poe, 'g-', linewidth=1.5)
347 |         plt.plot(x_test_ensemble, poe_upper, 'g-', linewidth=2.5)
348 |         plt.plot(x_test_ensemble, poe_lower, 'g-', linewidth=2.5)
349 |
350 | gpoe = gpoe_m.detach().numpy()
351 | gpoe_upper = (gpoe_m + 2 * torch.sqrt(gpoe_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy()
352 | gpoe_lower = (gpoe_m - 2 * torch.sqrt(gpoe_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy()
353 |
354 |         plt.plot(x_test_ensemble, gpoe, 'm-', linewidth=1.5)
355 |         plt.plot(x_test_ensemble, gpoe_upper, 'm-', linewidth=2.5)
356 |         plt.plot(x_test_ensemble, gpoe_lower, 'm-', linewidth=2.5)
357 |
358 | bcm = bcm_m.detach().numpy()
359 | bcm_upper = (bcm_m + 2 * torch.sqrt(bcm_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy()
360 | bcm_lower = (bcm_m - 2 * torch.sqrt(bcm_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy()
361 |
362 |         plt.plot(x_test_ensemble, bcm, 'r-', linewidth=1.5)
363 |         plt.plot(x_test_ensemble, bcm_upper, 'r-', linewidth=2.5)
364 |         plt.plot(x_test_ensemble, bcm_lower, 'r-', linewidth=2.5)
365 |
366 | rbcm = rbcm_m.detach().numpy()
367 | rbcm_upper = (rbcm_m + 2 * torch.sqrt(rbcm_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy()
368 | rbcm_lower = (rbcm_m - 2 * torch.sqrt(rbcm_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy()
369 |
370 |         plt.plot(x_test_ensemble, rbcm, 'b-', linewidth=1.5)
371 |         plt.plot(x_test_ensemble, rbcm_upper, 'b-', linewidth=2.5)
372 |         plt.plot(x_test_ensemble, rbcm_lower, 'b-', linewidth=2.5)
373 |
374 | plt.title(r'Ensemble GP Model -- (tasks='+str(tasks)+')')
375 | plt.xlabel(r'Input, $x$')
376 | plt.ylabel(r'Output, $y$')
377 | plt.xlim(min_x-0.5, max_x+0.5)
378 | plt.ylim(-22.0, 22.0)
379 |
380 | if save:
381 | plt.savefig(fname='./figs/baseline/distributed_ensemble.pdf',format='pdf')
382 |
383 | #plt.show()
384 | plt.close()
--------------------------------------------------------------------------------
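Note on the baselines used above: PoE, gPoE, BCM and rBCM differ only in how the per-expert Gaussian predictives N(mu_k, s2_k) at a test input are merged (Deisenroth and Ng, 2015). The following is a minimal sketch of those combination rules, not the repo's PoeGP / GenPoeGP / BayesianCM / RobustBayesianCM implementation; `prior_var` stands for the GP prior variance k(x*, x*) and `mu`, `s2` hold one entry per expert.

import math
import torch

def combine_experts(mu, s2, prior_var, rule='poe'):
    # mu, s2: per-expert predictive means and variances at a single test input
    K = mu.shape[0]
    if rule == 'poe':                    # product of experts: precisions simply add
        beta = torch.ones(K)
        prec = (beta / s2).sum()
    elif rule == 'gpoe':                 # generalised PoE with uniform weights 1/K
        beta = torch.full((K,), 1.0 / K)
        prec = (beta / s2).sum()
    elif rule == 'bcm':                  # Bayesian committee machine: prior correction
        beta = torch.ones(K)
        prec = (beta / s2).sum() + (1.0 - K) / prior_var
    else:                                # robust BCM: differential-entropy weights
        beta = 0.5 * (math.log(prior_var) - torch.log(s2))
        prec = (beta / s2).sum() + (1.0 - beta.sum()) / prior_var
    var = 1.0 / prec
    mean = var * (beta * mu / s2).sum()
    return mean, var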
/experiments/dvigp_nlpd.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | # -----------------------------------------------------------------
14 | # Experiment -- Baselines / Y. Gal et al. (2014)
15 | # -----------------------------------------------------------------
16 |
17 | import torch
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 |
21 | plt.rc('text', usetex=True)
22 | plt.rc('font', family='serif')
23 |
24 | # COOLORS.CO palettes
25 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e']
26 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031']
27 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56']
28 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e']
29 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559']
30 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2']
31 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54']
32 |
33 | color_palette = color_palette_2
34 |
35 | from kernels.rbf import RBF
36 | from likelihoods.gaussian import Gaussian
37 | from baselines.distgp import DistGP
38 | from baselines.dvigp import DVIGP
39 | from optimization.algorithms import GPR_Optimizer
40 | #from models.svgp import predictive
41 | from optimization.algorithms import vem_algorithm
42 | from util import smooth_function
43 |
44 | experiment = '10k'
45 | #experiment = '100k'
46 | #experiment = '1m'
47 |
48 | if experiment == '10k':
49 | node_overlapping = 1
50 | N_k = 200
51 | trials = 10
52 | N = 10000
53 | elif experiment == '100k':
54 | node_overlapping = 5
55 | N_k = 400
56 | trials = 10
57 | N = 100000
58 | elif experiment == '1m':
59 | node_overlapping = 100
60 | N_k = 800
61 | trials = 10
62 | N = 1000000
63 | else:
64 | raise ValueError('Experiment indicator not valid! Must be {10k, 100k or 1m}')
65 |
66 | M = 35
67 | plot_local = False
68 | plot_ensemble = False
69 | save = False
70 |
71 | dvigp_metrics = np.zeros((3,trials))
72 |
73 | for trial in range(trials):
74 |
75 | tasks = 50
76 | T = 50
77 |
78 |     print('TRIAL = '+str(trial+1)+'/'+str(trials))
79 |
80 | ###########################
81 | # #
82 | # DISTRIBUTED TASKS #
83 | # #
84 | ###########################
85 |
86 | min_x = 0.0
87 | max_x = T * 0.1
88 | x = (min_x - max_x)*torch.rand(N, 1) + max_x
89 | x, _ = torch.sort(x, dim=0)
90 | y = smooth_function(x) + 2.0*torch.randn(N, 1)
91 |
92 | tasks = T * node_overlapping
93 |
94 |     print('Number of tasks: ', tasks)
95 |
96 | ######################################################
97 | # 1. DISTRIBUTED VIGP (Gal 2014)
98 | ######################################################
99 |
100 | kernel_j = RBF()
101 | likelihood_j = Gaussian(fit_noise=True)
102 |
103 | model = DVIGP(kernel_j, likelihood_j, M, nodes=tasks)
104 | model.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M)[:,None], requires_grad=True)
105 | vem_algorithm(model, x, y, em_iters=20, plot=False)
106 |
107 | # TEST DATA FOR EVALUATION
108 | N_e_test = 400
109 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None]
110 | f_test_ensemble = smooth_function(x_test_ensemble)
111 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1)
112 |
113 | nlpd = model.nlpd(x_test_ensemble, y_test_ensemble)
114 | rmse = model.rmse(x_test_ensemble, f_test_ensemble)
115 | mae = model.mae(x_test_ensemble, f_test_ensemble)
116 |
117 | dvigp_metrics[0, trial] = nlpd
118 | dvigp_metrics[1, trial] = rmse
119 | dvigp_metrics[2, trial] = mae
120 |
121 | print('Distributed VIGP - NLPD: ', nlpd)
122 | print('Distributed VIGP - RMSE: ', rmse)
123 | print('Distributed VIGP - MAE: ', mae)
124 | print(' ')
125 |
126 |
127 |
--------------------------------------------------------------------------------
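The distributed VI baseline above exploits the fact that, with the inducing inputs fixed, the statistics entering the collapsed sparse-GP bound are sums over data points, so every node can accumulate its share independently and one reduction step recovers the full-data quantities. A rough sketch of that map-reduce pattern follows (illustration only, with made-up chunk sizes; it is not the DVIGP class's actual interface):

import torch
from kernels.rbf import RBF

def node_statistics(kernel, x_chunk, y_chunk, z):
    # "map" step, run independently on each node: partial sufficient statistics
    Knm = kernel.K(x_chunk, z)                   # (n_chunk, M)
    return Knm.t() @ Knm, Knm.t() @ y_chunk      # (M, M) and (M, 1)

def reduce_statistics(partials):
    # "reduce" step: sum the per-node contributions
    return sum(p[0] for p in partials), sum(p[1] for p in partials)

# usage sketch: split (x, y) into one chunk per node, then reduce
kernel, z = RBF(), torch.linspace(0.0, 5.0, 35)[:, None]
x, y = 5.0 * torch.rand(1000, 1), torch.randn(1000, 1)
partials = [node_statistics(kernel, xc, yc, z)
            for xc, yc in zip(torch.chunk(x, 10), torch.chunk(y, 10))]
A, b = reduce_statistics(partials)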
/experiments/image.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | # -----------------------------------------------------------------
14 | # Experiment -- MNIST
15 | # -----------------------------------------------------------------
16 |
17 |
18 | import torch
19 | import torchvision
20 | import numpy as np
21 | import matplotlib.pyplot as plt
22 |
23 | from kernels.rbf import RBF
24 | from likelihoods.gaussian import Gaussian
25 | from likelihoods.bernoulli import Bernoulli
26 | from models.svgp import SVGP
27 | from models.ensemblegp import EnsembleGP
28 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel
29 | from optimization.algorithms import AlgorithmVEM
30 |
31 | plt.rc('text', usetex=True)
32 | plt.rc('font', family='serif')
33 |
34 | # COOLORS.CO palettes
35 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e']
36 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031']
37 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56']
38 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e']
39 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559']
40 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2']
41 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54']
42 |
43 | # For 0-number
44 | #color_palette = color_palette_5
45 | #color_0 = color_palette[0]
46 | #color_1 = color_palette[4]
47 |
48 | # For 1-number
49 | color_palette = color_palette_3
50 | color_0 = color_palette[1]
51 | color_1 = color_palette[4]
52 |
53 | mnist = torchvision.datasets.MNIST('../data/', train=True, download=False, transform=torchvision.transforms.Compose([
54 | torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.1307,), (0.3081,))]))
55 |
56 | data = enumerate(torch.utils.data.DataLoader(mnist, batch_size=10, shuffle=False))
57 | batch_id, (image, label) = next(data)
58 |
59 | number = 1
60 | if number > 0:
61 | i = 3
62 | else:
63 | i = 1
64 |
65 | y = image[i][0]
66 | y[y>0.0] = 1.0
67 | y[y<0.0] = 0.0
68 |
69 | pixel = y.size(0)
70 |
71 | y = y.view(1,pixel ** 2).t()
72 |
73 | x1 = np.linspace(-1.0, 1.0, pixel)
74 | x2 = np.linspace(-1.0, 1.0, pixel)
75 | X1, X2 = np.meshgrid(x1, x2)
76 | X1 = X1.reshape(pixel ** 2, 1)
77 | X2 = -X2.reshape(pixel ** 2, 1)
78 | X_np = np.hstack((X1, X2))
79 | x = torch.from_numpy(X_np).float()
80 |
81 | # plot limits
82 | max_x = x[:,0].max()
83 | max_y = x[:,1].max()
84 | min_x = x[:,0].min()
85 | min_y = x[:,1].min()
86 |
87 | x_tasks = []
88 | y_tasks = []
89 | if number == 0:
90 | # Division into 4 regions
91 | x_1 = x[(x[:,0]<0.0) & (x[:,1]<0.0),:]
92 | y_1 = y[(x[:,0]<0.0) & (x[:,1]<0.0),:]
93 |
94 | x_2 = x[(x[:,0]>0.0) & (x[:,1]<0.0),:]
95 | y_2 = y[(x[:,0]>0.0) & (x[:,1]<0.0),:]
96 |
97 | x_3 = x[(x[:,0]>0.0) & (x[:,1]>0.0),:]
98 | y_3 = y[(x[:,0]>0.0) & (x[:,1]>0.0),:]
99 |
100 | x_4 = x[(x[:,0]<0.0) & (x[:,1]>0.0),:]
101 | y_4 = y[(x[:,0]<0.0) & (x[:,1]>0.0),:]
102 |
103 | # All tasks
104 | x_tasks += [x_1, x_2, x_3, x_4]
105 | y_tasks += [y_1, y_2, y_3, y_4]
106 |
107 | elif number == 1:
108 | # Division into 2 regions
109 | x_1 = x[(x[:, 1] < 0.0), :]
110 | y_1 = y[(x[:, 1] < 0.0), :]
111 |
112 | x_2 = x[(x[:, 1] > 0.0), :]
113 | y_2 = y[(x[:, 1] > 0.0), :]
114 |
115 | # All tasks
116 | x_tasks += [x_1, x_2]
117 | y_tasks += [y_1, y_2]
118 |
119 |
120 | K = len(x_tasks)
121 | sigmoid = torch.nn.Sigmoid()
122 |
123 | M_k = 4 # inducing points per side
124 | N_test = 80 # test points per side
125 |
126 | plot_local = True
127 | plot_ensemble = True
128 | save = True
129 |
130 | ###########################
131 | # #
132 | # DISTRIBUTED TASKS #
133 | # #
134 | ###########################
135 |
136 | models = []
137 | for k, x_k in enumerate(x_tasks):
138 |
139 | print('- -')
140 | print('----- TASK k=' + str(k + 1) + ' ------')
141 | print('- -')
142 |
143 | y_k = y_tasks[k]
144 | kernel_k = RBF()
145 | likelihood_k = Bernoulli()
146 | model_k = SVGP(kernel_k, likelihood_k, M_k**2, input_dim=2)
147 |
148 | # initial grid of inducing-points
149 | mx = torch.mean(x_k[:, 0])
150 | my = torch.mean(x_k[:, 1])
151 | vx = torch.var(x_k[:, 0])
152 | vy = torch.var(x_k[:, 1])
153 |
154 | zy = np.linspace(my - 3*vy, my + 3*vy, M_k)
155 | zx = np.linspace(mx - 3*vx, mx + 3*vx, M_k)
156 | ZX, ZY = np.meshgrid(zx, zy)
157 | ZX = ZX.reshape(M_k ** 2, 1)
158 | ZY = ZY.reshape(M_k ** 2, 1)
159 | Z = np.hstack((ZX, ZY))
160 | z_k = torch.from_numpy(Z).float()
161 |
162 | model_k.z = torch.nn.Parameter(z_k, requires_grad=True)
163 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_k, iters=7)
164 |
165 | vem_algorithm.ve_its = 20
166 | vem_algorithm.vm_its = 10
167 | vem_algorithm.lr_m = 1e-3
168 | vem_algorithm.lr_L = 1e-6
169 | vem_algorithm.lr_hyp = 1e-6
170 | vem_algorithm.lr_z = 1e-4
171 |
172 | vem_algorithm.fit()
173 | models.append(model_k)
174 |
175 | if plot_local:
176 |
177 | min_tx = x[:,0].min() - 0.15
178 | min_ty = x[:,1].min() - 0.15
179 | max_tx = x[:,0].max() + 0.15
180 | max_ty = x[:,1].max() + 0.15
181 |
182 | ty = np.linspace(min_ty, max_ty, N_test)
183 | tx = np.linspace(min_tx, max_tx, N_test)
184 | TX_grid, TY_grid = np.meshgrid(tx, ty)
185 | TX = TX_grid.reshape(N_test ** 2, 1)
186 | TY = TY_grid.reshape(N_test ** 2, 1)
187 | X_test = np.hstack((TX, TY))
188 | x_test = torch.from_numpy(X_test).float()
189 |
190 | gp, gp_upper, gp_lower = model_k.predictive(x_test)
191 | gp = sigmoid(torch.from_numpy(gp))
192 |
193 | # Plot
194 | plt.figure(figsize=(7, 7))
195 | ax = plt.axes()
196 | plt.plot(x_k[y_k[:, 0] == 0, 0], x_k[y_k[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=12.0)
197 | plt.plot(x_k[y_k[:, 0] == 1, 0], x_k[y_k[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=12.0)
198 | plt.plot(model_k.z[:,0].detach(), model_k.z[:,1].detach(), 'kx', ms=10.0, mew=2.0)
199 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k',
200 | levels=[0.25, 0.5, 0.75], zorder=10)
201 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f')
202 |
203 | plt.title(r'MNIST Recyclable GP - '+ str(k + 1) )
204 | plt.xlabel(r'$x_1$ input')
205 | plt.ylabel(r'$x_2$ input')
206 | plt.xlim(-1.2, 1.2)
207 | plt.ylim(-1.2, 1.2)
208 |
209 | if save:
210 | plt.savefig(fname='./figs/image/0_number_task_' + str(k + 1) + '.pdf', format='pdf')
211 |
212 | plt.show()
213 | #plt.close()
214 |
215 | ###########################
216 | # #
217 | # ENSEMBLE INFERENCE #
218 | # #
219 | ###########################
220 |
221 | print('- -')
222 | print('----- ENSEMBLE ------')
223 | print('- -')
224 |
225 | if number == 0:
226 | M_e = 5
227 | elif number == 1:
228 | M_e = 4
229 |
230 | kernel = RBF()
231 | likelihood = Bernoulli()
232 | model_e = EnsembleGP(kernel, likelihood, models, M_e**2, input_dim=2)
233 |
234 | # initial grid of inducing-points
235 | mx = torch.mean(x[:, 0])
236 | my = torch.mean(x[:, 1])
237 | vx = torch.var(x[:, 0])
238 | vy = torch.var(x[:, 1])
239 |
240 | zy = np.linspace(my - 1.5*vy, my + 1.5*vy, M_e)
241 | zx = np.linspace(mx - 1.5*vx, mx + 1.5*vx, M_e)
242 | ZX, ZY = np.meshgrid(zx, zy)
243 | ZX = ZX.reshape(M_e ** 2, 1)
244 | ZY = ZY.reshape(M_e ** 2, 1)
245 | Z = np.hstack((ZX, ZY))
246 | z_e = torch.from_numpy(Z).float()
247 |
248 | model_e.z = torch.nn.Parameter(z_e, requires_grad=True)
249 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=20)
250 |
251 | vem_algorithm.ve_its = 20
252 | vem_algorithm.vm_its = 10
253 | vem_algorithm.lr_m = 1e-3
254 | vem_algorithm.lr_L = 1e-5
255 | vem_algorithm.lr_hyp = 1e-6
256 | vem_algorithm.lr_z = 1e-5
257 |
258 | vem_algorithm.fit()
259 |
260 | if plot_ensemble:
261 |
262 | min_tx = x[:,0].min() - 0.15
263 | min_ty = x[:,1].min() - 0.15
264 | max_tx = x[:,0].max() + 0.15
265 | max_ty = x[:,1].max() + 0.15
266 |
267 | ty = np.linspace(min_ty, max_ty, N_test)
268 | tx = np.linspace(min_tx, max_tx, N_test)
269 | TX_grid, TY_grid = np.meshgrid(tx, ty)
270 | TX = TX_grid.reshape(N_test ** 2, 1)
271 | TY = TY_grid.reshape(N_test ** 2, 1)
272 | X_test = np.hstack((TX, TY))
273 | x_test = torch.from_numpy(X_test).float()
274 |
275 | gp, _, _ = model_e.predictive(x_test)
276 | gp = sigmoid(torch.from_numpy(gp))
277 |
278 | # Plot
279 | plt.figure(figsize=(7, 7))
280 | ax = plt.axes()
281 | plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=12.0)
282 | plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=12.0)
283 | plt.plot(model_e.z[:,0].detach(), model_e.z[:,1].detach(), 'kx', ms=10.0, mew=2.0)
284 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k',
285 | levels=[0.25, 0.5, 0.75], zorder=10)
286 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f')
287 |
288 | plt.title(r'MNIST GP Ensemble')
289 | plt.xlabel(r'$x_1$ input')
290 | plt.ylabel(r'$x_2$ input')
291 | plt.xlim(-1.2, 1.2)
292 | plt.ylim(-1.2, 1.2)
293 |
294 | if save:
295 | plt.savefig(fname='./figs/image/0_number_ensemble.pdf', format='pdf')
296 |
297 | plt.show()
298 |
299 | # plt.figure(figsize=(6, 6))
300 | # plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=8.0)
301 | # plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=8.0)
302 | # plt.xlim(-1.5, 1.5)
303 | # plt.ylim(-1.5, 1.5)
304 | # plt.show()
305 |
306 | # fig = plt.figure()
307 | # for i in range(10):
308 | # plt.subplot(2,5,i+1)
309 | # #plt.tight_layout()
310 | # print(image[i][0])
311 | # plt.imshow(image[i][0], cmap='gray', interpolation='none')
312 | # plt.title("Ground Truth: {}".format(label[i]))
313 | #
314 | # plt.show()
--------------------------------------------------------------------------------
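The figures above pass the latent GP mean through a sigmoid, i.e. a plug-in estimate of p(y=1 | x*). A common refinement, not used in image.py and shown here only for comparison, is MacKay's moderated output, which uses the latent variance to pull uncertain predictions back towards 0.5:

import math
import torch

def plugin_probability(latent_mean):
    # what image.py plots: sigmoid of the latent mean
    return torch.sigmoid(latent_mean)

def moderated_probability(latent_mean, latent_var):
    # probit-style moderation: large latent variance shrinks p towards 0.5
    return torch.sigmoid(latent_mean / torch.sqrt(1.0 + math.pi * latent_var / 8.0))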
/experiments/million_rbcm.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | # -----------------------------------------------------------------
14 | # Experiment II -- Baselines (Million)
15 | # -----------------------------------------------------------------
16 |
17 |
18 | import torch
19 | import numpy as np
20 | import matplotlib.pyplot as plt
21 |
22 | plt.rc('text', usetex=True)
23 | plt.rc('font', family='serif')
24 |
25 | # COOLORS.CO palettes
26 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e']
27 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031']
28 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56']
29 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e']
30 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559']
31 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2']
32 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54']
33 |
34 | color_palette = color_palette_2
35 |
36 | from kernels.rbf import RBF
37 | from likelihoods.gaussian import Gaussian
38 | from models.svgp import SVGP
39 | from models.ensemblegp import EnsembleGP
40 | from baselines.distgp import DistGP
41 | from baselines.poegp import PoeGP
42 | from baselines.gpoegp import GenPoeGP
43 | from baselines.bcm import BayesianCM
44 | from baselines.rbcm import RobustBayesianCM
45 | from baselines.dvigp import DVIGP
46 | from optimization.algorithms import AlgorithmVEM
47 | from optimization.algorithms import GPR_Optimizer
48 | from util import smooth_function
49 |
50 | experiment = '1m'
51 |
52 | my_path = './../../../../Dropbox/PhD/Works/RecyclableGP/'
53 |
54 | N_k = 400 # 200
55 | M_k = 3
56 | M_e = 35
57 |
58 | T = 50
59 | tasks = 50
60 | layer_1_merge = 10 # 10
61 | layer_2_merge = 5 # 10
62 | trials = 5
63 | node_overlapping = 1
64 |
65 | plot_layer_0 = False
66 | plot_layer_1 = True
67 | plot_layer_2 = True
68 | plot_ensemble = True
69 | save = True
70 |
71 | recy_metrics = np.zeros((3,trials))
72 | poe_metrics = np.zeros((3,trials))
73 | gpoe_metrics = np.zeros((3,trials))
74 | bcm_metrics = np.zeros((3,trials))
75 | rbcm_metrics = np.zeros((3,trials))
76 |
77 | N_test = 400
78 | min_x = 0.0
79 | max_x = T * 0.1
80 | segment_x = (max_x - min_x) / tasks
81 | x_test = torch.linspace(min_x - 0.5, max_x + 0.5, N_test)[:, None]
82 | f_test = smooth_function(x_test)
83 | y_test = f_test + 2.0 * torch.randn(N_test, 1)
84 |
85 | for trial in range(trials):
86 | print('TRIAL = '+str(trial+1)+'/'+str(trials))
87 | layer_2 = []
88 | layer_2_poe_gpm = [] # POE GPs (predictive)
89 | layer_2_poe_gpv = [] # POE GPs (predictive)
90 | layer_2_gpoe_gpm = [] # GPOE GPs (predictive)
91 | layer_2_gpoe_gpv = [] # GPOE GPs (predictive)
92 | layer_2_bcm_gpm = [] # BCM GPs (predictive)
93 | layer_2_bcm_gpv = [] # BCM GPs (predictive)
94 | layer_2_rbcm_gpm = [] # rBCM GPs (predictive)
95 | layer_2_rbcm_gpv = [] # rBCM GPs (predictive)
96 |
97 | for j in range(layer_2_merge):
98 | print('LAYER-2 = ' + str(j+1) + '/' + str(layer_2_merge))
99 | layer_1 = []
100 | layer_1_poe_gpm = [] # POE GPs (predictive)
101 | layer_1_poe_gpv = [] # POE GPs (predictive)
102 | layer_1_gpoe_gpm = [] # GPOE GPs (predictive)
103 | layer_1_gpoe_gpv = [] # GPOE GPs (predictive)
104 | layer_1_bcm_gpm = [] # BCM GPs (predictive)
105 | layer_1_bcm_gpv = [] # BCM GPs (predictive)
106 | layer_1_rbcm_gpm = [] # rBCM GPs (predictive)
107 | layer_1_rbcm_gpv = [] # rBCM GPs (predictive)
108 |
109 | for m in range(layer_1_merge):
110 | print('LAYER-1 = ' + str(m+1) + '/' + str(layer_1_merge))
111 |
112 | ###########################
113 | # LAYER 0 #
114 | # ___________ #
115 | # DISTRIBUTED #
116 | ###########################
117 |
118 | x_tasks = []
119 | y_tasks = []
120 |
121 | # SYNTHETIC DATA
122 | for n in range(node_overlapping):
123 | for k in range(T):
124 | x_k = ((min_x + (k * segment_x)) - (min_x + ((k + 1) * segment_x))) * torch.rand(N_k, 1) + (
125 | min_x + ((k + 1) * segment_x))
126 | x_k, _ = torch.sort(x_k, dim=0)
127 | y_k = smooth_function(x_k) + 2.0 * torch.randn(N_k, 1)
128 | x_tasks.append(x_k)
129 | y_tasks.append(y_k)
130 |
131 | tasks = T * node_overlapping
132 |
133 | layer_0 = [] # recyclable GPs
134 | layer_0_dist = [] # distributed GPs (models)
135 | layer_0_dist_gpm = [] # distributed GPs (predictive)
136 | layer_0_dist_gpv = [] # distributed GPs (predictive)
137 |
138 | for k, x_k in enumerate(x_tasks):
139 | print(' ')
140 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials))
141 | print('LAYER-0 = ' + str(k+1) + '/' + str(T*node_overlapping))
142 | print('LAYER-1 = ' + str(m+1) + '/' + str(layer_1_merge))
143 | print('LAYER-2 = ' + str(j+1) + '/' + str(layer_2_merge))
144 | print('\ -')
145 | print(' ---- TASK k=' + str(k + 1) + ' ------')
146 | print('/ -')
147 | print(' ')
148 | ######################################################
149 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
150 | ######################################################
151 |
152 | kernel_j = RBF()
153 | likelihood_j = Gaussian(fit_noise=True)
154 | model_j = DistGP(kernel_j, likelihood_j)
155 | GPR_Optimizer(model_j, x_k, y_tasks[k])
156 |
157 | dis_gp_m, dis_gp_v = model_j.predictive(x_k, y_tasks[k], x_test)
158 | layer_0_dist.append(model_j)
159 | layer_0_dist_gpm.append(dis_gp_m)
160 | layer_0_dist_gpv.append(dis_gp_v)
161 |
162 | ###########################
163 | # LAYER 0 #
164 | # ________ #
165 | # ENSEMBLE #
166 | ###########################
167 |
168 | print(' ')
169 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials))
170 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping))
171 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge))
172 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge))
173 | print('\ -')
174 | print(' ------ ENSEMBLE LAYER 0 ------')
175 | print('/ -')
176 | print(' ')
177 |
178 | #########################################################
179 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
180 | #########################################################
181 | # A. POE _________//
182 | # B. GPOE _________//
183 | # C. BCM _________//
184 | # D. RBCM _________//
185 |
186 | poe_model = PoeGP(models=layer_0_dist)
187 | gpoe_model = GenPoeGP(models=layer_0_dist)
188 | bcm_model = BayesianCM(models=layer_0_dist)
189 | rbcm_model = RobustBayesianCM(models=layer_0_dist)
190 |
191 | poe_m, poe_v = poe_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test)
192 | gpoe_m, gpoe_v = gpoe_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test)
193 | bcm_m, bcm_v = bcm_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test)
194 | rbcm_m, rbcm_v = rbcm_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test)
195 |
196 | layer_1_poe_gpm.append(poe_m)
197 | layer_1_poe_gpv.append(poe_v)
198 | layer_1_gpoe_gpm.append(gpoe_m)
199 | layer_1_gpoe_gpv.append(gpoe_v)
200 | layer_1_bcm_gpm.append(bcm_m)
201 | layer_1_bcm_gpv.append(bcm_v)
202 | layer_1_rbcm_gpm.append(rbcm_m)
203 | layer_1_rbcm_gpv.append(rbcm_v)
204 |
205 | ###########################
206 | # LAYER 1 #
207 | ###########################
208 |
209 | print(' ')
210 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials))
211 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping))
212 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge))
213 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge))
214 | print('\ -')
215 | print(' ------ ENSEMBLE LAYER 1 ------')
216 | print('/ -')
217 | print(' ')
218 |
219 | #########################################################
220 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
221 | #########################################################
222 | # A. POE _________//
223 | # B. GPOE _________//
224 | # C. BCM _________//
225 | # D. RBCM _________//
226 |
227 | poe_model = PoeGP(models=layer_0_dist)
228 | gpoe_model = GenPoeGP(models=layer_0_dist)
229 | bcm_model = BayesianCM(models=layer_0_dist)
230 | rbcm_model = RobustBayesianCM(models=layer_0_dist)
231 |
232 | poe_m, poe_v = poe_model.predictive_layer(layer_1_poe_gpm, layer_1_poe_gpv, x_test)
233 | gpoe_m, gpoe_v = gpoe_model.predictive_layer(layer_1_gpoe_gpm, layer_1_gpoe_gpv, x_test)
234 | bcm_m, bcm_v = bcm_model.predictive_layer(layer_1_bcm_gpm, layer_1_bcm_gpv, x_test)
235 | rbcm_m, rbcm_v = rbcm_model.predictive_layer(layer_1_rbcm_gpm, layer_1_rbcm_gpv, x_test)
236 |
237 | layer_2_poe_gpm.append(poe_m)
238 | layer_2_poe_gpv.append(poe_v)
239 | layer_2_gpoe_gpm.append(gpoe_m)
240 | layer_2_gpoe_gpv.append(gpoe_v)
241 | layer_2_bcm_gpm.append(bcm_m)
242 | layer_2_bcm_gpv.append(bcm_v)
243 | layer_2_rbcm_gpm.append(rbcm_m)
244 | layer_2_rbcm_gpv.append(rbcm_v)
245 |
246 | ###########################
247 | # LAYER 2 #
248 | ###########################
249 |
250 | print(' ')
251 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials))
252 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping))
253 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge))
254 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge))
255 | print('\ -')
256 | print(' ------ FINAL ENSEMBLE ------')
257 | print('/ -')
258 | print(' ')
259 |
260 | #########################################################
261 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
262 | #########################################################
263 | # A. POE _________//
264 | # B. GPOE _________//
265 | # C. BCM _________//
266 | # D. RBCM _________//
267 |
268 | poe_model = PoeGP(models=layer_0_dist)
269 | gpoe_model = GenPoeGP(models=layer_0_dist)
270 | bcm_model = BayesianCM(models=layer_0_dist)
271 | rbcm_model = RobustBayesianCM(models=layer_0_dist)
272 |
273 | #########################################################
274 | # -- METRICS --------------------------------------------
275 | #########################################################
276 |
277 | # A. POE _________//
278 |
279 | nlpd = poe_model.nlpd_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, y_test)
280 | rmse = poe_model.rmse_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, f_test)
281 | mae = poe_model.mae_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, f_test)
282 |
283 | poe_metrics[0, trial] = nlpd
284 | poe_metrics[1, trial] = rmse
285 | poe_metrics[2, trial] = mae
286 |
287 | print('POE-NLPD: ', nlpd)
288 | print('POE-RMSE: ', rmse)
289 | print('POE-MAE: ', mae)
290 | print(' ')
291 |
292 | # B. GPOE _________//
293 |
294 | nlpd = gpoe_model.nlpd_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, y_test)
295 | rmse = gpoe_model.rmse_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, f_test)
296 | mae = gpoe_model.mae_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, f_test)
297 |
298 | gpoe_metrics[0, trial] = nlpd
299 | gpoe_metrics[1, trial] = rmse
300 | gpoe_metrics[2, trial] = mae
301 |
302 | print('GenPOE-NLPD: ', nlpd)
303 | print('GenPOE-RMSE: ', rmse)
304 | print('GenPOE-MAE: ', mae)
305 | print(' ')
306 |
307 | # C. BCM _________//
308 |
309 | nlpd = bcm_model.nlpd_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, y_test)
310 | rmse = bcm_model.rmse_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, f_test)
311 | mae = bcm_model.mae_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, f_test)
312 |
313 | bcm_metrics[0, trial] = nlpd
314 | bcm_metrics[1, trial] = rmse
315 | bcm_metrics[2, trial] = mae
316 |
317 | print('BCM-NLPD: ', nlpd)
318 | print('BCM-RMSE: ', rmse)
319 | print('BCM-MAE: ', mae)
320 | print(' ')
321 |
322 | # D. RBCM _________//
323 |
324 | nlpd = rbcm_model.nlpd_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, y_test)
325 | rmse = rbcm_model.rmse_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, f_test)
326 | mae = rbcm_model.mae_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, f_test)
327 |
328 | rbcm_metrics[0, trial] = nlpd
329 | rbcm_metrics[1, trial] = rmse
330 | rbcm_metrics[2, trial] = mae
331 |
332 | print('RBCM-NLPD: ', nlpd)
333 | print('RBCM-RMSE: ', rmse)
334 | print('RBCM-MAE: ', mae)
335 | print(' ')
336 |
337 | # save to csv file
338 | #np.savetxt(my_path + 'metrics/recy_metrics_' + experiment + '.csv', recy_metrics, delimiter=',')
339 | np.savetxt(my_path + 'metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',')
340 | np.savetxt(my_path + 'metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',')
341 | np.savetxt(my_path + 'metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',')
342 | np.savetxt(my_path + 'metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',')
343 |
--------------------------------------------------------------------------------
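The script above merges experts in stages through predictive_layer, treating each merged distribution as a new expert at the next layer. For the plain product-of-experts rule this layered scheme matches a flat merge exactly, since precisions add associatively; for BCM and rBCM the prior corrections and data-dependent weights make the layered result an approximation. A small sketch of one such layer (illustrative only; it does not use the repo's predictive_layer API):

def poe_merge(means, variances):
    # product-of-experts merge of Gaussian predictives: precisions add up
    prec = sum(1.0 / v for v in variances)
    mean = (1.0 / prec) * sum(m / v for m, v in zip(means, variances))
    return mean, 1.0 / prec

def merge_in_groups(means, variances, group_size):
    # one layer of the hierarchy: merge experts group by group
    merged = [poe_merge(means[i:i + group_size], variances[i:i + group_size])
              for i in range(0, len(means), group_size)]
    return [m for m, _ in merged], [v for _, v in merged]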
/experiments/paralell.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | # -----------------------------------------------------------------
14 | # Experiment -- Parallel Inference
15 | # -----------------------------------------------------------------
16 |
17 | import torch
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 | from tikzplotlib import save as tikz_save
21 |
22 | plt.rc('text', usetex=True)
23 | plt.rc('font', family='serif')
24 |
25 | # COOLORS.CO palettes
26 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e']
27 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031']
28 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56']
29 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e']
30 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559']
31 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2']
32 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54']
33 |
34 | color_palette = color_palette_2
35 |
36 | from kernels.rbf import RBF
37 | from likelihoods.gaussian import Gaussian
38 | from models.svgp import SVGP
39 | from models.ensemblegp import EnsembleGP
40 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel
41 | from optimization.algorithms import AlgorithmVEM
42 | from util import smooth_function, smooth_function_bias
43 |
44 | tasks = 5
45 | N_k = 500
46 | M_k = 15
47 | M_e = 35
48 | plot_local = True
49 | plot_ensemble = True
50 | save = True
51 |
52 | ###########################
53 | # #
54 | # DISTRIBUTED TASKS #
55 | # #
56 | ###########################
57 |
58 | min_x = 0.0
59 | max_x = 5.5
60 | segment_x = (max_x - min_x)/tasks
61 | x_tasks = []
62 | y_tasks = []
63 | for k in range(tasks):
64 | x_k = ((min_x+(k*segment_x))-(min_x+((k+1)*segment_x)))*torch.rand(N_k,1) + (min_x+((k+1)*segment_x))
65 | x_k, _ = torch.sort(x_k, dim=0)
66 | y_k = smooth_function_bias(x_k) + 2.0*torch.randn(N_k,1)
67 | x_tasks.append(x_k)
68 | y_tasks.append(y_k)
69 |
70 | ###########################
71 | # #
72 | # PARALLEL INFERENCE #
73 | # #
74 | ###########################
75 |
76 | N_k_test = 400
77 | x_test = torch.linspace(min_x-0.5, max_x+0.5, N_k_test)[:, None]
78 | models = []
79 | for k, x_k in enumerate(x_tasks):
80 | print('- -')
81 | print('----- TASK k='+str(k+1)+' ------')
82 | print('- -')
83 | kernel_k = RBF()
84 | likelihood_k = Gaussian(fit_noise=False)
85 | model_k = SVGP(kernel_k, likelihood_k, M_k)
86 |
87 | z_k_min = min_x+(k*segment_x)
88 | z_k_max = min_x+((k+1)*segment_x)
89 | #model_k.z = torch.nn.Parameter((z_k_max - z_k_min)*torch.rand(M_k, 1) + z_k_min, requires_grad=True)
90 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True)
91 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=15)
92 |
93 | vem_algorithm.ve_its = 20
94 | vem_algorithm.vm_its = 10
95 | vem_algorithm.lr_m = 1e-6
96 | vem_algorithm.lr_L = 1e-10
97 | vem_algorithm.lr_hyp = 1e-10
98 | vem_algorithm.lr_z = 1e-10
99 |
100 | vem_algorithm.fit()
101 |
102 | models.append(model_k)
103 |
104 | if plot_local:
105 | gp, gp_upper, gp_lower = model_k.predictive(x_test)
106 |
107 | plt.figure(figsize=(12, 4))
108 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k], lw=1.5)
109 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k, 1), color=color_palette[k], linestyle='', marker='.',markersize=5)
110 |
111 | plt.plot(x_test, gp, 'k-', linewidth=1.5)
112 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5')
113 | plt.plot(x_test, gp_upper, 'k-', linewidth=3.0)
114 | plt.plot(x_test, gp_lower, 'k-', linewidth=3.0)
115 |
116 | plt.title(r'Variational Sparse GP -- (task=' + str(k+1) + ')')
117 | plt.xlabel(r'Input, $x$')
118 | plt.ylabel(r'Output, $y$')
119 | plt.xlim(min_x - 0.5, max_x + 0.5)
120 | plt.ylim(-22.0, 22.0)
121 |
122 | if save:
123 |             plt.savefig(fname='./figs/parallel_task_'+str(k+1)+'.pdf',format='pdf')
124 |
125 | plt.show()
126 |
127 | ###########################
128 | # #
129 | # ENSEMBLE INFERENCE #
130 | # #
131 | ###########################
132 | print('- -')
133 | print('----- ENSEMBLE ------')
134 | print('- -')
135 |
136 | kernel = RBF()
137 | likelihood = Gaussian(fit_noise=False)
138 | model_e = EnsembleGP(kernel, likelihood, models, M_e)
139 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:,None], requires_grad=True)
140 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=30)
141 |
142 | vem_algorithm.ve_its = 30
143 | vem_algorithm.vm_its = 10
144 | vem_algorithm.lr_m = 1e-3
145 | vem_algorithm.lr_L = 1e-6
146 | vem_algorithm.lr_hyp = 1e-8
147 | vem_algorithm.lr_z = 1e-8
148 |
149 | vem_algorithm.fit()
150 |
151 | N_e_test = 400
152 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None]
153 |
154 | if plot_ensemble:
155 | gp, gp_upper, gp_lower = model_e.predictive(x_test_ensemble)
156 |
157 | # Plot Ensemble
158 | plt.figure(figsize=(12, 4))
159 | for k, x_k in enumerate(x_tasks):
160 | #if k%10==0:
161 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k], lw=1.5)
162 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k,1), color=color_palette[k], linestyle='', marker='.', markersize=5)
163 |
164 | plt.plot(model_e.z.detach(), -20.0 * torch.ones(M_e, 1), color='k', linestyle='', marker='x', markersize=7, markeredgewidth=1.1)
165 | plt.plot(x_test_ensemble, gp, 'k-', linewidth=1.5)
166 | #plt.fill_between(x_test_ensemble.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2, lw='0.5')
167 | plt.plot(x_test_ensemble, gp_upper, 'k-', linewidth=3.0)
168 | plt.plot(x_test_ensemble, gp_lower, 'k-', linewidth=3.0)
169 |
170 | plt.title(r'Ensemble GP Model -- (tasks='+str(tasks)+')')
171 | plt.xlabel(r'Input, $x$')
172 | plt.ylabel(r'Output, $y$')
173 | plt.xlim(min_x-0.5, max_x+0.5)
174 | plt.ylim(-22.0, 22.0)
175 |
176 | if save:
177 | plt.savefig(fname='./figs/parallel_ensemble.pdf',format='pdf')
178 |
179 | plt.show()
180 |
181 | N_e_test = 400
182 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None]
183 | f_test_ensemble = smooth_function_bias(x_test_ensemble)  # evaluate against the same function used to generate the tasks
184 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1)
185 |
186 | nlpd = model_e.nlpd(x_test_ensemble, y_test_ensemble)
187 | rmse = model_e.rmse(x_test_ensemble, f_test_ensemble)
188 | mae = model_e.mae(x_test_ensemble, f_test_ensemble)
189 |
190 | print("NLPD: ", nlpd)
191 | print("RMSE: ", rmse)
192 | print("MAE: ", mae)
--------------------------------------------------------------------------------
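For reference, the reported metrics have the usual definitions under a Gaussian predictive N(mu, s2). The sketch below states those definitions only; the repo's nlpd/rmse/mae methods may differ in detail, for instance in whether the likelihood noise is added to the predictive variance:

import math
import torch

def nlpd(mu, s2, y, noise_var):
    var = s2 + noise_var
    return (0.5 * torch.log(2.0 * math.pi * var) + 0.5 * (y - mu) ** 2 / var).mean()

def rmse(mu, f):
    return torch.sqrt(((f - mu) ** 2).mean())

def mae(mu, f):
    return (f - mu).abs().mean()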
/experiments/solar.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | # -----------------------------------------------------------------
14 | # Experiment -- Solar Dataset
15 | # -----------------------------------------------------------------
16 |
17 | from kernels.rbf import RBF
18 | from likelihoods.gaussian import Gaussian
19 | from models.svgp import SVGP
20 | from models.ensemblegp import EnsembleGP
21 | from baselines.distgp import DistGP
22 | from baselines.poegp import PoeGP
23 | from baselines.gpoegp import GenPoeGP
24 | from baselines.bcm import BayesianCM
25 | from baselines.rbcm import RobustBayesianCM
26 | from baselines.dvigp import DVIGP
27 | from optimization.algorithms import AlgorithmVEM
28 | from optimization.algorithms import GPR_Optimizer
29 | from optimization.algorithms import AlgorithmVEM
30 | from sklearn.model_selection import train_test_split
31 |
32 | import torch
33 | import numpy as np
34 | import scipy.io as sio
35 | import matplotlib.pyplot as plt
36 |
37 | plt.rc('text', usetex=True)
38 | plt.rc('font', family='serif')
39 |
40 | # COOLORS.CO palettes
41 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e']
42 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031']
43 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56']
44 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e']
45 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559']
46 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2']
47 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54']
48 |
49 | palette = color_palette_4
50 |
51 | trials = 10
52 | experiment = 'solar'
53 |
54 | recy_metrics = np.zeros((3,trials))
55 | poe_metrics = np.zeros((3,trials))
56 | gpoe_metrics = np.zeros((3,trials))
57 | bcm_metrics = np.zeros((3,trials))
58 | rbcm_metrics = np.zeros((3,trials))
59 |
60 | # Load Solar Data --
61 | data = sio.loadmat('../data/nasa.mat')
62 | y = data['nasa'][:,2]
63 | y = np.log(y + 1)
64 | y = y[:,np.newaxis]
65 | y = (y - np.mean(y)) # mean normalization
66 | x = np.linspace(0,100, y.shape[0])[:,np.newaxis]
67 |
68 | print(y.shape)
69 |
70 |
71 | for trial in range(trials):
72 |
73 |     print('TRIAL = ' + str(trial + 1) + '/' + str(trials))
74 |
75 | ###########################
76 | # #
77 | # DISTRIBUTED TASKS #
78 | # #
79 | ###########################
80 |
81 | tasks = 50
82 | min_x = 0.0
83 | max_x = 100.0
84 | segment_x = (max_x - min_x)/tasks
85 | x_tasks = [] # training x -- inputs
86 | y_tasks = [] # training y -- outputs
87 |
88 |     x_test = torch.zeros(0, 1) # test x -- inputs (start empty so no spurious point is kept)
89 |     y_test = torch.zeros(0, 1) # test y -- outputs
90 |
91 | n_training = 0
92 | n_test = 0
93 | for k in range(tasks):
94 | min_x_k = min_x + (k*segment_x)
95 | max_x_k = min_x + ((k+1)*segment_x)
96 | y_k = y[(x[:, 0] > min_x_k) & (x[:, 0] < max_x_k), :]
97 | x_k = x[(x[:, 0] > min_x_k) & (x[:, 0] < max_x_k), :]
98 |
99 | x_k_train, x_k_test, y_k_train, y_k_test = train_test_split(x_k, y_k, test_size = 0.2, random_state = 42)
100 |
101 | x_tasks.append(torch.from_numpy(x_k_train).float())
102 | y_tasks.append(torch.from_numpy(y_k_train).float())
103 |
104 | x_test = torch.cat((x_test, torch.from_numpy(x_k_test).float()), 0)
105 | y_test = torch.cat((y_test, torch.from_numpy(y_k_test).float()), 0)
106 |
107 | #x_k_test = x_k[::5, :]
108 | #y_k_test = y_k[::5, :]
109 |
110 | #x_tasks.append(torch.from_numpy(np.delete(x_k,np.s_[::5])[:,None]).float())
111 | #y_tasks.append(torch.from_numpy(np.delete(y_k,np.s_[::5])[:,None]).float())
112 |
113 | #x_test = torch.cat((x_test, torch.from_numpy(x_k_test).float()), 0)
114 | #y_test = torch.cat((y_test, torch.from_numpy(y_k_test).float()), 0)
115 |
116 | n_training += y_k_train.shape[0]
117 | n_test += y_k_test.shape[0]
118 |
119 |
120 | print('Total # of tasks: ', len(x_tasks))
121 |     print('Number of training samples: ', n_training)
122 |     print('Number of test samples: ', n_test)
123 |
124 | ###########################
125 | # #
126 | # PARALLEL INFERENCE #
127 | # #
128 | ###########################
129 |
130 | M_k = 6
131 | models = [] # for recyclable GPs
132 | models_dist = [] # for distributed GPs
133 | x_all = [] # for distributed GPs
134 | y_all = [] # for distributed GPs
135 | for k, x_k in enumerate(x_tasks):
136 | print('- -')
137 | print('----- TASK k=' + str(k + 1) + ' ------')
138 | print('- -')
139 | ######################################################
140 | # 1. RECYCLABLE GP
141 | ######################################################
142 | kernel_k = RBF(length_scale=0.2, variance=1.0)
143 | likelihood_k = Gaussian(sigma=0.1, fit_noise=True)
144 | model_k = SVGP(kernel_k, likelihood_k, M_k)
145 |
146 | z_k_min = min_x + (k*segment_x)
147 | z_k_max = min_x + ((k+1)*segment_x)
148 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True)
149 |
150 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=20)
151 |
152 | vem_algorithm.ve_its = 20
153 | vem_algorithm.vm_its = 20
154 | vem_algorithm.lr_m = 1e-5
155 | vem_algorithm.lr_L = 1e-8
156 | vem_algorithm.lr_hyp = 1e-10
157 | vem_algorithm.lr_z = 1e-10
158 |
159 | vem_algorithm.fit()
160 |
161 | ######################################################
162 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
163 | ######################################################
164 |
165 | kernel_j = RBF()
166 | likelihood_j = Gaussian(fit_noise=False)
167 | model_j = DistGP(kernel_j, likelihood_j)
168 | GPR_Optimizer(model_j, x_k, y_tasks[k])
169 |
170 | models_dist.append(model_j)
171 | x_all.append(x_k)
172 | y_all.append(y_tasks[k])
173 |
174 | ###########################
175 | # #
176 | # ENSEMBLE INFERENCE #
177 | # #
178 | ###########################
179 | print('- -')
180 | print('----- ENSEMBLE ------')
181 | print('- -')
182 |
183 | ######################################################
184 | # 1. RECYCLABLE GP
185 | ######################################################
186 |
187 | M_e = 90
188 | kernel = RBF()
189 | likelihood = Gaussian(fit_noise=False)
190 | model_e = EnsembleGP(kernel, likelihood, models, M_e)
191 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:, None], requires_grad=True)
192 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=10)
193 |
194 | vem_algorithm.ve_its = 30
195 | vem_algorithm.vm_its = 10
196 | vem_algorithm.lr_m = 1e-3
197 | vem_algorithm.lr_L = 1e-6
198 | vem_algorithm.lr_hyp = 1e-8
199 | vem_algorithm.lr_z = 1e-8
200 |
201 | vem_algorithm.fit()
202 |
203 | nlpd = model_e.nlpd(x_test, y_test)
204 | rmse = model_e.rmse(x_test, y_test)
205 | mae = model_e.mae(x_test, y_test)
206 |
207 | recy_metrics[0, trial] = nlpd
208 | recy_metrics[1, trial] = rmse
209 | recy_metrics[2, trial] = mae
210 |
211 | print('Recyclable - NLPD: ', nlpd)
212 | print('Recyclable - RMSE: ', rmse)
213 | print('Recyclable - MAE: ', mae)
214 | print(' ')
215 |
216 | ######################################################
217 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE)
218 | ######################################################
219 |
220 | # A. POE _________//
221 |
222 | poe_model = PoeGP(models_dist)
223 |
224 | nlpd = poe_model.nlpd(x_all, y_all, x_test, y_test)
225 | rmse = poe_model.rmse(x_all, y_all, x_test, y_test)
226 | mae = poe_model.mae(x_all, y_all, x_test, y_test)
227 |
228 | poe_metrics[0, trial] = nlpd
229 | poe_metrics[1, trial] = rmse
230 | poe_metrics[2, trial] = mae
231 |
232 | print('POE-NLPD: ', nlpd)
233 | print('POE-RMSE: ', rmse)
234 | print('POE-MAE: ', mae)
235 | print(' ')
236 |
237 | # B. GPOE _________//
238 |
239 | gpoe_model = GenPoeGP(models_dist)
240 |
241 | nlpd = gpoe_model.nlpd(x_all, y_all, x_test, y_test)
242 | rmse = gpoe_model.rmse(x_all, y_all, x_test, y_test)
243 | mae = gpoe_model.mae(x_all, y_all, x_test, y_test)
244 |
245 | gpoe_metrics[0, trial] = nlpd
246 | gpoe_metrics[1, trial] = rmse
247 | gpoe_metrics[2, trial] = mae
248 |
249 | print('GenPOE-NLPD: ', nlpd)
250 | print('GenPOE-RMSE: ', rmse)
251 | print('GenPOE-MAE: ', mae)
252 | print(' ')
253 |
254 | # C. BCM _________//
255 |
256 | bcm_model = BayesianCM(models_dist)
257 |
258 | nlpd = bcm_model.nlpd(x_all, y_all, x_test, y_test)
259 | rmse = bcm_model.rmse(x_all, y_all, x_test, y_test)
260 | mae = bcm_model.mae(x_all, y_all, x_test, y_test)
261 |
262 | bcm_metrics[0, trial] = nlpd
263 | bcm_metrics[1, trial] = rmse
264 | bcm_metrics[2, trial] = mae
265 |
266 | print('BCM-NLPD: ', nlpd)
267 | print('BCM-RMSE: ', rmse)
268 | print('BCM-MAE: ', mae)
269 | print(' ')
270 |
271 | # D. RBCM _________//
272 |
273 | rbcm_model = RobustBayesianCM(models_dist)
274 |
275 | nlpd = rbcm_model.nlpd(x_all, y_all, x_test, y_test)
276 | rmse = rbcm_model.rmse(x_all, y_all, x_test, y_test)
277 | mae = rbcm_model.mae(x_all, y_all, x_test, y_test)
278 |
279 | rbcm_metrics[0, trial] = nlpd
280 | rbcm_metrics[1, trial] = rmse
281 | rbcm_metrics[2, trial] = mae
282 |
283 | print('RBCM-NLPD: ', nlpd)
284 | print('RBCM-RMSE: ', rmse)
285 | print('RBCM-MAE: ', mae)
286 | print(' ')
287 |
288 | # save to csv file
289 | np.savetxt('./metrics/recy_metrics_' + experiment + '.csv', recy_metrics, delimiter=',')
290 | np.savetxt('./metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',')
291 | np.savetxt('./metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',')
292 | np.savetxt('./metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',')
293 | np.savetxt('./metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',')
--------------------------------------------------------------------------------
/extra/modular_gp_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/extra/modular_gp_logo.png
--------------------------------------------------------------------------------
/kernels/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/kernels/__init__.py
--------------------------------------------------------------------------------
/kernels/coregionalization.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | from util import squared_distance
15 | from kernels.kernel import Kernel
16 | from kernels.rbf import RBF
17 |
18 | class LMC(Kernel):
19 | """
20 | Class for Linear Model of Coregionalization / Kernel
21 | """
22 |
23 | def __init__(self, kernels, output_dim, rank=1, W=None, kappa=None, variance=None, length_scale=None, input_dim=None):
24 | super().__init__(input_dim)
25 |
26 | # Dimensionality of coregionalization kernel
27 | self.Q = len(kernels)
28 | self.output_dim = output_dim
29 | self.rank = rank
30 | if self.rank > output_dim:
31 | print("Warning: Unusual choice of rank, rank should be less than output dim.")
32 |
33 | # Coregionalization kernel / mixing hyper-parameters
34 | if W is None:
35 | self.W = torch.nn.Parameter(torch.randn(self.output_dim, self.Q), requires_grad=True)
36 | else:
37 |             self.W = torch.nn.Parameter(W, requires_grad=True)  # expected shape: (output_dim, Q)
38 |
39 | # Registration of coregionalization parameters
40 | self.register_parameter('coregionalization_W', self.W)
41 |
42 | # Independent kernels
43 | self.kernels = kernels
44 |
45 | def B_coefficients(self):
46 | B_coeff = []
47 | for q in range(self.Q):
48 | B_q = torch.mm(self.W[:,q:q+1], self.W[:,q:q+1].t())
49 | B_coeff.append(B_q)
50 | return B_coeff
51 |
52 | def Kff(self, X, k):
53 | """
54 | Builds the cross-covariance matrix Kfdfd = cov[f_d(x),f_d(x)] of a Multi-output GP
55 | :param X: Input data
56 | :param k: Output function
57 | """
58 | N,_ = X.shape
59 | Kff = torch.zeros(N,N)
60 | B = self.B_coefficients()
61 | for q, B_q in enumerate(B):
62 | Kff += B_q[k,k] * self.kernels[q].K(X, X)
63 |
64 | return Kff
65 |
66 | def Kfu(self, X, Z, k):
67 | """
68 | Builds the cross-covariance cov[f_d(x),u(z)] of a Multi-output GP
69 | :param X: Input data
70 | :param Z: Inducing points (M, D, Q)
71 | :param k: Output function
72 | """
73 | N, _ = X.shape
74 | M, Xdim, _ = Z.shape
75 |
76 | B = self.B_coefficients()
77 | Kfu = torch.empty(N, M, self.Q)
78 | for q, B_q in enumerate(B):
79 | Kfu[:,:,q] = self.W[k,q] * self.kernels[q].K(X, Z[:,:,q])
80 |
81 | return Kfu
82 |
83 |
--------------------------------------------------------------------------------
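With rank-1 coregionalization, the coefficients B_q = w_q w_q^T combine the latent kernels into the multi-output covariance cov[f_d(x), f_d'(x')] = sum_q B_q[d, d'] k_q(x, x'). A small sketch assembling the full (D*N) x (D*N) covariance from the class above (illustrative; it relies on the default random initialisation of W):

import torch
from kernels.rbf import RBF
from kernels.coregionalization import LMC

kernels = [RBF(), RBF()]
lmc = LMC(kernels, output_dim=3)
X = torch.linspace(0.0, 1.0, 10)[:, None]

B = lmc.B_coefficients()
D, N = lmc.output_dim, X.shape[0]
K_full = torch.zeros(D * N, D * N)
for q, B_q in enumerate(B):
    K_q = lmc.kernels[q].K(X, X)                 # latent kernel q evaluated on the inputs
    for d in range(D):
        for d2 in range(D):
            K_full[d * N:(d + 1) * N, d2 * N:(d2 + 1) * N] += B_q[d, d2] * K_q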
/kernels/kernel.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 |
14 |
15 | import torch
16 | import numpy as np
17 | from util import squared_distance
18 |
19 | class Kernel(torch.nn.Module):
20 | """
21 | Base class for kernels
22 | """
23 | def __init__(self, input_dim=None):
24 | super(Kernel, self).__init__()
25 |
26 | # Input dimension -- x
27 | if input_dim is None:
28 | input_dim = 1
29 | else:
30 | input_dim = int(input_dim)
31 |
32 | self.input_dim = input_dim
--------------------------------------------------------------------------------
/kernels/rbf.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 |
14 | import torch
15 | import numpy as np
16 | from kernels.stationary import Stationary
17 |
18 | class RBF(Stationary):
19 | """
20 | The Radial Basis Function (RBF) or Squared Exponential / Gaussian Kernel
21 | """
22 |
23 | def K(self, X, X2=None):
24 | variance = self.variance.abs().clamp(min=0.0, max=5.0)
25 | r2 = torch.clamp(self.squared_dist(X, X2),min=0.0, max=np.inf)
26 | K = variance*torch.exp(-r2 / 2.0)
27 |
28 |         # Ensure that K is PSD (add jitter if the Cholesky factorization fails)
29 | if X2 is None:
30 | try:
31 | _ = torch.cholesky(K)
32 | except RuntimeError:
33 | print('Jitter added')
34 | jitter = 1e-5
35 | idx = torch.arange(K.shape[-1])
36 | Kprime = K.clone()
37 | Kprime[idx, idx] += jitter
38 | K = Kprime
39 |
40 | return K
--------------------------------------------------------------------------------
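
Usage note (not part of the repository): a minimal sketch evaluating the RBF kernel above on toy inputs; the jitter branch only triggers if the Cholesky factorization of the Gram matrix fails.

import torch
from kernels.rbf import RBF

kern = RBF(variance=1.0, length_scale=0.2, input_dim=1)
X = torch.linspace(-1.0, 1.0, 10)[:, None]

K = kern.K(X)                 # 10 x 10 Gram matrix (jitter added only if Cholesky fails)
Kdiag = kern.Kdiag(X)         # shortcut for the diagonal: the (clamped) variance
print(K.shape, torch.allclose(torch.diag(K), Kdiag))
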
/kernels/stationary.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 |
14 | import torch
15 | from util import squared_distance
16 | from kernels.kernel import Kernel
17 |
18 | class Stationary(Kernel):
19 | """
20 | Class for Stationary Kernel
21 | """
22 |
23 | def __init__(self, variance=None, length_scale=None, input_dim=None, ARD=False):
24 | super().__init__(input_dim)
25 |
26 | if input_dim is None:
27 | self.input_dim = 1
28 | else:
29 | self.input_dim = input_dim
30 |
31 | self.ARD = ARD # Automatic relevance determination
32 | # Length-scale/smoothness of the kernel -- l
33 | if self.ARD:
34 | if length_scale is None:
35 | length_scale = 0.1 * torch.ones(self.input_dim)
36 | else:
37 | if length_scale is None:
38 | length_scale = 0.1
39 |
40 |         # Variance/amplitude of the kernel -- \sigma
41 | if variance is None:
42 | variance = 2.0
43 |
44 | self.length_scale = torch.nn.Parameter(length_scale*torch.ones(1), requires_grad=True)
45 | self.variance = torch.nn.Parameter(variance*torch.ones(1), requires_grad=True)
46 | self.register_parameter('length_scale', self.length_scale)
47 | self.register_parameter('variance', self.variance)
48 |
49 | def squared_dist(self, X, X2):
50 | """
51 | Returns the SCALED squared distance between X and X2.
52 | """
53 | length_scale = self.length_scale.abs().clamp(min=0.0, max=10.0)
54 |
55 | if not self.ARD:
56 | if X2 is None:
57 | dist = squared_distance(X/length_scale)
58 | else:
59 | dist = squared_distance(X/length_scale, X2/length_scale)
60 | else:
61 | if X2 is None:
62 | dist = squared_distance(X / length_scale)
63 | else:
64 | dist = squared_distance(X / length_scale, X2 / length_scale)
65 |
66 | return dist
67 |
68 | def Kdiag(self, X):
69 | variance = torch.abs(self.variance)
70 | return variance.expand(X.size(0))
--------------------------------------------------------------------------------
/likelihoods/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/likelihoods/__init__.py
--------------------------------------------------------------------------------
/likelihoods/bernoulli.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | import numpy as np
15 | from likelihoods.likelihood import Likelihood
16 | from torch.distributions.normal import Normal
17 | from torch.distributions.bernoulli import Bernoulli as Ber
18 |
19 | class Bernoulli(Likelihood):
20 | """
21 |     Class for Bernoulli Likelihood
22 | """
23 | def __init__(self):
24 | super(Bernoulli, self).__init__()
25 |
26 |
27 | def pdf(self, f, y):
28 |
29 | sigmoid = torch.nn.Sigmoid()
30 | p = sigmoid(f)#.flatten()
31 | bernoulli = Ber(probs=p)
32 | pdf = torch.exp(bernoulli.log_prob(y))
33 | return pdf
34 |
35 | def logpdf(self, f, y):
36 | sigmoid = torch.nn.Sigmoid()
37 | p = sigmoid(f).flatten()
38 | bernoulli = Ber(probs=p)
39 | logpdf = bernoulli.log_prob(y)
40 | return logpdf
41 |
42 | def variational_expectation(self, y, m, v):
43 | # Gauss-Hermite Quadrature
44 | gh_p, gh_w = self.gh_points()
45 | gh_w = torch.div(gh_w, np.sqrt(np.pi))
46 |
47 | m, v, y = m.flatten(), v.flatten(), y.flatten()
48 | f = gh_p[None, :] * torch.sqrt(2. * v[:, None]) + m[:, None]
49 | y = y[:,None].repeat(1,f.size(1))
50 |
51 | logp = self.logpdf(f.view(-1), y.view(-1))
52 | logp = logp.view(f.size()).double()
53 | gh_w = gh_w[:, None]
54 |
55 | var_exp = logp.mm(gh_w)
56 | return var_exp
57 |
58 | def log_predictive(self, y_test, mu_gp, v_gp, num_samples=1000):
59 | N_test = y_test.size(0)
60 | # function samples:
61 | normal = Normal(loc=mu_gp.flatten(), scale=torch.sqrt(v_gp).flatten())
62 | f_samples = torch.reshape(normal.sample(sample_shape=(1,num_samples))[0,:,:], (-1,))
63 |
64 | # monte-carlo:
65 | logpdf = self.logpdf(f_samples, y_test.repeat(num_samples,1).flatten())
66 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0)
67 | return -log_pred
68 |
69 |
--------------------------------------------------------------------------------
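
Usage note (not part of the repository): a minimal sketch of the Bernoulli likelihood above with hypothetical tensors; variational_expectation approximates E_q(f)[log p(y|f)] with Gauss-Hermite quadrature, one value per data point.

import torch
from likelihoods.bernoulli import Bernoulli

lik = Bernoulli()
print(lik.pdf(torch.zeros(1), torch.ones(1)))                  # sigmoid(0) = 0.5

y = torch.tensor([[1.0], [0.0], [1.0]], dtype=torch.float64)   # binary labels
m = torch.tensor([[2.0], [-1.0], [0.5]], dtype=torch.float64)  # q(f) means
v = torch.tensor([[0.3], [0.2], [0.4]], dtype=torch.float64)   # q(f) variances
print(lik.variational_expectation(y, m, v))                    # one expectation per data point
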
/likelihoods/gaussian.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | import numpy as np
15 | from likelihoods.likelihood import Likelihood
16 | from torch.distributions.normal import Normal
17 |
18 | class Gaussian(Likelihood):
19 | """
20 | Class for Gaussian Likelihood
21 | """
22 | def __init__(self, sigma=None, fit_noise=False):
23 | super(Gaussian, self).__init__()
24 |
25 | if sigma is None:
26 | sigma=1.0
27 |
28 | self.sigma = torch.nn.Parameter(sigma*torch.ones(1), requires_grad=fit_noise)
29 |
30 |
31 | def pdf(self, f, y):
32 | normal = Normal(loc=f, scale=self.sigma)
33 | pdf = torch.exp(normal.log_prob(y))
34 | return pdf
35 |
36 | def logpdf(self, f, y):
37 | normal = Normal(loc=f, scale=self.sigma)
38 | logpdf = normal.log_prob(y)
39 | return logpdf
40 |
41 |     def variational_expectation(self, y, m, v):
42 |         # Variational Expectation of log-likelihood -- Analytical
43 |         y, m, v = y.flatten(), m.flatten(), v.flatten()  # avoid (N,1) vs (N,) broadcasting to (N,N)
44 |         lik_variance = self.sigma.pow(2)
45 |         expectation = - np.log(2*np.pi) - torch.log(lik_variance) \
46 |                       - (y.pow(2) + m.pow(2) + v - (2*m*y)).div(lik_variance)
47 |         return 0.5*expectation
48 |
49 | def log_predictive(self, y_test, mu_gp, v_gp, num_samples=1000):
50 | # function samples:
51 | normal = Normal(loc=mu_gp.flatten(), scale=torch.sqrt(v_gp).flatten())
52 | f_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:]
53 |
54 | # monte-carlo:
55 | logpdf = self.logpdf(f_samples, y_test.flatten())
56 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0)
57 | return log_pred
--------------------------------------------------------------------------------
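
Usage note (not part of the repository): the Gaussian variational expectation above is the closed form of E_q(f)[log N(y | f, sigma^2)]; a quick Monte-Carlo check on hypothetical scalars.

import torch
from likelihoods.gaussian import Gaussian

torch.manual_seed(0)
lik = Gaussian(sigma=0.5)
y, m, v = torch.tensor([0.3]), torch.tensor([0.1]), torch.tensor([0.04])

analytic = lik.variational_expectation(y, m, v)     # closed form
f = m + torch.sqrt(v) * torch.randn(100000)         # samples from q(f) = N(m, v)
mc = lik.logpdf(f, y).mean()                        # Monte-Carlo estimate
print(analytic.item(), mc.item())                   # the two numbers should roughly agree
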
/likelihoods/hetgaussian.py:
--------------------------------------------------------------------------------
1 |
2 | # -----------------------------------------------------------------
3 | # This script belongs to the ModularGP repo
4 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
5 | # Copyright (c) 2021 Pablo Moreno-Munoz
6 | # -----------------------------------------------------------------
7 | #
8 | #
9 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
10 | # Section for Cognitive Systems
11 | # Technical University of Denmark (DTU)
12 | # October 2021
13 |
14 |
15 | import torch
16 | import numpy as np
17 | from likelihoods.likelihood import Likelihood
18 | from torch.distributions.normal import Normal
19 | from util import safe_exp, safe_square
20 |
21 | class HetGaussian(Likelihood):
22 | """
23 | Class for Heteroscedastic Gaussian Likelihood
24 | --
25 | -- Adaptation to Pytorch+GP framework
26 | -- Based on M. Lázaro-Gredilla et al. "Variational Heteroscedastic Gaussian Process Regression" @ ICML 2011
27 | -- Reference: https://icml.cc/Conferences/2011/papers/456_icmlpaper.pdf
28 | """
29 | def __init__(self):
30 | super(HetGaussian, self).__init__()
31 |
32 | def pdf(self, f, g, y):
33 | normal = Normal(loc=f, scale=safe_exp(g))
34 | pdf = safe_exp(normal.log_prob(y))
35 | return pdf
36 |
37 | def logpdf(self, f, g, y):
38 | normal = Normal(loc=f, scale=safe_exp(g))
39 | logpdf = normal.log_prob(y)
40 | return logpdf
41 |
42 |     def variational_expectation(self, y, m_f, v_f, m_g, v_g):
43 |         # Variational Expectation of log-likelihood -- Analytical
44 |         y, m_f, v_f, m_g, v_g = y.flatten(), m_f.flatten(), v_f.flatten(), m_g.flatten(), v_g.flatten()  # avoid (N,1) vs (N,) broadcasting
45 |         precision = torch.clamp(safe_exp(-m_g + (0.5*v_g)), min=-1e9, max=1e9)
46 |         squares = torch.clamp(y**2 + m_f**2 + v_f - (2 * m_f * y), min=-1e9, max=1e9)
47 |         expectation = -np.log(2*np.pi) - m_g - (precision*squares)
48 |         return 0.5*expectation
49 |
50 | def log_predictive(self, y_test, mu_f_gp, v_f_gp, mu_g_gp, v_g_gp, num_samples=1000):
51 | # function samples f:
52 | normal = Normal(loc=mu_f_gp.flatten(), scale=torch.sqrt(v_f_gp).flatten())
53 | f_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:]
54 |
55 | # function samples g:
56 | normal = Normal(loc=mu_g_gp.flatten(), scale=torch.sqrt(v_g_gp).flatten())
57 | g_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:]
58 |
59 | # monte-carlo:
60 | logpdf = self.logpdf(f_samples, g_samples, y_test.flatten())
61 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0)
62 | return log_pred
--------------------------------------------------------------------------------
/likelihoods/likelihood.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | import numpy as np
15 |
16 | class Likelihood(torch.nn.Module):
17 | """
18 | Base class for likelihoods
19 | """
20 | def __init__(self):
21 | super(Likelihood, self).__init__()
22 |
23 | def gh_points(self, T=20):
24 |         # Gauss-Hermite quadrature points
25 | gh_p, gh_w = np.polynomial.hermite.hermgauss(T)
26 | gh_p, gh_w = torch.from_numpy(gh_p), torch.from_numpy(gh_w)
27 | return gh_p, gh_w
28 |
--------------------------------------------------------------------------------
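
Usage note (not part of the repository): the Gauss-Hermite nodes and weights returned by gh_points() implement E_{N(f|m,v)}[g(f)] ≈ sum_i (w_i / sqrt(pi)) g(m + sqrt(2v) t_i), which is how the non-conjugate likelihoods above evaluate their variational expectations. A small self-contained check:

import numpy as np
import torch
from likelihoods.likelihood import Likelihood

gh_t, gh_w = Likelihood().gh_points(T=20)              # Gauss-Hermite nodes and weights

m, v = 0.7, 0.2
g = torch.tanh                                         # any smooth test function
approx = torch.sum((gh_w / np.sqrt(np.pi)) * g(m + np.sqrt(2.0 * v) * gh_t))

f = m + np.sqrt(v) * torch.randn(200000, dtype=torch.float64)
print(approx.item(), g(f).mean().item())               # both approximate E[g(f)], f ~ N(m, v)
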
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/models/__init__.py
--------------------------------------------------------------------------------
/models/chainedgp.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 |
14 | import torch
15 | from torch.distributions import MultivariateNormal as Normal
16 | from likelihoods.hetgaussian import HetGaussian
17 | from torch.distributions import kl_divergence
18 |
19 | import numpy as np
20 | from GPy.inference.latent_function_inference import LatentFunctionInference
21 | from GPy.inference.latent_function_inference.posterior import Posterior
22 |
23 |
24 | class ChainedGP(torch.nn.Module):
25 | """
26 | -- Chained Gaussian Process with Heteroscedastic Gaussian Likelihood --
27 | --
28 | -- Adaptation to Pytorch+GP framework
29 | -- Based on A. Saul et al. "Chained Gaussian Processes" @ AISTATS 2016
30 | -- Reference: http://proceedings.mlr.press/v51/saul16.pdf
31 | """
32 | def __init__(self, kernel_f, kernel_g, M, input_dim=None, batch_rate=1.0):
33 | super(ChainedGP, self).__init__()
34 |
35 | if input_dim is None:
36 | input_dim = 1
37 |
38 | # Dimensions --
39 | self.M = M # num. inducing
40 | self.input_dim = int(input_dim) # dimension of x
41 | self.batch_rate = batch_rate # rate of mini-batch/dataset
42 |
43 | # GP Elements --
44 | self.likelihood = HetGaussian() # type of likelihood
45 | self.kernel_f = kernel_f # type of kernel for f
46 | self.kernel_g = kernel_g # type of kernel for g
47 |
48 | self.logZ = 0.0
49 |
50 | if self.input_dim > 1:
51 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False)
52 | else:
53 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False)
54 |
55 | # Variational distribution f --
56 | self.q_m_f = torch.nn.Parameter(0.5*torch.randn(M,1), requires_grad=True) # variational: mean parameter
57 | self.q_L_f = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance
58 |
59 | # Variational distribution g --
60 | self.q_m_g = torch.nn.Parameter(0.5*torch.randn(M,1), requires_grad=True) # variational: mean parameter
61 | self.q_L_g = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance
62 |
63 | def forward(self, x, y):
64 |
65 | # Variational parameters f --
66 | q_m_f = self.q_m_f
67 | q_L_f = torch.tril(self.q_L_f)
68 | q_S_f = torch.mm(q_L_f, q_L_f.t())
69 |
70 | # Variational parameters g --
71 | q_m_g = self.q_m_g
72 | q_L_g = torch.tril(self.q_L_g)
73 | q_S_g = torch.mm(q_L_g, q_L_g.t())
74 |
75 | # Prior parameters (uses kernel) --
76 | Kuu_f = self.kernel_f.K(self.z)
77 | Kuu_g = self.kernel_g.K(self.z)
78 |
79 | # Distributions -- q(u), p(u)
80 | q_u_f = Normal(q_m_f.flatten(), q_S_f)
81 | p_u_f = Normal(torch.zeros(self.M), Kuu_f)
82 |
83 | q_u_g = Normal(q_m_g.flatten(), q_S_g)
84 | p_u_g = Normal(torch.zeros(self.M), Kuu_g)
85 |
86 | # Calculus of q(f) --
87 | Kff = self.kernel_f.K(x,x)
88 | Kfu = self.kernel_f.K(x, self.z)
89 | Kuf = torch.transpose(Kfu,0,1)
90 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu_f) # is pseudo-inverse?
91 |
92 | A = Kfu.mm(iKuu)
93 | AT = iKuu.mm(Kuf)
94 |
95 | m_f = A.mm(q_m_f)
96 | v_f = torch.diag(Kff + A.mm(q_S_f - Kuu_f).mm(AT))
97 |
98 | # Calculus of q(g) --
99 | Kff = self.kernel_g.K(x,x)
100 | Kfu = self.kernel_g.K(x, self.z)
101 | Kuf = torch.transpose(Kfu,0,1)
102 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu_g) # is pseudo-inverse?
103 |
104 | A = Kfu.mm(iKuu)
105 | AT = iKuu.mm(Kuf)
106 |
107 | m_g = A.mm(q_m_g)
108 | v_g = torch.diag(Kff + A.mm(q_S_g - Kuu_g).mm(AT))
109 |
110 | # Expectation term --
111 | expectation = self.likelihood.variational_expectation(y, m_f, v_f, m_g, v_g)
112 |
113 | # KL divergence --
114 | kl = kl_divergence(q_u_f, p_u_f) + kl_divergence(q_u_g, p_u_g)
115 |
116 | # Lower bound (ELBO) --
117 | elbo = self.batch_rate*expectation.sum() - kl
118 | return -elbo
119 |
120 | def predictive(self, x_new, lik_noise=False):
121 | # Matrices f
122 | q_m_f = self.q_m_f.detach().numpy()
123 | q_L_f = torch.tril(self.q_L_f)
124 | q_S_f = torch.mm(q_L_f, q_L_f.t()).detach().numpy()
125 | Kuu_f = self.kernel_f.K(self.z, self.z).detach().numpy()
126 |
127 | # Matrices g
128 | q_m_g = self.q_m_g.detach().numpy()
129 | q_L_g = torch.tril(self.q_L_g)
130 | q_S_g = torch.mm(q_L_g, q_L_g.t()).detach().numpy()
131 | Kuu_g = self.kernel_g.K(self.z, self.z).detach().numpy()
132 |
133 | # GP function f ------
134 | posterior = Posterior(mean=q_m_f, cov=q_S_f, K=Kuu_f, prior_mean=np.zeros(q_m_f.shape))
135 | Kx = self.kernel_f.K(self.z, x_new).detach().numpy()
136 | Kxx = self.kernel_f.K(x_new, x_new).detach().numpy()
137 |
138 | # GP Predictive Posterior - mean + variance
139 | gp_mu_f = np.dot(Kx.T, posterior.woodbury_vector)
140 | Kxx = np.diag(Kxx)
141 | gp_var_f = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T
142 |
143 | gp_f = gp_mu_f
144 | gp_v_f = gp_var_f
145 |
146 | # GP function g ------
147 | posterior = Posterior(mean=q_m_g, cov=q_S_g, K=Kuu_g, prior_mean=np.zeros(q_m_g.shape))
148 | Kx = self.kernel_g.K(self.z, x_new).detach().numpy()
149 | Kxx = self.kernel_g.K(x_new, x_new).detach().numpy()
150 |
151 | # GP Predictive Posterior - mean + variance
152 | gp_mu_g = np.dot(Kx.T, posterior.woodbury_vector)
153 | Kxx = np.diag(Kxx)
154 | gp_var_g = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T
155 |
156 | gp_g = gp_mu_g
157 | gp_v_g = gp_var_g
158 |
159 | return gp_f, gp_v_f, gp_g, gp_v_g
160 |
161 | def rmse(self, x_new, f_new):
162 | f_gp,_,_,_ = self.predictive(x_new)
163 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
164 | return rmse
165 |
166 | def mae(self, x_new, f_new):
167 | f_gp,_,_,_ = self.predictive(x_new)
168 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
169 | return mae
170 |
171 | def nlpd(self, x_new, y_new):
172 | f_gp, v_f_gp, g_gp, v_g_gp = self.predictive(x_new)
173 | f_gp = torch.from_numpy(f_gp)
174 | v_f_gp = torch.from_numpy(v_f_gp)
175 | g_gp = torch.from_numpy(g_gp)
176 | v_g_gp = torch.from_numpy(v_g_gp)
177 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_f_gp, g_gp, v_g_gp)).detach().numpy()
178 | return nlpd
179 |
180 | def evidence(self, x, y, N_samples=None):
181 | # Approximation CI
182 | if N_samples is None:
183 | N_samples = 1000
184 |
185 | N,_ = x.shape
186 |         v_f = torch.zeros(N)
187 |         v_g = torch.zeros(N)
188 |         for i in range(N):
189 |             v_f[i] = self.kernel_f.K(x[i:i+1,:], x[i:i+1,:])
190 |             v_g[i] = self.kernel_g.K(x[i:i+1,:], x[i:i+1,:])
191 |
192 |         f_samples = Normal(torch.zeros(N), torch.diag(v_f)).sample([N_samples]).t() # N x N_samples
193 |         g_samples = Normal(torch.zeros(N), torch.diag(v_g)).sample([N_samples]).t() # N x N_samples
194 |         mc_pdf = self.likelihood.pdf(f_samples, g_samples, torch.tile(y, (1, N_samples)))
195 | mc_expectations = 1/N_samples * torch.sum(torch.clamp(mc_pdf, min=1e-100),1)
196 | print(mc_expectations)
197 | logZ = torch.sum(torch.log(mc_expectations))
198 |
199 | self.logZ = logZ
200 | return logZ
201 |
202 |
203 |
--------------------------------------------------------------------------------
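
Usage note (not part of the repository): a minimal training sketch for the ChainedGP above on hypothetical heteroscedastic toy data, using a plain Adam loop for brevity; the repo's experiments drive training through optimization/algorithms.py instead, and the code relies on a PyTorch version that still provides torch.solve (see requirements.txt).

import torch
from kernels.rbf import RBF
from models.chainedgp import ChainedGP

torch.manual_seed(0)
x = 1.8 * torch.rand(200, 1) - 0.9
noise_std = 0.05 + 0.2 * (x + 0.9) / 1.8              # input-dependent noise level
y = torch.sin(6.0 * x) + noise_std * torch.randn(200, 1)

model = ChainedGP(kernel_f=RBF(), kernel_g=RBF(), M=15)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
for it in range(200):
    optimizer.zero_grad()
    loss = model(x, y)                                # forward pass returns the negative ELBO
    loss.backward()
    optimizer.step()

x_test = torch.linspace(-0.9, 0.9, 50)[:, None]
f_mu, f_var, g_mu, g_var = model.predictive(x_test)  # mean/variance for f (signal) and g (log-noise)
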
/models/ensemblegp.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 |
14 | import torch
15 | from torch.distributions import MultivariateNormal as Normal
16 | from torch.distributions import kl_divergence
17 | from GPy.inference.latent_function_inference.posterior import Posterior
18 | import numpy as np
19 |
20 | class EnsembleGP(torch.nn.Module):
21 | """
22 | -- Ensemble Variational Inference for Gaussian Processes --
23 | """
24 | def __init__(self, kernel, likelihood, models, M, input_dim=None):
25 | super(EnsembleGP, self).__init__()
26 |
27 | if input_dim is None:
28 | input_dim = 1
29 |
30 | # Dimensions --
31 | self.M = M # num. inducing
32 | self.input_dim = int(input_dim) # dimension of x
33 |
34 | # Ensemble GP Elements --
35 | self.likelihood = likelihood
36 | self.kernel = kernel
37 |
38 | if self.input_dim > 1:
39 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False)
40 | else:
41 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False)
42 |
43 | # Adjacent GP Models
44 | self.models = models # is a list
45 |
46 | # Ensemble Variational distribution --
47 | self.q_m = torch.nn.Parameter(torch.randn(M, 1), requires_grad=True) # variational: mean parameter
48 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance
49 |
50 | def ensemble(self):
51 | # GP prior
52 | Kuu = self.kernel.K(self.z, self.z)
53 | iKuu, _ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse?
54 |
55 | q_m = self.q_m
56 | q_L = torch.tril(self.q_L)
57 | q_S = torch.mm(q_L, q_L.t())
58 |
59 | ensemble_m = []
60 | ensemble_S = []
61 |
62 | # Ensemble GP Distributions
63 | for model_k in self.models:
64 | Kkk = self.kernel.K(model_k.z, model_k.z)
65 | Kuk = self.kernel.K(self.z, model_k.z)
66 | Kku = torch.transpose(Kuk,0,1)
67 |
68 | A = Kku.mm(iKuu)
69 | AT = iKuu.mm(Kuk)
70 |
71 | m_k = Kku.mm(iKuu).mm(q_m)
72 | S_k = Kkk + A.mm(q_S - Kuu).mm(AT)
73 |
74 | ensemble_m.append(m_k)
75 | ensemble_S.append(S_k)
76 |
77 | return ensemble_m, ensemble_S
78 |
79 | def expectation(self):
80 | E = 0.0
81 | ensemble_m, ensemble_S = self.ensemble()
82 |
83 | # Expectation of k ensembles --
84 | for k,model_k in enumerate(self.models):
85 | # Ensemble GP -- q_e()
86 | m_e = ensemble_m[k]
87 | S_e = ensemble_S[k]
88 |
89 | # Past GP variational distribution -- q_k()
90 | m_k = model_k.q_m
91 | L_k = torch.tril(model_k.q_L)
92 | S_k = torch.mm(L_k, L_k.t())
93 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse?
94 |
95 | # Past GP prior -- p_k()
96 | z_k = model_k.z
97 | Kkk = model_k.kernel.K(z_k, z_k)
98 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse?
99 |
100 | # Expectation on terms -- E[log_p()] and E[log_q()]
101 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k)
102 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk)
103 |
104 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]]
105 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ
106 |
107 | return E
108 |
109 |     def divergence(self, q, p):  # KL[q(u) || p(u)]
110 | kl = kl_divergence(q,p)
111 | return kl
112 |
113 | def forward(self):
114 |
115 | # Variational parameters --
116 | q_m = self.q_m
117 | q_L = torch.tril(self.q_L)
118 | q_S = torch.mm(q_L, q_L.t())
119 |
120 | # Prior parameters (uses kernel) --
121 | Kuu = self.kernel.K(self.z, self.z)
122 |
123 | # Distributions -- q(u), p(u)
124 | q_u = Normal(q_m.flatten(), q_S)
125 | p_u = Normal(torch.zeros(self.M), Kuu)
126 |
127 | # Expectation --
128 | expectation = self.expectation()
129 |
130 | # KL divergence --
131 | kl = self.divergence(q_u, p_u)
132 |
133 | # Calls ELBO
134 | elbo = expectation - kl
135 | return -elbo
136 |
137 | def predictive(self, x_new):
138 | # Matrices
139 | q_m = self.q_m.detach().numpy()
140 | q_L = torch.tril(self.q_L)
141 | q_S = torch.mm(q_L, q_L.t()).detach().numpy()
142 | Kuu = self.kernel.K(self.z, self.z).detach().numpy()
143 |
144 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape))
145 | Kx = self.kernel.K(self.z, x_new).detach().numpy()
146 | Kxx = self.kernel.K(x_new, x_new).detach().numpy()
147 |
148 | # GP Predictive Posterior - mean + variance
149 | gp_mu = np.dot(Kx.T, posterior.woodbury_vector)
150 | Kxx = np.diag(Kxx)
151 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T
152 |
153 | gp = gp_mu
154 | gp_upper = gp_mu + 2*np.sqrt(gp_var) #+ 2*self.likelihood.sigma.detach().numpy()
155 | gp_lower = gp_mu - 2*np.sqrt(gp_var) #- 2*self.likelihood.sigma.detach().numpy()
156 |
157 | return gp, gp_upper, gp_lower
158 |
159 | def rmse(self, x_new, f_new):
160 | f_gp,_,_ = self.predictive(x_new)
161 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
162 | return rmse
163 |
164 | def mae(self, x_new, f_new):
165 | f_gp,_,_ = self.predictive(x_new)
166 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
167 | return mae
168 |
169 | def nlpd(self, x_new, y_new):
170 | f_gp, u_gp, _ = self.predictive(x_new)
171 | f_gp = torch.from_numpy(f_gp)
172 | u_gp = torch.from_numpy(u_gp)
173 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0)
174 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
175 | return nlpd
176 |
177 |
--------------------------------------------------------------------------------
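
Usage note (not part of the repository): a minimal sketch of the modular pipeline around EnsembleGP with hypothetical toy partitions: train independent SVGP modules, store their evidence with evidence(), then optimize the ensemble's variational parameters without revisiting the data. Plain Adam loops are used for brevity; the experiments use AlgorithmVEM from optimization/algorithms.py.

import torch
from kernels.rbf import RBF
from likelihoods.gaussian import Gaussian
from models.svgp import SVGP
from models.ensemblegp import EnsembleGP

torch.manual_seed(0)

# 1. Train one SVGP module per data partition (plain Adam for brevity).
modules = []
for shift in (-0.7, 0.3):
    x = 0.4 * torch.rand(100, 1) + shift
    y = torch.sin(4.0 * x) + 0.1 * torch.randn(100, 1)
    module = SVGP(RBF(), Gaussian(sigma=0.1), M=10)
    opt = torch.optim.Adam(module.parameters(), lr=1e-2)
    for _ in range(100):
        opt.zero_grad()
        module(x, y).backward()
        opt.step()
    module.evidence(x, y)             # stores logZ, reused in the ensemble expectation
    modules.append(module)

# 2. Recombine the modules into a single GP without revisiting the data.
ensemble = EnsembleGP(RBF(), Gaussian(sigma=0.1), modules, M=20)
opt = torch.optim.Adam([ensemble.q_m, ensemble.q_L], lr=1e-2)
for _ in range(100):
    opt.zero_grad()
    ensemble().backward()             # forward() takes no data, only the stored modules
    opt.step()

gp, gp_upper, gp_lower = ensemble.predictive(torch.linspace(-0.9, 0.9, 50)[:, None])
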
/models/hetmoensemble.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | from torch.distributions import MultivariateNormal as Normal
15 | from torch.distributions import kl_divergence
16 | from kernels.coregionalization import LMC
17 | from GPy.inference.latent_function_inference.posterior import Posterior
18 | import numpy as np
19 |
20 | class HetMultiOutputEnsembleGP(torch.nn.Module):
21 | """
22 | -- Heterogeneous Multi Output Ensemble for Gaussian Processes --
23 | -- Accepts one channel x,y of data. --
24 | """
25 |
26 | def __init__(self, models, likelihood, kernels, Q, M, input_dim=None, batch_rate=1.0):
27 | super(HetMultiOutputEnsembleGP, self).__init__()
28 |
29 | if input_dim is None:
30 | input_dim = 1
31 | self.batch_rate = batch_rate # rate of mini-batch/dataset
32 |
33 | # Dimensions --
34 | self.M = M # num. inducing
35 | self.K = len(models) # num. models
36 | self.input_dim = int(input_dim) # dimension of x
37 |
38 | # Multi-output GP Ensemble Elements --
39 | self.Q = Q
40 | self.likelihood = likelihood
41 | self.D = self.K + 1 # the number of modules + data channel
42 |
43 | # Kernels --
44 | self.kernels = torch.nn.ModuleList()
45 | for q in range(self.Q):
46 | self.kernels.append(kernels[q])
47 | self.coregionalization = LMC(self.kernels, self.D) # is a list
48 |
49 | if self.input_dim > 1:
50 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False)
51 | else:
52 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False)
53 |
54 | # Adjacent GP Models
55 | self.models = models # is a list
56 |
57 | # Ensemble Variational distribution --
58 | self.q_m = torch.nn.Parameter(torch.randn(M, Q), requires_grad=True) # variational: mean parameter
59 | self.q_L = torch.nn.Parameter(torch.tile(torch.eye(M)[:,:,None], (1, 1, self.Q)), requires_grad=True) # variational: covariance
60 |
61 |
62 | def ensemble(self):
63 | # MOGP prior + Variational parameters
64 | q_m = self.q_m
65 | q_S = torch.zeros(self.M, self.M, self.Q)
66 | Kvv = torch.zeros(self.M, self.M, self.Q)
67 | iKvv = torch.zeros(self.M, self.M, self.Q)
68 | for q in range(self.Q):
69 | Kvv_q = self.kernels[q].K(self.z[:,:,q], self.z[:,:,q])
70 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse?
71 | Kvv[:,:,q] = Kvv_q
72 | iKvv[:,:,q] = iKvv_q
73 |
74 | q_L = torch.tril(self.q_L[:,:,q])
75 | q_S[:,:,q] = torch.mm(q_L, q_L.t())
76 |
77 | ensemble_m = []
78 | ensemble_S = []
79 |
80 | # Ensemble MOGP Distributions
81 | for k, model_k in enumerate(self.models):
82 |
83 | Kuu = self.coregionalization.Kff(model_k.z, k)
84 | Kuv = self.coregionalization.Kfu(model_k.z, self.z, k)
85 |
86 | m_k = 0.0
87 | S_k = Kuu
88 |
89 | # TODO: Make the following faster
90 | for q in range(self.Q):
91 |
92 | A = Kuv[:,:,q].mm(iKvv[:,:,q])
93 | AT = iKvv[:,:,q].mm(Kuv[:,:,q].t())
94 |
95 | m_k += A.mm(q_m[:,q:q+1])
96 | S_k += A.mm(q_S[:,:,q]).mm(AT) - A.mm(Kuv[:,:,q].t())
97 |
98 | ensemble_m.append(m_k)
99 | ensemble_S.append(S_k)
100 |
101 | return ensemble_m, ensemble_S
102 |
103 |
104 | def expectation(self, x, y):
105 | E = 0.0
106 | ensemble_m, ensemble_S = self.ensemble()
107 |
108 | # Expectation of k ensembles --
109 | for k,model_k in enumerate(self.models):
110 | # Ensemble GP -- q_e()
111 | m_e = ensemble_m[k]
112 | S_e = ensemble_S[k]
113 |
114 | # Past GP variational distribution -- q_k()
115 | m_k = model_k.q_m
116 | L_k = torch.tril(model_k.q_L)
117 | S_k = torch.mm(L_k, L_k.t())
118 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse?
119 |
120 | # Past GP prior -- p_k()
121 | z_k = model_k.z
122 | Kkk = model_k.kernel.K(z_k, z_k)
123 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse?
124 |
125 | # Expectation on terms -- E[log_p()] and E[log_q()]
126 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k)
127 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk)
128 |
129 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]]
130 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ
131 |
132 | # Expectation of data channel --
133 | q_m = self.q_m
134 | q_S = torch.zeros(self.M, self.M, self.Q)
135 | Kuu = torch.zeros(self.M, self.M, self.Q)
136 | iKuu = torch.zeros(self.M, self.M, self.Q)
137 |
138 | for q in range(self.Q):
139 | # MOGP latent functions prior
140 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
141 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse?
142 | Kuu[:, :, q] = Kuu_q
143 | iKuu[:, :, q] = iKuu_q
144 |
145 | # Variational parameters + Gaussian integration
146 | q_L = torch.tril(self.q_L[:, :, q])
147 | q_S[:, :, q] = torch.mm(q_L, q_L.t())
148 | Kff = self.coregionalization.Kff(x, self.D-1)
149 | Kfu = self.coregionalization.Kfu(x, self.z, self.D-1)
150 |
151 | m_f = 0.0
152 | S_f = Kff
153 |
154 | for q in range(self.Q):
155 | A = Kfu[:, :, q].mm(iKuu[:, :, q])
156 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t())
157 |
158 | m_f += A.mm(q_m[:, q:q + 1])
159 | S_f += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t())
160 |
161 | v_f = torch.diag(S_f)
162 | expectation_y = self.likelihood.variational_expectation(y, m_f, v_f)
163 |
164 | return E, expectation_y
165 |
166 |     def divergence(self, q_v, p_v):  # KL[q(u) || p(u)] summed over latent functions
167 | kl = 0.0
168 | for q in range(self.Q):
169 | kl += kl_divergence(q_v[q], p_v[q])
170 | return kl
171 |
172 | def forward(self, x, y):
173 |
174 | q_u = []
175 | p_u = []
176 | q_m = self.q_m
177 | q_S = torch.zeros(self.M, self.M, self.Q)
178 | Kuu = torch.zeros(self.M, self.M, self.Q)
179 | for q in range(self.Q):
180 |
181 | # Variational parameters --
182 | q_L = torch.tril(self.q_L[:,:,q])
183 | q_S[:,:,q] = torch.mm(q_L, q_L.t())
184 |
185 | # Prior parameters (uses kernel) --
186 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
187 | Kuu[:, :, q] = Kuu_q
188 |
189 | # Distributions -- q(u), p(u)
190 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q]))
191 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q]))
192 |
193 | # Expectation --
194 | expectation, exp_y = self.expectation(x, y)
195 | expectation_y = self.batch_rate * exp_y.sum()
196 |
197 | # KL divergence --
198 | kl = self.divergence(q_u, p_u)
199 |
200 | # Calls ELBO
201 | elbo = expectation + expectation_y - kl
202 | return -elbo
203 |
204 | def predictive(self, xnew, k):
205 | # MOGP prior + Variational parameters
206 | q_m = self.q_m
207 | q_S = torch.zeros(self.M, self.M, self.Q)
208 | Kvv = torch.zeros(self.M, self.M, self.Q)
209 | iKvv = torch.zeros(self.M, self.M, self.Q)
210 |
211 | # Posterior distribution on new input data
212 | Kuu = self.coregionalization.Kff(xnew, k)
213 | Kuv = self.coregionalization.Kfu(xnew, self.z, k)
214 |
215 | m_k = 0.0
216 | S_k = Kuu
217 | for q in range(self.Q):
218 | # MOGP latent functions prior
219 | Kvv_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
220 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse?
221 | Kvv[:, :, q] = Kvv_q
222 | iKvv[:, :, q] = iKvv_q
223 |
224 | # Variational parameters + Gaussian integration
225 | q_L = torch.tril(self.q_L[:, :, q])
226 | q_S[:, :, q] = torch.mm(q_L, q_L.t())
227 |
228 | A = Kuv[:, :, q].mm(iKvv[:, :, q])
229 | AT = iKvv[:, :, q].mm(Kuv[:, :, q].t())
230 |
231 | m_k += A.mm(q_m[:, q:q + 1])
232 | S_k += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kuv[:, :, q].t())
233 |
234 | m_k = m_k.detach().numpy()
235 | S_k = S_k.detach().numpy()
236 |
237 | gp_mu = m_k.flatten()
238 | gp_var = np.diagonal(S_k)
239 |
240 | gp = gp_mu
241 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy()
242 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy()
243 |
244 | return gp, gp_upper, gp_lower
245 |
246 | def rmse(self, x_new, f_new, k):
247 | f_gp,_,_ = self.predictive(x_new, k)
248 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
249 | return rmse
250 |
251 | def mae(self, x_new, f_new, k):
252 | f_gp,_,_ = self.predictive(x_new, k)
253 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
254 | return mae
255 |
256 | def nlpd(self, x_new, y_new, k):
257 | f_gp, u_gp, _ = self.predictive(x_new, k)
258 | f_gp = torch.from_numpy(f_gp)
259 | u_gp = torch.from_numpy(u_gp)
260 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0)
261 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
262 | return nlpd
--------------------------------------------------------------------------------
/models/moensemble.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 |
14 | import torch
15 | from torch.distributions import MultivariateNormal as Normal
16 | from torch.distributions import kl_divergence
17 | from kernels.coregionalization import LMC
18 | from GPy.inference.latent_function_inference.posterior import Posterior
19 | import numpy as np
20 |
21 | class MultiOutputEnsembleGP(torch.nn.Module):
22 | """
23 | -- Multi Output Ensemble for Gaussian Processes --
24 | """
25 |
26 | def __init__(self, models, kernels, Q, M, input_dim=None):
27 | super(MultiOutputEnsembleGP, self).__init__()
28 |
29 | if input_dim is None:
30 | input_dim = 1
31 |
32 | # Dimensions --
33 | self.M = M # num. inducing
34 | self.K = len(models) # num. models
35 | self.input_dim = int(input_dim) # dimension of x
36 |
37 | # Multi-output GP Ensemble Elements --
38 | self.Q = Q
39 |
40 | # Kernels --
41 | self.kernels = torch.nn.ModuleList()
42 | for q in range(self.Q):
43 | self.kernels.append(kernels[q])
44 | self.coregionalization = LMC(self.kernels, self.K) # is a list
45 |
46 | if self.input_dim > 1:
47 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False)
48 | else:
49 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False)
50 |
51 | # Adjacent GP Models
52 | self.models = models # is a list
53 |
54 | # Ensemble Variational distribution --
55 | self.q_m = torch.nn.Parameter(2*torch.randn(M, Q), requires_grad=True) # variational: mean parameter
56 | self.q_L = torch.nn.Parameter(0.5*torch.tile(torch.eye(M)[:,:,None], (1, 1, self.Q)), requires_grad=True) # variational: covariance
57 |
58 |
59 | def ensemble(self):
60 | # MOGP prior + Variational parameters
61 | q_m = self.q_m
62 | q_S = torch.zeros(self.M, self.M, self.Q)
63 | Kvv = torch.zeros(self.M, self.M, self.Q)
64 | iKvv = torch.zeros(self.M, self.M, self.Q)
65 | for q in range(self.Q):
66 | Kvv_q = self.kernels[q].K(self.z[:,:,q], self.z[:,:,q])
67 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse?
68 | Kvv[:,:,q] = Kvv_q
69 | iKvv[:,:,q] = iKvv_q
70 |
71 | q_L = torch.tril(self.q_L[:,:,q])
72 | q_S[:,:,q] = torch.mm(q_L, q_L.t())
73 |
74 | ensemble_m = []
75 | ensemble_S = []
76 |
77 | # Ensemble MOGP Distributions
78 | for k, model_k in enumerate(self.models):
79 |
80 | Kuu = self.coregionalization.Kff(model_k.z, k)
81 | Kuv = self.coregionalization.Kfu(model_k.z, self.z, k)
82 |
83 | m_k = 0.0
84 | S_k = Kuu
85 |
86 | for q in range(self.Q):
87 |
88 | A = Kuv[:,:,q].mm(iKvv[:,:,q])
89 | AT = iKvv[:,:,q].mm(Kuv[:,:,q].t())
90 |
91 | m_k += A.mm(q_m[:,q:q+1])
92 | S_k += A.mm(q_S[:,:,q]).mm(AT) - A.mm(Kuv[:,:,q].t())
93 |
94 | ensemble_m.append(m_k)
95 | ensemble_S.append(S_k)
96 |
97 | return ensemble_m, ensemble_S
98 |
99 |
100 | def expectation(self):
101 | E = 0.0
102 | ensemble_m, ensemble_S = self.ensemble()
103 |
104 | # Expectation of k ensembles --
105 | for k,model_k in enumerate(self.models):
106 | # Ensemble GP -- q_e()
107 | m_e = ensemble_m[k]
108 | S_e = ensemble_S[k]
109 |
110 | # Past GP variational distribution -- q_k()
111 | m_k = model_k.q_m
112 | L_k = torch.tril(model_k.q_L)
113 | S_k = torch.mm(L_k, L_k.t())
114 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse?
115 |
116 | # Past GP prior -- p_k()
117 | z_k = model_k.z
118 | Kkk = model_k.kernel.K(z_k, z_k)
119 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse?
120 |
121 | # Expectation on terms -- E[log_p()] and E[log_q()]
122 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k)
123 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk)
124 |
125 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]]
126 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ
127 |
128 | return E
129 |
130 |     def divergence(self, q_v, p_v):  # KL[q(u) || p(u)] summed over latent functions
131 | kl = 0.0
132 | for q in range(self.Q):
133 | kl += kl_divergence(q_v[q], p_v[q])
134 | return kl
135 |
136 | def forward(self):
137 |
138 | q_u = []
139 | p_u = []
140 | q_m = self.q_m
141 | q_S = torch.zeros(self.M, self.M, self.Q)
142 | Kuu = torch.zeros(self.M, self.M, self.Q)
143 | for q in range(self.Q):
144 |
145 | # Variational parameters --
146 | q_L = torch.tril(self.q_L[:,:,q])
147 | q_S[:,:,q] = torch.mm(q_L, q_L.t())
148 |
149 | # Prior parameters (uses kernel) --
150 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
151 | Kuu[:, :, q] = Kuu_q
152 |
153 | # Distributions -- q(u), p(u)
154 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q]))
155 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q]))
156 |
157 | # Expectation --
158 | expectation = self.expectation()
159 |
160 | # KL divergence --
161 | kl = self.divergence(q_u, p_u)
162 |
163 | # Calls ELBO
164 | elbo = expectation - kl
165 | return -elbo
166 |
167 | def predictive(self, xnew, k):
168 | # MOGP prior + Variational parameters
169 | q_m = self.q_m
170 | q_S = torch.zeros(self.M, self.M, self.Q)
171 | Kvv = torch.zeros(self.M, self.M, self.Q)
172 | iKvv = torch.zeros(self.M, self.M, self.Q)
173 |
174 | # Posterior distribution on new input data
175 | Kuu = self.coregionalization.Kff(xnew, k)
176 | Kuv = self.coregionalization.Kfu(xnew, self.z, k)
177 |
178 | m_k = 0.0
179 | S_k = Kuu
180 | for q in range(self.Q):
181 | # MOGP latent functions prior
182 | Kvv_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
183 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse?
184 | Kvv[:, :, q] = Kvv_q
185 | iKvv[:, :, q] = iKvv_q
186 |
187 | # Variational parameters + Gaussian integration
188 | q_L = torch.tril(self.q_L[:, :, q])
189 | q_S[:, :, q] = torch.mm(q_L, q_L.t())
190 |
191 | A = Kuv[:, :, q].mm(iKvv[:, :, q])
192 | AT = iKvv[:, :, q].mm(Kuv[:, :, q].t())
193 |
194 | m_k += A.mm(q_m[:, q:q + 1])
195 | S_k += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kuv[:, :, q].t())
196 |
197 | m_k = m_k.detach().numpy()
198 | S_k = S_k.detach().numpy()
199 |
200 | gp_mu = m_k.flatten()
201 | gp_var = np.diagonal(S_k)
202 |
203 | gp = gp_mu
204 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy()
205 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy()
206 |
207 | return gp, gp_upper, gp_lower
208 |
209 | def rmse(self, x_new, f_new, k):
210 | f_gp,_,_ = self.predictive(x_new, k)
211 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
212 | return rmse
213 |
214 | def mae(self, x_new, f_new, k):
215 | f_gp,_,_ = self.predictive(x_new, k)
216 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
217 | return mae
218 |
219 | def nlpd(self, likelihood, x_new, y_new, k):
220 | f_gp, u_gp, _ = self.predictive(x_new, k)
221 | f_gp = torch.from_numpy(f_gp)
222 | u_gp = torch.from_numpy(u_gp)
223 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0)
224 | nlpd = - torch.mean(likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
225 | return nlpd
--------------------------------------------------------------------------------
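
Usage note (not part of the repository): the multi-output recombination on hypothetical toy tasks; two independently trained SVGP modules are coupled through the LMC coregionalization, and forward() needs no data, only the stored modules. Plain Adam loops for brevity.

import torch
from kernels.rbf import RBF
from likelihoods.gaussian import Gaussian
from models.svgp import SVGP
from models.moensemble import MultiOutputEnsembleGP

torch.manual_seed(0)

# Independently trained SVGP modules, one per source task.
modules = []
for freq in (3.0, 7.0):
    x = torch.rand(100, 1)
    y = torch.sin(freq * x) + 0.1 * torch.randn(100, 1)
    module = SVGP(RBF(), Gaussian(sigma=0.1), M=10)
    opt = torch.optim.Adam(module.parameters(), lr=1e-2)
    for _ in range(100):
        opt.zero_grad()
        module(x, y).backward()
        opt.step()
    module.evidence(x, y)             # stores logZ, reused in the ensemble expectation
    modules.append(module)

# Multi-output recombination through the LMC; one output per module.
Q = 2
ensemble = MultiOutputEnsembleGP(modules, [RBF() for _ in range(Q)], Q=Q, M=10)
opt = torch.optim.Adam([ensemble.q_m, ensemble.q_L], lr=1e-2)
for _ in range(200):
    opt.zero_grad()
    ensemble().backward()
    opt.step()

gp, gp_upper, gp_lower = ensemble.predictive(torch.linspace(0.0, 1.0, 50)[:, None], k=0)
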
/models/svgp.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | from torch.distributions import MultivariateNormal as Normal
15 | from torch.distributions import kl_divergence
16 |
17 | import numpy as np
18 | from GPy.inference.latent_function_inference import LatentFunctionInference
19 | from GPy.inference.latent_function_inference.posterior import Posterior
20 |
21 |
22 | class SVGP(torch.nn.Module):
23 | """
24 | -- Sparse Variational Gaussian Process --
25 | --
26 | -- Adaptation to Pytorch + GP framework
27 | -- Based on Hensman et al. "Scalable Variational Gaussian Process Classification" AISTATS 2015
28 | -- Reference: http://proceedings.mlr.press/v38/hensman15.pdf
29 | """
30 | def __init__(self, kernel, likelihood, M, input_dim=None, batch_rate=1.0):
31 | super(SVGP, self).__init__()
32 |
33 | if input_dim is None:
34 | input_dim = 1
35 |
36 | # Dimensions --
37 | self.M = M #num. inducing
38 | self.input_dim = int(input_dim) #dimension of x
39 | self.batch_rate = batch_rate #rate of mini-batch/dataset
40 |
41 | # GP Elements --
42 | self.likelihood = likelihood #type of likelihood
43 | self.kernel = kernel #type of kernel
44 |
45 | self.logZ = 0.0
46 |
47 | if self.input_dim > 1:
48 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False)
49 | else:
50 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False)
51 |
52 | # Variational distribution --
53 | self.q_m = torch.nn.Parameter(torch.randn(M,1), requires_grad=True) # variational: mean parameter
54 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance
55 |
56 | def forward(self, x, y):
57 |
58 | # Variational parameters --
59 | q_m = self.q_m
60 | q_L = torch.tril(self.q_L)
61 | q_S = torch.mm(q_L, q_L.t())
62 |
63 | # Prior parameters (uses kernel) --
64 | Kuu = self.kernel.K(self.z)
65 |
66 | # Distributions -- q(u), p(u)
67 | q_u = Normal(q_m.flatten(), q_S)
68 | p_u = Normal(torch.zeros(self.M), Kuu)
69 |
70 | # Calculus of q(f) --
71 | Kff = self.kernel.K(x,x)
72 | Kfu = self.kernel.K(x, self.z)
73 | Kuf = torch.transpose(Kfu,0,1)
74 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse?
75 |
76 | A = Kfu.mm(iKuu)
77 | AT = iKuu.mm(Kuf)
78 |
79 | m_f = A.mm(q_m)
80 | v_f = torch.diag(Kff + A.mm(q_S - Kuu).mm(AT))
81 |
82 | # Expectation term --
83 | expectation = self.likelihood.variational_expectation(y, m_f, v_f)
84 |
85 | # KL divergence --
86 | kl = kl_divergence(q_u, p_u)
87 |
88 | # Lower bound (ELBO) --
89 | elbo = self.batch_rate*expectation.sum() - kl
90 | return -elbo
91 |
92 | def predictive(self, x_new, lik_noise=False):
93 | # Matrices
94 | q_m = self.q_m.detach().numpy()
95 | q_L = torch.tril(self.q_L)
96 | q_S = torch.mm(q_L, q_L.t()).detach().numpy()
97 | Kuu = self.kernel.K(self.z, self.z).detach().numpy()
98 |
99 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape))
100 | Kx = self.kernel.K(self.z, x_new).detach().numpy()
101 | Kxx = self.kernel.K(x_new, x_new).detach().numpy()
102 |
103 | # GP Predictive Posterior - mean + variance
104 | gp_mu = np.dot(Kx.T, posterior.woodbury_vector)
105 | Kxx = np.diag(Kxx)
106 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T
107 |
108 | gp = gp_mu
109 | if lik_noise:
110 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) + 2 * self.likelihood.sigma.detach().numpy()
111 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) - 2 * self.likelihood.sigma.detach().numpy()
112 | else:
113 | gp_upper = gp_mu + 2*np.sqrt(gp_var)
114 | gp_lower = gp_mu - 2*np.sqrt(gp_var)
115 |
116 | return gp, gp_upper, gp_lower
117 |
118 | def rmse(self, x_new, f_new):
119 | f_gp,_,_ = self.predictive(x_new)
120 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
121 | return rmse
122 |
123 | def mae(self, x_new, f_new):
124 | f_gp,_,_ = self.predictive(x_new)
125 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
126 | return mae
127 |
128 | def nlpd(self, x_new, y_new):
129 | f_gp, u_gp, _ = self.predictive(x_new)
130 | f_gp = torch.from_numpy(f_gp)
131 | u_gp = torch.from_numpy(u_gp)
132 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0)
133 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy()
134 | return nlpd
135 |
136 | def evidence(self, x, y, N_samples=None):
137 | # Approximation CI
138 | if N_samples is None:
139 | N_samples = 1000
140 |
141 | N,_ = x.shape
142 | v_f = torch.zeros(N)
143 | for i in range(N):
144 | v_f[i] = self.kernel.K(x[i:i+1,:],x[i:i+1,:])
145 | #v_f = torch.diag(self.kernel.K(x,x), 0)
146 | m_f = torch.zeros(v_f.shape)
147 | p_f = Normal(m_f, torch.diag(v_f))
148 | f_samples = p_f.sample([N_samples]).t() # N x N_samples
149 | mc_pdf = self.likelihood.pdf(f_samples, torch.tile(y, (1,N_samples)))
150 |
151 | mc_expectations = 1/N_samples * torch.sum(torch.clamp(mc_pdf, min=1e-100),1)
152 | print(mc_expectations)
153 | logZ = torch.sum(torch.log(mc_expectations))
154 |
155 | self.logZ = logZ
156 | return logZ
157 |
158 |
159 |
--------------------------------------------------------------------------------
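
Usage note (not part of the repository): a minimal sketch fitting the SVGP above to hypothetical toy data by gradient descent on the negative ELBO returned by forward(), then querying predictive(); the repo's experiments use the variational-EM loop in optimization/algorithms.py instead.

import torch
from kernels.rbf import RBF
from likelihoods.gaussian import Gaussian
from models.svgp import SVGP

torch.manual_seed(0)
x = 1.8 * torch.rand(150, 1) - 0.9
y = torch.sin(3.0 * x) + 0.1 * torch.randn(150, 1)

model = SVGP(kernel=RBF(), likelihood=Gaussian(sigma=0.1), M=12)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
for it in range(300):
    optimizer.zero_grad()
    loss = model(x, y)                      # negative ELBO
    loss.backward()
    optimizer.step()

x_test = torch.linspace(-0.9, 0.9, 50)[:, None]
gp, gp_upper, gp_lower = model.predictive(x_test, lik_noise=True)   # numpy arrays, ready to plot
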
/models/svmogp.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | from torch.distributions import MultivariateNormal as Normal
15 | from torch.distributions import kl_divergence
16 | from kernels.coregionalization import LMC
17 |
18 | import numpy as np
19 | from GPy.inference.latent_function_inference import LatentFunctionInference
20 | from GPy.inference.latent_function_inference.posterior import Posterior
21 |
22 |
23 | class SVMOGP(torch.nn.Module):
24 | """
25 | -- Sparse Variational Multi-output Gaussian Process --
26 | --
27 | -- Adaptation to Pytorch + GP framework --
28 | -- Based on M. A. Álvarez and N. Lawrence, "Sparse convolved Gaussian processes for multi-output regression" NIPS'08
29 | -- Reference: http://papers.neurips.cc/paper/3553-sparse-convolved-gaussian-processes-for-multi-output-regression.pdf
30 | """
31 | def __init__(self, kernels, likelihoods, Q, M, input_dim=None, batch_rates=None):
32 | super(SVMOGP, self).__init__()
33 |
34 | if input_dim is None:
35 | input_dim = 1
36 |
37 |
38 | # Dimensions --
39 | self.M = M # num. inducing
40 | self.Q = Q # num. latent functions
41 | self.input_dim = int(input_dim) # dimension of x
42 |
43 | # Likelihoods --
44 | self.likelihoods = likelihoods # list of likelihoods
45 | self.D = len(self.likelihoods) # num. output channels
46 |
47 | if batch_rates is None:
48 | self.batch_rates = self.D*[1.0]
49 | else:
50 | self.batch_rates = batch_rates
51 |
52 | # Kernels --
53 | self.kernels = torch.nn.ModuleList()
54 | for q in range(self.Q):
55 | self.kernels.append(kernels[q])
56 | self.coregionalization = LMC(self.kernels, self.D) # is a list
57 |
58 | # Inducing points --
59 | if self.input_dim > 1:
60 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False)
61 | else:
62 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False)
63 |
64 |
65 | # Variational distributions --
66 | self.q_m = torch.nn.Parameter(2*torch.randn(M, Q), requires_grad=True) # variational: mean parameter
67 | self.q_L = torch.nn.Parameter(torch.tile(torch.eye(M)[:, :, None], (1, 1, self.Q)), requires_grad=True) # variational: covariance
68 |
69 | def expectation(self, x, y):
70 | # Check length of input+output lists
71 | assert len(x) == self.D
72 | assert len(y) == self.D
73 |
74 | # MOGP prior + Variational parameters
75 | q_m = self.q_m
76 | q_S = torch.zeros(self.M, self.M, self.Q)
77 | Kuu = torch.zeros(self.M, self.M, self.Q)
78 | iKuu = torch.zeros(self.M, self.M, self.Q)
79 |
80 | for q in range(self.Q):
81 | # MOGP latent functions prior
82 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
83 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse?
84 | Kuu[:, :, q] = Kuu_q
85 | iKuu[:, :, q] = iKuu_q
86 |
87 | # Variational parameters + Gaussian integration
88 | q_L = torch.tril(self.q_L[:, :, q])
89 | q_S[:, :, q] = torch.mm(q_L, q_L.t())
90 |
91 | # Expectation values (NxD)
92 | expectation = []
93 | for d in range(self.D):
94 | Kff = self.coregionalization.Kff(x[d], d)
95 | Kfu = self.coregionalization.Kfu(x[d], self.z, d)
96 |
97 | m_f = 0.0
98 | S_f = Kff
99 |
100 | for q in range(self.Q):
101 | A = Kfu[:, :, q].mm(iKuu[:, :, q])
102 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t())
103 |
104 | m_f += A.mm(q_m[:, q:q + 1])
105 | S_f += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t())
106 |
107 | v_f = torch.diag(S_f)
108 | expectation.append(self.likelihoods[d].variational_expectation(y[d], m_f, v_f))
109 |
110 | return expectation
111 |
112 |     def divergence(self, q_u, p_u):  # KL[q(u) || p(u)] summed over latent functions
113 | kl = 0.0
114 | for q in range(self.Q):
115 | kl += kl_divergence(q_u[q], p_u[q])
116 | return kl
117 |
118 | def forward(self, x, y):
119 |
120 | # Empty variables for filling in 1:Q
121 | q_u = []
122 | p_u = []
123 | q_m = self.q_m
124 | q_S = torch.zeros(self.M, self.M, self.Q)
125 | Kuu = torch.zeros(self.M, self.M, self.Q)
126 | for q in range(self.Q):
127 |
128 | # Variational parameters --
129 | q_L = torch.tril(self.q_L[:,:,q])
130 | q_S[:,:,q] = torch.mm(q_L, q_L.t())
131 |
132 | # Prior parameters (uses kernel) --
133 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
134 | Kuu[:, :, q] = Kuu_q
135 |
136 | # Distributions -- q(u), p(u)
137 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q]))
138 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q]))
139 |
140 | # Expectation term --
141 | expectation = 0.0
142 | expectation_mo = self.expectation(x, y)
143 | for d, exp in enumerate(expectation_mo):
144 | expectation += self.batch_rates[d] * exp.sum()
145 |
146 | # KL divergence --
147 | kl = self.divergence(q_u, p_u)
148 |
149 | # Lower bound (ELBO) --
150 | elbo = expectation - kl
151 | return -elbo
152 |
153 | def predictive(self, xnew, d):
154 | # MOGP prior + Variational parameters
155 | q_m = self.q_m
156 | q_S = torch.zeros(self.M, self.M, self.Q)
157 | Kuu = torch.zeros(self.M, self.M, self.Q)
158 | iKuu = torch.zeros(self.M, self.M, self.Q)
159 |
160 | # Posterior distribution on new input data
161 | Kff = self.coregionalization.Kff(xnew, d)
162 | Kfu = self.coregionalization.Kfu(xnew, self.z, d)
163 |
164 | m_pred = 0.0
165 | S_pred = Kff
166 | for q in range(self.Q):
167 | # MOGP latent functions prior
168 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q])
169 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse?
170 | Kuu[:, :, q] = Kuu_q
171 | iKuu[:, :, q] = iKuu_q
172 |
173 | # Variational parameters + Gaussian integration
174 | q_L = torch.tril(self.q_L[:, :, q])
175 | q_S[:, :, q] = torch.mm(q_L, q_L.t())
176 |
177 | A = Kfu[:, :, q].mm(iKuu[:, :, q])
178 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t())
179 |
180 | m_pred += A.mm(q_m[:, q:q + 1])
181 | S_pred += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t())
182 |
183 | # Detach and numpy easier for plotting.
184 | m_pred = m_pred.detach().numpy()
185 | S_pred = S_pred.detach().numpy()
186 |
187 | gp_mu = m_pred.flatten()
188 | gp_var = np.diagonal(S_pred)
189 |
190 | gp = gp_mu
191 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy()
192 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy()
193 |
194 | return gp, gp_upper, gp_lower
195 |
196 | def rmse(self, x_new, f_new, d):
197 | f_gp,_,_ = self.predictive(x_new, d)
198 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy()
199 | return rmse
200 |
201 | def mae(self, x_new, f_new, d):
202 | f_gp,_,_ = self.predictive(x_new, d)
203 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy()
204 | return mae
205 |
206 | def nlpd(self, x_new, y_new, d):
207 | f_gp, u_gp, _ = self.predictive(x_new, d)
208 | f_gp = torch.from_numpy(f_gp)
209 | u_gp = torch.from_numpy(u_gp)
210 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0)
211 | nlpd = - torch.mean(self.likelihoods[d].log_predictive(y_new, f_gp, v_gp)).detach().numpy()
212 | return nlpd
213 |
--------------------------------------------------------------------------------
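
Usage note (not part of the repository): a minimal sketch of the SVMOGP above on two hypothetical output channels; inputs, outputs and likelihoods are passed as lists with one entry per channel, and the Q latent functions are mixed through the LMC.

import torch
from kernels.rbf import RBF
from likelihoods.gaussian import Gaussian
from models.svmogp import SVMOGP

torch.manual_seed(0)
x1 = torch.rand(100, 1)
y1 = torch.sin(5.0 * x1) + 0.1 * torch.randn(100, 1)
x2 = torch.rand(80, 1)
y2 = torch.cos(5.0 * x2) + 0.1 * torch.randn(80, 1)

Q = 2                                              # num. latent functions
model = SVMOGP(kernels=[RBF() for _ in range(Q)],
               likelihoods=[Gaussian(0.1), Gaussian(0.1)],
               Q=Q, M=10)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
for it in range(200):
    optimizer.zero_grad()
    loss = model([x1, x2], [y1, y2])               # negative ELBO over both output channels
    loss.backward()
    optimizer.step()

gp, gp_upper, gp_lower = model.predictive(torch.linspace(0.0, 1.0, 50)[:, None], d=0)
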
/optimization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/optimization/__init__.py
--------------------------------------------------------------------------------
/optimization/algorithms.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------
2 | # This script belongs to the ModularGP repo
3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
4 | # Copyright (c) 2021 Pablo Moreno-Munoz
5 | # -----------------------------------------------------------------
6 | #
7 | #
8 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
9 | # Section for Cognitive Systems
10 | # Technical University of Denmark (DTU)
11 | # October 2021
12 |
13 | import torch
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 |
17 | from likelihoods.gaussian import Gaussian
18 | from likelihoods.bernoulli import Bernoulli
19 |
20 | class AlgorithmVEM():
21 | def __init__(self, model, x=None, y=None, config='svgp', iters=20):
22 | super(AlgorithmVEM, self).__init__()
23 |
24 | self.model = model
25 | if x is not None:
26 | self.x = x
27 | if y is not None:
28 | self.y = y
29 | self.iters = iters
30 |
31 | if config == 'svgp' or config == 'ensemble':
32 | self.config = config
33 | else:
34 |             raise ValueError('Invalid config for AlgorithmVEM, choose \'svgp\' or \'ensemble\'')
35 |
36 | if self.config == 'svgp':
37 | # Learning rates per param.
38 | self.lr_m = 1e-6
39 | self.lr_L = 1e-12
40 | self.lr_hyp = 1e-10
41 | self.lr_z = 1e-10
42 |
43 | # VE + VM iterations.
44 | self.ve_its = 20
45 | self.vm_its = 10
46 | self.z_its = 10
47 |
48 | elif self.config == 'ensemble':
49 | # Learning rates per param.
50 | self.lr_m = 1e-3
51 | self.lr_L = 1e-6
52 | self.lr_hyp = 1e-8
53 | self.lr_z = 1e-6
54 |
55 | # VE + VM iterations.
56 | self.ve_its = 30
57 | self.vm_its = 10
58 | self.z_its = 10
59 |
60 | def fit(self, opt='sgd', plot=False):
61 | if opt == 'sgd':
62 | ve_optimizer = torch.optim.SGD([{'params':self.model.q_m, 'lr':self.lr_m},{'params':self.model.q_L,'lr':self.lr_L}], lr=1e-12, momentum=0.9)
63 |
64 | if isinstance(self.model.likelihood, Gaussian):  # only a Gaussian likelihood exposes a noise parameter (sigma)
65 | vm_optimizer = torch.optim.SGD([{'params':self.model.kernel.parameters(), 'lr':self.lr_hyp},{'params':self.model.likelihood.sigma,'lr':self.lr_hyp}], lr=1e-12, momentum=0.9)
66 | else:
67 | vm_optimizer = torch.optim.SGD([{'params': self.model.kernel.parameters(), 'lr': self.lr_hyp}], lr=1e-12, momentum=0.9)
68 |
69 | z_optimizer = torch.optim.SGD([{'params':self.model.z, 'lr':self.lr_z}], lr=1e-10, momentum=0.9)
70 |
71 | elbo_its = np.empty((self.iters, 1))
72 | for em_it in range(self.iters):
73 |
74 | # VE STEP
75 | for it in range(self.ve_its):
76 | if self.config == 'svgp':
77 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO
78 | elif self.config == 'ensemble':
79 | elbo_it = self.model() # Forward pass -> computes ELBO
80 |
81 | ve_optimizer.zero_grad()
82 | elbo_it.backward() # Backward pass <- computes gradients
83 | ve_optimizer.step()
84 |
85 | # Overfitting avoidance
86 | if self.config == 'ensemble':
87 | if self.model().item() < 10.0:
88 | break
89 |
90 | # VM STEP
91 | # 1. hyper-parameters
92 | for it in range(self.vm_its):
93 | if self.config == 'svgp':
94 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO
95 | elif self.config == 'ensemble':
96 | elbo_it = self.model() # Forward pass -> computes ELBO
97 |
98 | vm_optimizer.zero_grad()
99 | elbo_it.backward() # Backward pass <- computes gradients
100 | vm_optimizer.step()
101 |
102 | # Overfitting avoidance
103 | if self.config == 'ensemble':
104 | if self.model().item() < 10.0:
105 | break
106 |
107 | # 2. inducing-points
108 | for it in range(self.z_its):
109 | if self.config == 'svgp':
110 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO
111 | elif self.config == 'ensemble':
112 | elbo_it = self.model() # Forward pass -> computes ELBO
113 |
114 | z_optimizer.zero_grad()
115 | elbo_it.backward() # Backward pass <- computes gradients
116 | z_optimizer.step()
117 |
118 | # Overfitting avoidance
119 | if self.config == 'ensemble':
120 | if self.model().item() < 10.0:
121 | break
122 |
123 | print('Variational EM step (it=' + str(em_it) + ')')
124 | if self.config == 'svgp':
125 | print(' \__ elbo =', self.model(self.x, self.y).item())
126 | elbo_its[em_it] = - self.model(self.x, self.y).item()
127 | elif self.config == 'ensemble':
128 | print(' \__ elbo =', self.model().item())
129 | elbo_its[em_it] = - self.model().item()
130 |
131 | # Overfitting avoidance
132 | if self.model().item() < 10.0:
133 | break
134 |
135 | elif opt == 'lbfgs':
136 | optim_param= torch.optim.LBFGS([self.model.q_m, self.model.q_L], lr=self.lr_m, max_iter=self.ve_its)
137 | optim_hyper = torch.optim.LBFGS(list(self.model.kernel.parameters()) + [self.model.likelihood.sigma], lr=self.lr_hyp, max_iter=self.vm_its)  # assumes a Gaussian likelihood (sigma)
138 | optim_z = torch.optim.LBFGS([self.model.z], lr=self.lr_z, max_iter=self.vm_its)
139 |
140 | elbo_its = np.empty((self.iters, 1))
141 | for em_it in range(self.iters):
142 |
143 | # VE STEP
144 | def closure():
145 | optim_param.zero_grad()
146 | if self.config == 'svgp':
147 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO
148 | elif self.config == 'ensemble':
149 | elbo_it = self.model() # Forward pass -> computes ELBO
150 |
151 | elbo_it.backward()
152 | return elbo_it
153 |
154 | optim_param.step(closure)
155 | if self.config == 'svgp':
156 | print(' param >>> elbo =', self.model(self.x, self.y).item())
157 | elif self.config == 'ensemble':
158 | print(' param >>> elbo =', self.model().item())
159 |
160 | # VM STEP
161 | # 1. hyper-parameters
162 | def closure():
163 | optim_hyper.zero_grad()
164 | if self.config == 'svgp':
165 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO
166 | elif self.config == 'ensemble':
167 | elbo_it = self.model() # Forward pass -> computes ELBO
168 |
169 | elbo_it.backward()
170 | return elbo_it
171 |
172 | optim_hyper.step(closure)
173 | if self.config == 'svgp':
174 | print(' hyper >>> elbo =', self.model(self.x, self.y).item())
175 | elif self.config == 'ensemble':
176 | print(' hyper >>> elbo =', self.model().item())
177 |
178 | # 2. inducing-points
179 | def closure():
180 | optim_z.zero_grad()
181 | if self.config == 'svgp':
182 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO
183 | elif self.config == 'ensemble':
184 | elbo_it = self.model() # Forward pass -> computes ELBO
185 |
186 | elbo_it.backward()
187 | return elbo_it
188 |
189 | optim_z.step(closure)
190 | if self.config == 'svgp':
191 | print(' z pts >>> elbo =', self.model(self.x, self.y).item())
192 | elif self.config == 'ensemble':
193 | print(' z pts >>> elbo =', self.model().item())
194 |
195 |
196 | print('Variational EM step (it=' + str(em_it) + ')')
197 | if self.config == 'svgp':
198 | print(' \__ elbo =', self.model(self.x, self.y).item())
199 | elbo_its[em_it] = - self.model(self.x, self.y).item()
200 | elif self.config == 'ensemble':
201 | print(' \__ elbo =', self.model().item())
202 | elbo_its[em_it] = - self.model().item()
203 |
204 | else:
205 | raise ValueError('Not valid optimizer, choose \'sgd\' or \'lbfgs\'')
206 |
207 | if plot:
208 | plt.figure()
209 | plt.plot(elbo_its, 'k-')
210 | plt.title('Ensemble GP Inference (ELBO)')
211 | plt.xlabel('Iterations')
212 | plt.show()
213 |
214 | def GPR_Optimizer(model, x, y, its=50, lr=1e-2):
215 | optimizer = torch.optim.LBFGS(model.parameters(), lr=lr, max_iter=10)
216 | elbo_its = np.empty((its, 1))
217 | for it in range(its):
218 | def closure():
219 | optimizer.zero_grad()
220 | elbo_opt = model(x, y)
221 | elbo_opt.backward()
222 | return elbo_opt
223 |
224 | optimizer.step(closure)
225 |
226 | print('Optimization step (it=' + str(it) + ')')
227 | print(' \__ log_marginal =', model(x, y).item())
228 | elbo_its[it] = -model(x, y).item()
229 |
230 |
231 | def vem_algorithm(model, x, y, em_iters=10, optimizer='sgd',plot=False):
232 | if optimizer=='sgd':
233 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-6},{'params':model.q_L,'lr':1e-12}], lr=1e-12, momentum=0.9)
234 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-10, momentum=0.9)
235 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-10}], lr=1e-10, momentum=0.9)
236 |
237 | VE_iters = 20
238 | VM_iters = 10
239 | Z_iters = 10
240 |
241 | elbo_its = np.empty((em_iters, 1))
242 | for em_it in range(em_iters):
243 |
244 | # VE STEP
245 | for it in range(VE_iters):
246 | elbo_it = model(x,y) # Forward pass -> computes ELBO
247 | ve_optimizer.zero_grad()
248 | elbo_it.backward() # Backward pass <- computes gradients
249 | ve_optimizer.step()
250 |
251 | # VM STEP
252 | # 1. hyper-parameters
253 | for it in range(VM_iters):
254 | elbo_it = model(x,y) # Forward pass -> computes ELBO
255 | vm_optimizer.zero_grad()
256 | elbo_it.backward() # Backward pass <- computes gradients
257 | vm_optimizer.step()
258 |
259 | # 2. inducing-points
260 | for it in range(Z_iters):
261 | elbo_it = model(x,y) # Forward pass -> computes ELBO
262 | z_optimizer.zero_grad()
263 | elbo_it.backward() # Backward pass <- computes gradients
264 | z_optimizer.step()
265 |
266 | print('Variational EM step (it=' + str(em_it) + ')')
267 | print(' \__ elbo =', model(x, y).item())
268 | elbo_its[em_it] = -model(x, y).item()
269 |
270 |
271 | elif optimizer=='lbfgs':
272 | ve_optimizer = torch.optim.LBFGS([model.q_m, model.q_L], max_iter=50)
273 | vm_optimizer = torch.optim.LBFGS(model.kernel.parameters(), lr=1e-3, max_iter=10)
274 |
275 |
276 | elbo_its = np.empty((em_iters,1))
277 | for em_it in range(em_iters):
278 | # VE STEP
279 | for name, param in model.kernel.named_parameters():
280 | param.requires_grad = False
281 |
282 | def closure():
283 | ve_optimizer.zero_grad()
284 | elbo_opt = model(x, y)
285 | #print('ELBO:', elbo_opt.item())
286 | elbo_opt.backward()
287 | return elbo_opt
288 |
289 | ve_optimizer.step(closure)
290 |
291 | # VM STEP
292 | for name, param in model.kernel.named_parameters():
293 | param.requires_grad = True
294 |
295 | def closure():
296 | vm_optimizer.zero_grad()
297 | elbo_opt = model(x, y)
298 | #print('ELBO:', elbo_opt.item())
299 | elbo_opt.backward()
300 | return elbo_opt
301 |
302 | vm_optimizer.step(closure)
303 |
304 | print('Variational EM step (it=' + str(em_it) + ')')
305 | print(' \__ elbo =', model(x, y).item())
306 | elbo_its[em_it] = -model(x, y).item()
307 |
308 | if plot:
309 | plt.figure()
310 | plt.plot(elbo_its, 'k-')
311 | plt.title('Sparse GP Regression (ELBO)')
312 | plt.xlabel('Iterations')
313 | plt.show()
314 |
315 | def ensemble_vem(model, em_iters=20, optimizer='sgd',plot=False):
316 | if optimizer=='sgd':
317 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9)
318 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9)
319 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-6}], lr=1e-8, momentum=0.9)
320 |
321 | VE_iters = 30
322 | VM_iters = 10
323 | Z_iters = 10
324 |
325 | elbo_its = np.empty((em_iters, 1))
326 | for em_it in range(em_iters):
327 | # VE STEP
328 | # 1. Variational parameters
329 | for it in range(VE_iters):
330 | elbo_it = model() # Forward pass -> computes ELBO
331 | ve_optimizer.zero_grad()
332 | elbo_it.backward() # Backward pass <- computes gradients
333 | ve_optimizer.step()
334 |
335 | # VM STEP
336 | # 1. hyper-parameters
337 | for it in range(VM_iters):
338 | elbo_it = model() # Forward pass -> computes ELBO
339 | vm_optimizer.zero_grad()
340 | elbo_it.backward() # Backward pass <- computes gradients
341 | vm_optimizer.step()
342 |
343 | # 2. inducing-points
344 | for it in range(Z_iters):
345 | elbo_it = model() # Forward pass -> computes ELBO
346 | z_optimizer.zero_grad()
347 | elbo_it.backward() # Backward pass <- computes gradients
348 | z_optimizer.step()
349 |
350 | print('Variational EM step (it=' + str(em_it) + ')')
351 | print(' \__ elbo =', model().item())
352 | elbo_its[em_it] = -model().item()
353 |
354 | if -model().item() > 0.0:
355 | break
356 |
357 | if plot:
358 | plt.figure()
359 | plt.plot(elbo_its, 'k-')
360 | plt.title('Ensemble GP Inference (ELBO)')
361 | plt.xlabel('Iterations')
362 | plt.show()
363 |
364 |
365 | def ensemble_vem_parallel(model, em_iters=30, optimizer='sgd',plot=False):
366 | if optimizer=='sgd':
367 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9)
368 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9)
369 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-8}], lr=1e-8, momentum=0.9)
370 |
371 | VE_iters = 30
372 | VM_iters = 10
373 | Z_iters = 10
374 |
375 | elbo_its = np.zeros((em_iters, 1))
376 | for em_it in range(em_iters):
377 | # VE STEP
378 | # 1. Variational parameters
379 | for it in range(VE_iters):
380 | elbo_it = model() # Forward pass -> computes ELBO
381 | ve_optimizer.zero_grad()
382 | elbo_it.backward() # Backward pass <- computes gradients
383 | ve_optimizer.step()
384 |
385 | # VM STEP
386 | # 1. hyper-parameters
387 | for it in range(VM_iters):
388 | elbo_it = model() # Forward pass -> computes ELBO
389 | vm_optimizer.zero_grad()
390 | elbo_it.backward() # Backward pass <- computes gradients
391 | vm_optimizer.step()
392 |
393 | # 2. inducing-points
394 | for it in range(Z_iters):
395 | elbo_it = model() # Forward pass -> computes ELBO
396 | z_optimizer.zero_grad()
397 | elbo_it.backward() # Backward pass <- computes gradients
398 | z_optimizer.step()
399 |
400 | print('Variational EM step (it=' + str(em_it) + ')')
401 | print(' \__ elbo =', model().item())
402 | elbo_its[em_it] = -model().item()
403 |
404 | if -model().item() > 0.0:
405 | break
406 |
407 | if plot:
408 | plt.figure()
409 | plt.plot(elbo_its, 'k-')
410 | plt.title('Ensemble GP Inference (ELBO)')
411 | plt.xlabel('Iterations')
412 | plt.show()
413 |
414 | def vem_algorithm_infographic(model, x, y, em_iters=10, plot=False):
415 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-5},{'params':model.q_L,'lr':1e-8}], lr=1e-12, momentum=0.9)
416 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-10, momentum=0.9)
417 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-10}], lr=1e-10, momentum=0.9)
418 |
419 | VE_iters = 20
420 | VM_iters = 20
421 | Z_iters = 10
422 |
423 | elbo_its = np.empty((em_iters, 1))
424 | for em_it in range(em_iters):
425 |
426 | # VE STEP
427 | for it in range(VE_iters):
428 | elbo_it = model(x,y) # Forward pass -> computes ELBO
429 | ve_optimizer.zero_grad()
430 | elbo_it.backward() # Backward pass <- computes gradients
431 | ve_optimizer.step()
432 |
433 | # VM STEP
434 | # 1. hyper-parameters
435 | for it in range(VM_iters):
436 | elbo_it = model(x,y) # Forward pass -> computes ELBO
437 | vm_optimizer.zero_grad()
438 | elbo_it.backward() # Backward pass <- computes gradients
439 | vm_optimizer.step()
440 |
441 | # 2. inducing-points
442 | for it in range(Z_iters):
443 | elbo_it = model(x,y) # Forward pass -> computes ELBO
444 | z_optimizer.zero_grad()
445 | elbo_it.backward() # Backward pass <- computes gradients
446 | z_optimizer.step()
447 |
448 | print('Variational EM step (it=' + str(em_it) + ')')
449 | print(' \__ elbo =', model(x, y).item())
450 | elbo_its[em_it] = -model(x, y).item()
451 |
452 |
453 | def ensemble_vem_infographic(model, em_iters=30, optimizer='sgd',plot=False):
454 | if optimizer=='sgd':
455 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9)
456 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9)
457 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-8}], lr=1e-8, momentum=0.9)
458 |
459 | VE_iters = 30
460 | VM_iters = 20
461 | Z_iters = 10
462 |
463 | elbo_its = np.zeros((em_iters, 1))
464 | for em_it in range(em_iters):
465 | # VE STEP
466 | # 1. Variational parameters
467 | for it in range(VE_iters):
468 | elbo_it = model() # Forward pass -> computes ELBO
469 | ve_optimizer.zero_grad()
470 | elbo_it.backward() # Backward pass <- computes gradients
471 | ve_optimizer.step()
472 |
473 | # VM STEP
474 | # 1. hyper-parameters
475 | for it in range(VM_iters):
476 | elbo_it = model() # Forward pass -> computes ELBO
477 | vm_optimizer.zero_grad()
478 | elbo_it.backward() # Backward pass <- computes gradients
479 | vm_optimizer.step()
480 |
481 | # 2. inducing-points
482 | for it in range(Z_iters):
483 | elbo_it = model() # Forward pass -> computes ELBO
484 | z_optimizer.zero_grad()
485 | elbo_it.backward() # Backward pass <- computes gradients
486 | z_optimizer.step()
487 |
488 | print('Variational EM step (it=' + str(em_it) + ')')
489 | print(' \__ elbo =', model().item())
490 | elbo_its[em_it] = -model().item()
491 |
492 | if -model().item() > 0.0:
493 | break
494 |
495 | def moensemble_vem(model, em_iters=20, optimizer='sgd',plot=False):
496 | if optimizer=='sgd':
497 | ve_optimizer = torch.optim.SGD([{'params': model.q_m, 'lr': 1e-3},
498 | {'params': model.q_L,'lr': 1e-6}], lr=1e-6, momentum=0.9)
499 | vm_optimizer = torch.optim.SGD([{'params': model.kernels.parameters(), 'lr': 1e-8},
500 | {'params': model.coregionalization.W, 'lr': 1e-6}], lr=1e-8, momentum=0.9)
501 | z_optimizer = torch.optim.SGD([{'params': model.z, 'lr':1e-7}], lr=1e-8, momentum=0.9)
502 |
503 | VE_iters = 30
504 | VM_iters = 20
505 | Z_iters = 5
506 |
507 | elbo_its = np.empty((em_iters, 1))
508 | for em_it in range(em_iters):
509 | # VE STEP
510 | # 1. Variational parameters
511 | for it in range(VE_iters):
512 | elbo_it = model() # Forward pass -> computes ELBO
513 | ve_optimizer.zero_grad()
514 | elbo_it.backward() # Backward pass <- computes gradients
515 | ve_optimizer.step()
516 |
517 | # VM STEP
518 | # 1. hyper-parameters
519 | for it in range(VM_iters):
520 | elbo_it = model() # Forward pass -> computes ELBO
521 | vm_optimizer.zero_grad()
522 | elbo_it.backward() # Backward pass <- computes gradients
523 | vm_optimizer.step()
524 |
525 | # 2. inducing-points
526 | for it in range(Z_iters):
527 | elbo_it = model() # Forward pass -> computes ELBO
528 | z_optimizer.zero_grad()
529 | elbo_it.backward() # Backward pass <- computes gradients
530 | z_optimizer.step()
531 |
532 | print('Variational EM step (it=' + str(em_it) + ')')
533 | print(' \__ elbo =', model().item())
534 | elbo_its[em_it] = -model().item()
535 |
536 | if -model().item() > 0.0:
537 | break
538 |
539 | if plot:
540 | plt.figure()
541 | plt.plot(elbo_its, 'k-')
542 | plt.title('Ensemble GP Inference (ELBO)')
543 | plt.xlabel('Iterations')
544 | plt.show()
545 |
546 | class AlgorithmMOVEM():
547 | def __init__(self, model, iters=20, plot=False):
548 | super(AlgorithmMOVEM, self).__init__()
549 |
550 | self.model = model
551 | self.iters = iters
552 |
553 | # Learning rates per param.
554 | self.lr_m = 1e-3
555 | self.lr_L = 1e-6
556 | self.lr_B = 1e-6
557 | self.lr_hyp = 1e-8
558 | self.lr_z = 1e-7
559 |
560 | # VE + VM iterations.
561 | self.ve_iters = 30
562 | self.vm_iters = 20
563 | self.z_iters = 10
564 |
565 | def fit(self, plot=False):
566 |
567 | ve_optimizer = torch.optim.SGD([{'params': self.model.q_m, 'lr': self.lr_m},
568 | {'params': self.model.q_L,'lr': self.lr_L}], lr=1e-6, momentum=0.9)
569 | vm_optimizer = torch.optim.SGD([{'params': self.model.kernels.parameters(), 'lr': self.lr_hyp},
570 | {'params': self.model.coregionalization.W, 'lr': self.lr_B}], lr=1e-8, momentum=0.9)
571 | z_optimizer = torch.optim.SGD([{'params': self.model.z, 'lr': self.lr_z}], lr=1e-8, momentum=0.9)
572 |
573 | elbo_its = np.empty((self.iters, 1))
574 | for em_it in range(self.iters):
575 | # VE STEP
576 | # 1. Variational parameters
577 | for it in range(self.ve_iters):
578 | elbo_it = self.model() # Forward pass -> computes ELBO
579 | ve_optimizer.zero_grad()
580 | elbo_it.backward() # Backward pass <- computes gradients
581 | ve_optimizer.step()
582 |
583 | # VM STEP
584 | # 1. hyper-parameters
585 | for it in range(self.vm_iters):
586 | elbo_it = self.model() # Forward pass -> computes ELBO
587 | vm_optimizer.zero_grad()
588 | elbo_it.backward() # Backward pass <- computes gradients
589 | vm_optimizer.step()
590 |
591 | # 2. inducing-points
592 | for it in range(self.z_iters):
593 | elbo_it = self.model() # Forward pass -> computes ELBO
594 | z_optimizer.zero_grad()
595 | elbo_it.backward() # Backward pass <- computes gradients
596 | z_optimizer.step()
597 |
598 | print('Variational EM step (it=' + str(em_it) + ')')
599 | print(' \__ elbo =', self.model().item())
600 | elbo_its[em_it] = -self.model().item()
601 |
602 | if -self.model().item() > 0.0:
603 | break
604 |
605 | if plot:
606 | plt.figure()
607 | plt.plot(elbo_its, 'k-')
608 | plt.title('Ensemble GP Inference (ELBO)')
609 | plt.xlabel('Iterations')
610 | plt.show()
--------------------------------------------------------------------------------
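
All of the drivers in algorithms.py follow the same alternating scheme: a variational E-step updates the variational parameters (q_m, q_L) with one optimizer, a variational M-step updates the kernel hyper-parameters with a second, and a third optimizer moves the inducing inputs z, each for a fixed number of inner iterations per EM round. The sketch below is a minimal, self-contained illustration of that alternation on a toy quadratic objective; it does not use the repo's models or ELBO, and all names in it are placeholders.

import torch

# Toy stand-ins: a 'variational' block and a 'hyper-parameter' block.
q_m = torch.zeros(5, requires_grad=True)      # plays the role of the variational parameters
log_ls = torch.zeros(1, requires_grad=True)   # plays the role of a kernel hyper-parameter

def objective():
    # Any differentiable scalar; stands in for the negative ELBO.
    return torch.sum((q_m - 1.0) ** 2) + torch.sum((log_ls - 0.5) ** 2)

ve_opt = torch.optim.SGD([q_m], lr=1e-1, momentum=0.9)     # VE-step optimizer
vm_opt = torch.optim.SGD([log_ls], lr=1e-2, momentum=0.9)  # VM-step optimizer

for em_it in range(10):
    for _ in range(20):            # VE step: only q_m moves
        loss = objective()
        ve_opt.zero_grad()
        loss.backward()
        ve_opt.step()
    for _ in range(10):            # VM step: only log_ls moves
        loss = objective()
        vm_opt.zero_grad()
        loss.backward()
        vm_opt.step()
    print('EM it', em_it, 'objective =', objective().item())

Keeping one optimizer per block is what allows the very different learning rates used above (for example lr_m=1e-6 versus lr_L=1e-12 in the 'svgp' configuration) without touching the other parameter groups.
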
/requirements.txt:
--------------------------------------------------------------------------------
1 | alembic==1.4.1
2 | anyio==3.2.1
3 | appnope==0.1.2
4 | argon2-cffi==20.1.0
5 | async-generator==1.10
6 | attrs==21.2.0
7 | Babel==2.9.1
8 | backcall==0.2.0
9 | bleach==3.3.0
10 | certifi==2020.6.20
11 | cffi==1.14.5
12 | chardet==4.0.0
13 | click==8.0.1
14 | cloudpickle==2.0.0
15 | cycler==0.10.0
16 | Cython==0.29.23
17 | databricks-cli==0.15.0
18 | decorator==5.0.7
19 | defusedxml==0.7.1
20 | docker==5.0.2
21 | entrypoints==0.3
22 | Flask==2.0.1
23 | gitdb==4.0.7
24 | GitPython==3.1.23
25 | GPy==1.10.0
26 | gpytorch==1.5.1
27 | greenlet==1.1.1
28 | gunicorn==20.1.0
29 | idna==2.10
30 | importlib-metadata==4.0.1
31 | ipykernel==5.5.5
32 | ipython==7.24.1
33 | ipython-genutils==0.2.0
34 | itsdangerous==2.0.1
35 | jedi==0.18.0
36 | Jinja2==3.0.1
37 | joblib==1.0.1
38 | json5==0.9.6
39 | jsonschema==3.2.0
40 | jupyter-client==6.1.12
41 | jupyter-core==4.7.1
42 | jupyter-server==1.9.0
43 | jupyterlab==3.0.16
44 | jupyterlab-pygments==0.1.2
45 | jupyterlab-server==2.6.0
46 | kiwisolver==1.3.1
47 | Mako==1.1.5
48 | MarkupSafe==2.0.1
49 | matplotlib==3.4.2
50 | matplotlib-inline==0.1.2
51 | matplotlib2tikz==0.7.6
52 | mistune==0.8.4
53 | mlflow==1.20.2
54 | nbclassic==0.3.1
55 | nbclient==0.5.3
56 | nbconvert==6.1.0
57 | nbformat==5.1.3
58 | nest-asyncio==1.5.1
59 | networkx
60 | notebook==6.4.0
61 | numpy==1.20.3
62 | opt-einsum==3.3.0
63 | packaging==20.9
64 | pandas==1.2.4
65 | pandocfilters==1.4.3
66 | paramz==0.9.5
67 | parso==0.8.2
68 | pexpect==4.8.0
69 | pickleshare==0.7.5
70 | Pillow==8.2.0
71 | prometheus-client==0.11.0
72 | prometheus-flask-exporter==0.18.2
73 | prompt-toolkit==3.0.19
74 | protobuf==3.17.3
75 | ptyprocess==0.7.0
76 | pycparser==2.20
77 | Pygments==2.9.0
78 | pyparsing==2.4.7
79 | pyreadstat==1.1.2
80 | pyro-api==0.1.2
81 | pyro-ppl==1.7.0
82 | pyrsistent==0.17.3
83 | python-dateutil==2.8.1
84 | python-editor==1.0.4
85 | pytz==2021.1
86 | PyYAML==5.4.1
87 | pyzmq==22.1.0
88 | querystring-parser==1.2.4
89 | requests==2.25.1
90 | requests-unixsocket==0.2.0
91 | scikit-learn==0.24.2
92 | scipy==1.6.3
93 | Send2Trash==1.7.1
94 | six==1.16.0
95 | sklearn==0.0
96 | smmap==4.0.0
97 | sniffio==1.2.0
98 | SQLAlchemy==1.4.23
99 | sqlparse==0.4.2
100 | tabulate==0.8.9
101 | terminado==0.10.1
102 | testpath==0.5.0
103 | threadpoolctl==2.1.0
104 | tikzplotlib==0.9.8
105 | torch==1.9.0
106 | torchplot==0.2.0
107 | torchvision==0.9.1
108 | tornado==6.1
109 | tqdm==4.62.2
110 | traitlets==5.0.5
111 | typing-extensions==3.10.0.0
112 | urllib3==1.26.5
113 | wcwidth==0.2.5
114 | webencodings==0.5.1
115 | websocket-client==1.1.0
116 | Werkzeug==2.0.1
117 | zipp==3.4.1
118 |
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
1 | # squared distance is based on the gptorch code
2 | # by Steven Atkinson (steven@atkinson.mn)
3 | # -----------------------------------------------------------------
4 | # This script belongs to the ModularGP repo
5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021
6 | # Copyright (c) 2021 Pablo Moreno-Munoz
7 | # -----------------------------------------------------------------
8 | #
9 | #
10 | # Pablo Moreno-Munoz (pabmo@dtu.dk)
11 | # Section for Cognitive Systems
12 | # Technical University of Denmark (DTU)
13 | # October 2021
14 |
15 | import os
16 | import torch
17 | import numpy as np
18 | import pandas as pd
19 | from torch.utils.data import Dataset, DataLoader
20 |
21 | _lim_val = np.finfo(np.float64).max
22 | _lim_val_exp = np.log(_lim_val)
23 | _lim_val_square = np.sqrt(_lim_val)
24 | #_lim_val_cube = cbrt(_lim_val)
25 | _lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf)
26 | _lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf)
27 | _lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf)
28 |
29 |
30 | def safe_exp(f):
31 | clamp_f = torch.clamp(f, min=-np.inf, max=_lim_val_exp)
32 | return torch.exp(clamp_f)
33 |
34 | def safe_square(f):
35 | f = torch.clamp(f, min=-np.inf, max=_lim_val_square)
36 | return f**2
37 |
38 | def safe_cube(f):
39 | f = torch.clamp(f, min=-np.inf, max=_lim_val_cube)
40 | return f**3
41 |
42 | def safe_quad(f):
43 | f = torch.clamp(f, min=-np.inf, max=_lim_val_quad)
44 | return f**4
45 |
46 | def true_function(x):
47 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \
48 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) + \
49 | 5*torch.cos(7*np.pi*x + 2.4*np.pi)
50 | return y
51 |
52 | def smooth_function(x):
53 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \
54 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi)
55 | return y
56 |
57 | def smooth_function_bias(x):
58 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \
59 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) + \
60 | 3.0*x - 7.5
61 | return y
62 |
63 |
64 | def true_u_functions(x_list, Q):
65 | u_functions = []
66 | amplitude = (1.5 - 0.5) * torch.rand(Q, 3) + 0.5
67 | freq = (3 - 1) * torch.rand(Q, 3) + 1
68 | shift = 2 * torch.rand(Q, 3)
69 | for x in x_list:
70 | u_function = torch.empty(x.shape[0], Q)
71 | for q in range(Q):
72 | u_function[:,q,None] = 3.0 * amplitude[q, 0] * np.cos(freq[q, 0] * np.pi * x + shift[q, 0] * np.pi) - \
73 | 2.0 * amplitude[q, 1] * np.sin(2 * freq[q, 1] * np.pi * x + shift[q, 1] * np.pi) + \
74 | amplitude[q, 2] * np.cos(4 * freq[q, 2] * np.pi * x + shift[q, 2] * np.pi)
75 | u_functions.append(u_function)
76 | return u_functions
77 |
78 |
79 | def true_f_functions(x_list, Q):
80 | K = len(x_list)
81 | W = 0.5 * torch.randn(K, Q)
82 | f_functions = []
83 | u_functions = true_u_functions(x_list, Q)
84 | for k, u_function in enumerate(u_functions):
85 | Nk = u_function.shape[0]
86 | f_function = torch.zeros(Nk, 1)
87 | for q in range(Q):
88 | f_function += torch.tile(W[k:k+1, q:q+1], (Nk, 1)) * u_function[:, q:q+1]
89 |
90 | f_functions.append(f_function)
91 |
92 | return f_functions
93 |
94 |
95 | def squared_distance(x1, x2=None):
96 | """
97 | Given points x1 [n1 x d1] and x2 [n2 x d2], return a [n1 x n2] matrix with
98 | the pairwise squared distances between the points.
99 | Entry (i, j) is sum_{j=1}^d (x_1[i, j] - x_2[i, j]) ^ 2
100 | """
101 | if x2 is None:
102 | return squared_distance(x1, x1)
103 |
104 | x1s = x1.pow(2).sum(1, keepdim=True)
105 | x2s = x2.pow(2).sum(1, keepdim=True)
106 |
107 | r2 = x1s + x2s.t() -2.0 * x1 @ x2.t()
108 |
109 | # Prevent negative squared distances using torch.clamp
110 | # NOTE: Clamping is for numerics.
111 | # This use of .detach() is to avoid breaking the gradient flow.
112 | return r2 - (torch.clamp(r2, max=0.0)).detach()
113 |
114 |
115 | class DataGP(Dataset):
116 | def __init__(self, x, y):
117 | # Accept numpy arrays or torch tensors; always store torch tensors
118 | # so that __getitem__ returns tensors in both cases.
119 | self.x = x if torch.is_tensor(x) else torch.from_numpy(x)
120 | self.y = y if torch.is_tensor(y) else torch.from_numpy(y)
121 |
122 | def __len__(self):
123 | return len(self.x)
124 |
125 | def __getitem__(self, item):
126 | return self.x[item], self.y[item]
127 |
128 |
129 | class DataMOGP(Dataset):
130 | def __init__(self, x, y):
131 | self.x = x # x is a list
132 | self.y = y # y is a list
133 |
134 | def __len__(self):
135 | return min(len(x_d) for x_d in self.x)
136 |
137 | def __getitem__(self, item):
138 | x_tuple = tuple(x_d[item] for x_d in self.x)
139 | y_tuple = tuple(y_d[item] for y_d in self.y)
140 | return x_tuple, y_tuple
141 |
142 |
143 |
144 |
--------------------------------------------------------------------------------
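
Two of the utilities above are used throughout the repo: squared_distance() is the building block for the stationary kernels (the clamp trick at the end removes tiny negative values caused by floating-point cancellation), and DataGP wraps (x, y) pairs for mini-batching with torch's DataLoader. A short usage sketch, assuming the repository root is on the import path:

import numpy as np
import torch
from torch.utils.data import DataLoader
from util import squared_distance, DataGP

# Pairwise squared distances between two small point sets.
x1 = torch.randn(4, 2)
x2 = torch.randn(3, 2)
r2 = squared_distance(x1, x2)     # shape [4, 3], non-negative after the clamp trick
print(r2.shape, bool((r2 >= 0).all()))

# Mini-batching with the Dataset wrapper (numpy inputs are converted to tensors).
x = np.linspace(0, 1, 32)[:, None]
y = np.sin(2 * np.pi * x)
loader = DataLoader(DataGP(x, y), batch_size=8, shuffle=True)
for xb, yb in loader:
    print(xb.shape, yb.shape)     # torch.Size([8, 1]) each
    break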