├── LICENSE
├── README.md
├── __init__.py
├── baselines
│   ├── __init__.py
│   ├── bcm.py
│   ├── distgp.py
│   ├── dvigp.py
│   ├── gpoegp.py
│   ├── poegp.py
│   └── rbcm.py
├── data
│   ├── banana.mat
│   └── nasa.mat
├── experiments
│   ├── __init__.py
│   ├── banana.py
│   ├── baseline.py
│   ├── dvigp_nlpd.py
│   ├── image.py
│   ├── million.py
│   ├── million_rbcm.py
│   ├── paralell.py
│   └── solar.py
├── extra
│   └── modular_gp_logo.png
├── kernels
│   ├── __init__.py
│   ├── coregionalization.py
│   ├── kernel.py
│   ├── rbf.py
│   └── stationary.py
├── likelihoods
│   ├── __init__.py
│   ├── bernoulli.py
│   ├── gaussian.py
│   ├── hetgaussian.py
│   └── likelihood.py
├── models
│   ├── __init__.py
│   ├── chainedgp.py
│   ├── ensemblegp.py
│   ├── hetmoensemble.py
│   ├── moensemble.py
│   ├── svgp.py
│   └── svmogp.py
├── optimization
│   ├── __init__.py
│   └── algorithms.py
├── requirements.txt
└── util.py
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner.
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Modular Gaussian Processes
for Transfer Learning 2 | 3 | 4 | 5 | ## 🧩 Introduction 6 | 7 | This repository contains the implementation of our paper [Modular Gaussian Processes for Transfer Learning](https://arxiv.org/abs/2110.13515), accepted at the 35th Conference on Neural Information Processing Systems (NeurIPS 2021). The code is written entirely in Python and builds on the [PyTorch](https://pytorch.org/) framework. 8 | 9 | ### 🧩 Idea 10 | 11 | Here we provide a new framework for transfer learning based on *modular Gaussian processes* (GPs). The underlying idea is to avoid revisiting the training samples once a model is trained and well fitted, so that the model can be repurposed in combination with other models or with new data. We build *dictionaries* of modules (models), where each module stores only parameters and hyperparameters, not observations. Finally, we can build *meta-models* (GP models) from different combinations of modules without reusing the old data. 12 | 13 | ## 🧩 Citation 14 | 15 | If you use this code, please include the following citation: 16 | ``` 17 | @inproceedings{MorenoArtesAlvarez21, 18 | title = {Modular {G}aussian Processes for Transfer Learning}, 19 | author = {Moreno-Mu\~noz, Pablo and Art\'es-Rodr\'iguez, Antonio and \'Alvarez, Mauricio A}, 20 | booktitle = {Advances in Neural Information Processing Systems (NeurIPS)}, 21 | year = {2021} 22 | } 23 | ``` 24 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/__init__.py -------------------------------------------------------------------------------- /baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/baselines/__init__.py -------------------------------------------------------------------------------- /baselines/bcm.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | 15 | class BayesianCM(torch.nn.Module): 16 | """ 17 | -- Bayesian Committee Machine -- 18 | -- 19 | -- Adaptation to Pytorch + GP framework 20 | -- V.
Tresp "A Bayesian Committee Machine" 21 | -- Reference: https://www.dbs.ifi.lmu.de/~tresp/papers/bcm6.pdf 22 | """ 23 | 24 | def __init__(self, models, input_dim=1.0): 25 | super(BayesianCM, self).__init__() 26 | 27 | self.input_dim = int(input_dim) # dimension of x 28 | 29 | # Adjacent Local GP Models 30 | self.models = models # is a list 31 | 32 | def forward(self): 33 | return 1.0 34 | 35 | def predictive(self, x, y, x_new): 36 | # x is a list of x_k (distributed) 37 | # y is a list of y_k (distributed) 38 | 39 | K = len(self.models) 40 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 41 | correction = (1-K)/prior_v 42 | 43 | gp_m = torch.zeros(x_new.size()) 44 | gp_v = torch.zeros(x_new.size()) 45 | 46 | for k, model_k in enumerate(self.models): 47 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 48 | 49 | gp_m += m_k/v_k 50 | gp_v += 1.0/v_k 51 | 52 | gp_v = 1.0/(gp_v + correction) 53 | gp_m = gp_v*gp_m 54 | 55 | return gp_m, gp_v 56 | 57 | def rmse(self, x, y, x_new, f_new): 58 | f_gp,_ = self.predictive(x, y, x_new) 59 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 60 | return rmse 61 | 62 | def mae(self, x, y, x_new, f_new): 63 | f_gp,_ = self.predictive(x, y, x_new) 64 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 65 | return mae 66 | 67 | def nlpd(self, x, y, x_new, y_new): 68 | f_gp, v_gp = self.predictive(x, y, x_new) 69 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 70 | return nlpd 71 | 72 | # FOR HIERARCHICAL SETTINGS 73 | 74 | def predictive_layer(self, gps_m, gps_v, x_new): 75 | # gps_m is a list of gp_m (distributed) 76 | # gps_v is a list of gp_v (distributed) 77 | 78 | K = len(gps_m) 79 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 80 | prior_v += 1e-1 81 | correction = (1 - K)/prior_v 82 | 83 | gp_m = torch.zeros(x_new.size()) 84 | gp_v = torch.zeros(x_new.size()) 85 | 86 | for k, m_k in enumerate(gps_m): 87 | v_k = gps_v[k] 88 | 89 | gp_m += m_k / v_k 90 | gp_v += 1.0 / v_k 91 | 92 | gp_v = 1.0 / (gp_v + correction) 93 | gp_m = gp_v * gp_m 94 | 95 | return gp_m, gp_v 96 | 97 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 98 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 99 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 100 | return rmse 101 | 102 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 103 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 104 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 105 | return mae 106 | 107 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 108 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 109 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 110 | return nlpd -------------------------------------------------------------------------------- /baselines/distgp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Distributed GP" 2 | # by Deisenroth & Ng, ICML 2015 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # 
October 2021 14 | 15 | import torch 16 | 17 | class DistGP(torch.nn.Module): 18 | """ 19 | -- Distributed Gaussian Process Regression-- 20 | -- 21 | -- Adaptation to Pytorch + GP framework 22 | -- M. P. Deisenroth and J. W. Ng, "Distributed Gaussian Processes" 23 | -- Reference: http://proceedings.mlr.press/v37/deisenroth15.pdf 24 | """ 25 | 26 | def __init__(self, kernel, likelihood, input_dim=None): 27 | super(DistGP, self).__init__() 28 | 29 | if input_dim is None: 30 | input_dim = 1 31 | 32 | self.input_dim = int(input_dim) #dimension of x 33 | 34 | # GP Elements -- 35 | self.likelihood = likelihood #type of likelihood 36 | self.kernel = kernel #type of kernel 37 | 38 | 39 | def forward(self, x, y): 40 | identity = torch.eye(y.size(0)) 41 | s_n = torch.pow(self.likelihood.sigma, 2.0) 42 | 43 | K = self.kernel.K(x,x) 44 | KI = K + torch.mul(s_n,identity) 45 | iKI, _ = torch.solve(torch.eye(KI.size(0)), KI) 46 | yiKIy = y.t().mm(iKI).mm(y) 47 | 48 | log_marginal = -0.5*yiKIy - 0.5*torch.logdet(KI) 49 | return -log_marginal 50 | 51 | def predictive(self, x, y, x_new): 52 | 53 | Kx = self.kernel.K(x, x_new) 54 | Kxx = self.kernel.K(x_new, x_new) 55 | 56 | identity = torch.eye(y.size(0)) 57 | s_n = torch.pow(self.likelihood.sigma, 2.0) 58 | 59 | K = self.kernel.K(x, x) 60 | KI = K + torch.mul(s_n, identity) 61 | iKI, _ = torch.solve(torch.eye(KI.size(0)), KI) 62 | 63 | gp_m = Kx.t().mm(iKI).mm(y) 64 | gp_v = torch.diagonal(Kxx - Kx.t().mm(iKI).mm(Kx), 0)[:,None] 65 | 66 | return gp_m, gp_v 67 | 68 | def rmse(self, x, y, x_new, f_new): 69 | f_gp,_ = self.predictive(x, y, x_new) 70 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 71 | return rmse 72 | 73 | def mae(self, x, y, x_new, f_new): 74 | f_gp,_ = self.predictive(x, y, x_new) 75 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 76 | return mae 77 | 78 | def nlpd(self, x, y, x_new, y_new): 79 | f_gp, u_gp = self.predictive(x, y, x_new) 80 | #f_gp = torch.from_numpy(f_gp) 81 | #u_gp = torch.from_numpy(u_gp) 82 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 83 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 84 | return nlpd 85 | -------------------------------------------------------------------------------- /baselines/dvigp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Distributed Variational Inference in GPs" 2 | # by Y. Gal and M. van der Wilk 3 | # 4 | # Little adaptation without the LVM assumption 5 | # for testing and comparison. Simulates a distributed environment. 6 | # ----------------------------------------------------------------- 7 | # This script belongs to the ModularGP repo 8 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 9 | # Copyright (c) 2021 Pablo Moreno-Munoz 10 | # ----------------------------------------------------------------- 11 | # 12 | # 13 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 14 | # Section for Cognitive Systems 15 | # Technical University of Denmark (DTU) 16 | # October 2021 17 | 18 | import torch 19 | import numpy as np 20 | from torch.distributions import MultivariateNormal as Normal 21 | from torch.distributions import kl_divergence 22 | 23 | from GPy.inference.latent_function_inference.posterior import Posterior 24 | 25 | class DVIGP(torch.nn.Module): 26 | """ 27 | -- Distributed Variational Inference in Gaussian Processes -- 28 | -- 29 | -- Adaptation to Pytorch + GP framework 30 | -- Y. Gal et al. 
"Distributed Variational Inference in Sparse Gaussian 31 | Process Regression and Latent Variable Models" NIPS 2014 32 | """ 33 | def __init__(self, kernel, likelihood, M, nodes=1, input_dim=None): 34 | super(DVIGP, self).__init__() 35 | 36 | if input_dim is None: 37 | input_dim = 1 38 | 39 | # Nodes to distribute the computational load -- 40 | self.nodes = int(nodes) 41 | 42 | # Dimensions -- 43 | self.M = M #num. inducing 44 | self.input_dim = int(input_dim) #dimension of x 45 | 46 | # GP Elements -- 47 | self.likelihood = likelihood #type of likelihood 48 | self.kernel = kernel #type of kernel 49 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 50 | 51 | # Variational distribution -- 52 | self.q_m = torch.nn.Parameter(torch.randn(M,1), requires_grad=True) # variational: mean parameter 53 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 54 | 55 | def forward(self, x, y): 56 | x_nodes, y_nodes = self.data_to_nodes(x,y) 57 | 58 | # Variational parameters -- 59 | q_m = self.q_m 60 | q_L = torch.tril(self.q_L) 61 | q_S = torch.mm(q_L, q_L.t()) 62 | 63 | # Prior parameters (uses kernel) -- 64 | Kuu = self.kernel.K(self.z) 65 | iKuu, _ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse? 66 | 67 | # Distributions -- q(u), p(u) 68 | q_u = Normal(q_m.flatten(), q_S) 69 | p_u = Normal(torch.zeros(self.M), Kuu) 70 | 71 | global_params = {'q_m': q_m, 'q_L': q_L, 'q_S': q_S, 'Kuu': Kuu, 'iKuu': iKuu} 72 | 73 | # Distributed Expectations 74 | expectation = 0.0 75 | for k, y_k in enumerate(y_nodes): 76 | x_k = x_nodes[k] 77 | expectation_node = self.forward_node(x_k, y_k, global_params) 78 | expectation += expectation_node.sum() 79 | 80 | # KL divergence -- 81 | kl = kl_divergence(q_u, p_u) 82 | 83 | # Lower bound (ELBO) -- 84 | elbo = expectation - kl 85 | 86 | return -elbo 87 | 88 | def forward_node(self, x_node, y_node, global_params): 89 | q_m = global_params['q_m'] 90 | q_L = global_params['q_m'] 91 | q_S = global_params['q_S'] 92 | Kuu = global_params['Kuu'] 93 | iKuu = global_params['iKuu'] 94 | 95 | Kff = self.kernel.K(x_node, x_node) 96 | Kfu = self.kernel.K(x_node, self.z) 97 | Kuf = torch.transpose(Kfu, 0, 1) 98 | 99 | A = Kfu.mm(iKuu) 100 | AT = iKuu.mm(Kuf) 101 | 102 | m_f = A.mm(q_m) 103 | v_f = torch.diag(Kff + A.mm(q_S - Kuu).mm(AT)) 104 | 105 | # Expectation term of node -- 106 | expectation = self.likelihood.variational_expectation(y_node, m_f, v_f) 107 | 108 | return expectation 109 | 110 | def data_to_nodes(self, x, y): 111 | x_nodes = [] 112 | y_nodes = [] 113 | 114 | N = y.size(0) 115 | size_node = np.int(np.floor(N/self.nodes)) 116 | for k in range(self.nodes): 117 | if k < self.nodes - 1: 118 | x_nodes.append(x[(0+(k*size_node)):(0+((k+1)*size_node)), :]) 119 | y_nodes.append(y[(0+(k*size_node)):(0+((k+1)*size_node)), :]) 120 | else: 121 | x_nodes.append(x[(0+(k*size_node)):, :]) 122 | y_nodes.append(y[(0+(k*size_node)):, :]) 123 | 124 | return x_nodes, y_nodes 125 | 126 | def predictive(self, x_new): 127 | # Matrices 128 | q_m = self.q_m.detach().numpy() 129 | q_L = torch.tril(self.q_L) 130 | q_S = torch.mm(q_L, q_L.t()).detach().numpy() 131 | Kuu = self.kernel.K(self.z, self.z).detach().numpy() 132 | 133 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape)) 134 | Kx = self.kernel.K(self.z, x_new).detach().numpy() 135 | Kxx = self.kernel.K(x_new, x_new).detach().numpy() 136 | 137 | # GP Predictive Posterior - mean + variance 138 | gp_mu = np.dot(Kx.T, 
posterior.woodbury_vector) 139 | Kxx = np.diag(Kxx) 140 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 141 | 142 | gp = gp_mu 143 | gp_upper = gp_mu + 2*np.sqrt(gp_var) #+ 2 * self.likelihood.sigma.detach().numpy() 144 | gp_lower = gp_mu - 2*np.sqrt(gp_var) #- 2 * self.likelihood.sigma.detach().numpy() 145 | 146 | return gp, gp_upper, gp_lower 147 | 148 | def rmse(self, x_new, f_new): 149 | f_gp,_,_ = self.predictive(x_new) 150 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 151 | return rmse 152 | 153 | def mae(self, x_new, f_new): 154 | f_gp,_,_ = self.predictive(x_new) 155 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 156 | return mae 157 | 158 | def nlpd(self, x_new, y_new): 159 | f_gp, u_gp, _ = self.predictive(x_new) 160 | f_gp = torch.from_numpy(f_gp) 161 | u_gp = torch.from_numpy(u_gp) 162 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 163 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 164 | return nlpd 165 | 166 | -------------------------------------------------------------------------------- /baselines/gpoegp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Generalised Product of GP Experts" 2 | # by Cao & Fleet, 2014 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | import torch 16 | 17 | class GenPoeGP(torch.nn.Module): 18 | """ 19 | -- Generalised Product of GP Experts -- 20 | -- 21 | -- Adaptation to Pytorch + GP framework 22 | -- Y. Cao and D. J. 
Fleet, "Generalized Product of Experts (...)" 23 | -- Reference: https://arxiv.org/abs/1410.7827 24 | """ 25 | 26 | def __init__(self, models, input_dim=1.0): 27 | super(GenPoeGP, self).__init__() 28 | 29 | self.input_dim = int(input_dim) # dimension of x 30 | 31 | # Adjacent Local GP Models 32 | self.models = models # is a list 33 | 34 | def forward(self): 35 | return 1.0 36 | 37 | def predictive(self, x, y, x_new): 38 | # x is a list of x_k (distributed) 39 | # y is a list of y_k (distributed) 40 | 41 | K = len(self.models) 42 | beta_k = 1/K 43 | 44 | gp_m = torch.zeros(x_new.size()) 45 | gp_v = torch.zeros(x_new.size()) 46 | 47 | for k, model_k in enumerate(self.models): 48 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 49 | 50 | gp_m += (beta_k*m_k)/v_k 51 | gp_v += beta_k/v_k 52 | 53 | gp_v = 1.0/gp_v 54 | gp_m = gp_v*gp_m 55 | 56 | return gp_m, gp_v 57 | 58 | 59 | def rmse(self, x, y, x_new, f_new): 60 | f_gp,_ = self.predictive(x, y, x_new) 61 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 62 | return rmse 63 | 64 | def mae(self, x, y, x_new, f_new): 65 | f_gp,_ = self.predictive(x, y, x_new) 66 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 67 | return mae 68 | 69 | def nlpd(self, x, y, x_new, y_new): 70 | f_gp, v_gp = self.predictive(x, y, x_new) 71 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 72 | return nlpd 73 | 74 | # FOR HIERARCHICAL SETTINGS 75 | 76 | def predictive_layer(self, gps_m, gps_v, x_new): 77 | # gps_m is a list of gp_m (distributed) 78 | # gps_v is a list of gp_v (distributed) 79 | 80 | K = len(gps_m) 81 | beta_k = 1/K 82 | 83 | gp_m = torch.zeros(x_new.size()) 84 | gp_v = torch.zeros(x_new.size()) 85 | 86 | for k, m_k in enumerate(gps_m): 87 | v_k = gps_v[k] 88 | 89 | gp_m += (beta_k*m_k)/v_k 90 | gp_v += beta_k/v_k 91 | 92 | gp_v = 1.0/gp_v 93 | gp_m = gp_v*gp_m 94 | 95 | return gp_m, gp_v 96 | 97 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 98 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 99 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 100 | return rmse 101 | 102 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 103 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 104 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 105 | return mae 106 | 107 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 108 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 109 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 110 | return nlpd 111 | -------------------------------------------------------------------------------- /baselines/poegp.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Product of GP Experts" 2 | # by Ng & Deisenroth, 2014 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | 16 | import torch 17 | 18 | class PoeGP(torch.nn.Module): 19 | """ 20 | -- Product of GP Experts -- 21 | -- 22 | -- Adaptation to Pytorch + GP framework 23 | -- J. W. Ng and M. P. 
Deisenroth, "Hierarchical Mixtureof-Experts Model for (...) 2014" 24 | -- Reference: http://arxiv.org/abs/1412.3078 25 | """ 26 | def __init__(self, models, input_dim=1.0): 27 | super(PoeGP, self).__init__() 28 | 29 | self.input_dim = int(input_dim) # dimension of x 30 | 31 | # Adjacent Local GP Models 32 | self.models = models # is a list 33 | 34 | def forward(self): 35 | return 1.0 36 | 37 | def predictive(self, x, y, x_new): 38 | # x is a list of x_k (distributed) 39 | # y is a list of y_k (distributed) 40 | 41 | gp_m = torch.zeros(x_new.size()) 42 | gp_v = torch.zeros(x_new.size()) 43 | 44 | for k, model_k in enumerate(self.models): 45 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 46 | 47 | gp_m += m_k/v_k 48 | gp_v += 1.0/v_k 49 | 50 | gp_v = 1.0/gp_v 51 | gp_m = gp_v*gp_m 52 | 53 | return gp_m, gp_v 54 | 55 | def rmse(self, x, y, x_new, f_new): 56 | f_gp,_ = self.predictive(x, y, x_new) 57 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 58 | return rmse 59 | 60 | def mae(self, x, y, x_new, f_new): 61 | f_gp,_ = self.predictive(x, y, x_new) 62 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 63 | return mae 64 | 65 | def nlpd(self, x, y, x_new, y_new): 66 | f_gp, v_gp = self.predictive(x, y, x_new) 67 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 68 | return nlpd 69 | 70 | # FOR HIERARCHICAL SETTINGS 71 | 72 | def predictive_layer(self, gps_m, gps_v, x_new): 73 | # gps_m is a list of gp_m (distributed) 74 | # gps_v is a list of gp_v (distributed) 75 | 76 | gp_m = torch.zeros(x_new.size()) 77 | gp_v = torch.zeros(x_new.size()) 78 | 79 | for k, m_k in enumerate(gps_m): 80 | v_k = gps_v[k] 81 | 82 | gp_m += m_k / v_k 83 | gp_v += 1.0 / v_k 84 | 85 | gp_v = 1.0 / gp_v 86 | gp_m = gp_v * gp_m 87 | 88 | return gp_m, gp_v 89 | 90 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 91 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 92 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 93 | return rmse 94 | 95 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 96 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 97 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 98 | return mae 99 | 100 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 101 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 102 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 103 | return nlpd -------------------------------------------------------------------------------- /baselines/rbcm.py: -------------------------------------------------------------------------------- 1 | # Implementation of the "Robust Bayesian Committee Machine" 2 | # by Deisenroth & Ng, ICML 2015 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | import torch 16 | 17 | class RobustBayesianCM(torch.nn.Module): 18 | """ 19 | -- Robust Bayesian Committee Machine -- 20 | -- 21 | -- Adaptation to Pytorch + GP framework 22 | -- M. P. Deisenroth and J. W. 
Ng, "Distributed Gaussian Processes" 23 | -- Reference: http://proceedings.mlr.press/v37/deisenroth15.pdf 24 | """ 25 | def __init__(self, models, input_dim=1.0): 26 | super(RobustBayesianCM, self).__init__() 27 | 28 | self.input_dim = int(input_dim) # dimension of x 29 | 30 | # Adjacent Local GP Models 31 | self.models = models # is a list 32 | 33 | def forward(self): 34 | return 1.0 35 | 36 | def predictive(self, x, y, x_new): 37 | # x is a list of x_k (distributed) 38 | # y is a list of y_k (distributed) 39 | 40 | K = len(self.models) 41 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 42 | log_prior_v = torch.log(prior_v) 43 | 44 | gp_m = torch.zeros(x_new.size()) 45 | gp_v = torch.zeros(x_new.size()) 46 | correction = torch.zeros(x_new.size()) 47 | 48 | for k, model_k in enumerate(self.models): 49 | m_k, v_k = model_k.predictive(x[k], y[k], x_new) 50 | 51 | beta_k = 0.5*(log_prior_v - torch.log(v_k)) 52 | correction += beta_k 53 | 54 | gp_m += m_k/v_k 55 | gp_v += 1.0/v_k 56 | 57 | correction = (1-correction)/prior_v 58 | gp_v = 1.0/(gp_v + correction) 59 | gp_m = gp_v*gp_m 60 | 61 | return gp_m, gp_v 62 | 63 | def rmse(self, x, y, x_new, f_new): 64 | f_gp,_ = self.predictive(x, y, x_new) 65 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 66 | return rmse 67 | 68 | def mae(self, x, y, x_new, f_new): 69 | f_gp,_ = self.predictive(x, y, x_new) 70 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 71 | return mae 72 | 73 | def nlpd(self, x, y, x_new, y_new): 74 | f_gp, v_gp = self.predictive(x, y, x_new) 75 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 76 | return nlpd 77 | 78 | # FOR HIERARCHICAL SETTINGS 79 | 80 | def predictive_layer(self, gps_m, gps_v, x_new): 81 | # gps_m is a list of gp_m (distributed) 82 | # gps_v is a list of gp_v (distributed) 83 | 84 | K = len(gps_m) 85 | prior_v = torch.diagonal(self.models[0].kernel.K(x_new, x_new), 0)[:, None] 86 | log_prior_v = torch.log(prior_v) 87 | log_prior_v[torch.isnan(log_prior_v)] = 1e-3 88 | 89 | gp_m = torch.zeros(x_new.size()) 90 | gp_v = torch.zeros(x_new.size()) 91 | correction = torch.zeros(x_new.size()) 92 | 93 | for k, m_k in enumerate(gps_m): 94 | v_k = gps_v[k] 95 | 96 | log_v_k = torch.log(v_k) 97 | log_v_k[torch.isnan(log_v_k)] = 1e-3 98 | 99 | beta_k = 0.5 * (log_prior_v - log_v_k) 100 | correction += beta_k 101 | 102 | gp_m += m_k / v_k 103 | gp_v += 1.0 / v_k 104 | 105 | correction = (1 - correction) / prior_v 106 | gp_v = 1.0 / (gp_v + correction) 107 | gp_m = gp_v * gp_m 108 | 109 | return gp_m, gp_v 110 | 111 | def rmse_layer(self, gps_m, gps_v, x_new, f_new): 112 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 113 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 114 | return rmse 115 | 116 | def mae_layer(self, gps_m, gps_v, x_new, f_new): 117 | f_gp,_ = self.predictive_layer(gps_m, gps_v, x_new) 118 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 119 | return mae 120 | 121 | def nlpd_layer(self, gps_m, gps_v, x_new, y_new): 122 | f_gp, v_gp = self.predictive_layer(gps_m, gps_v, x_new) 123 | nlpd = - torch.mean(self.models[0].likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 124 | return nlpd -------------------------------------------------------------------------------- /data/banana.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/data/banana.mat -------------------------------------------------------------------------------- /data/nasa.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/data/nasa.mat -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/banana.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Banana Classification 15 | # ----------------------------------------------------------------- 16 | 17 | from kernels.rbf import RBF 18 | from likelihoods.gaussian import Gaussian 19 | from likelihoods.bernoulli import Bernoulli 20 | from models.svgp import SVGP 21 | from models.ensemblegp import EnsembleGP 22 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel 23 | from optimization.algorithms import AlgorithmVEM 24 | from sklearn.model_selection import train_test_split 25 | 26 | import torch 27 | import numpy as np 28 | import scipy.io as sio 29 | import matplotlib.pyplot as plt 30 | from matplotlib2tikz import save as tikz_save 31 | 32 | plt.rc('text', usetex=True) 33 | plt.rc('font', family='serif') 34 | 35 | # COOLORS.CO palettes 36 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 37 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 38 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 39 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 40 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 41 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 42 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 43 | 44 | color_palette = color_palette_6 45 | color_0 = color_palette_6[0] 46 | color_1 = color_palette_6[4] 47 | 48 | # Load Data -- 49 | data = sio.loadmat('../data/banana.mat') 50 | y_banana = data['banana_Y'] 51 | x_banana = data['banana_X'] 52 | 53 | trials = 10 54 | nlpd_metrics = np.zeros((1,trials)) 55 | 56 | plot_local = False 57 | plot_ensemble = False 58 | save = False 59 | 60 | for trial in range(trials): 61 | print('TRIAL = ' + str(trial) + '/' + str(trials)) 62 | x, x_test, y, y_test = train_test_split(x_banana, y_banana, test_size=0.33, random_state=42) 63 | 64 | x_test = torch.from_numpy(x_test).float() 65 | y_test = torch.from_numpy(y_test).float() 66 | 67 | # Sorting wrt first input dimension 68 | y = y[x[:,0].argsort()] 69 | x = x[x[:,0].argsort()] 70 | 71 | # plot 
limits 72 | max_x = x[:,0].max() 73 | max_y = x[:,1].max() 74 | min_x = x[:,0].min() 75 | min_y = x[:,1].min() 76 | 77 | # Division into 4 regions 78 | x_1 = torch.from_numpy(x[(x[:,0]<0.0) & (x[:,1]<0.0),:]).float() 79 | y_1 = torch.from_numpy(y[(x[:,0]<0.0) & (x[:,1]<0.0),:]).float() 80 | 81 | x_2 = torch.from_numpy(x[(x[:,0]>0.0) & (x[:,1]<0.0),:]).float() 82 | y_2 = torch.from_numpy(y[(x[:,0]>0.0) & (x[:,1]<0.0),:]).float() 83 | 84 | x_3 = torch.from_numpy(x[(x[:,0]>0.0) & (x[:,1]>0.0),:]).float() 85 | y_3 = torch.from_numpy(y[(x[:,0]>0.0) & (x[:,1]>0.0),:]).float() 86 | 87 | x_4 = torch.from_numpy(x[(x[:,0]<0.0) & (x[:,1]>0.0),:]).float() 88 | y_4 = torch.from_numpy(y[(x[:,0]<0.0) & (x[:,1]>0.0),:]).float() 89 | 90 | # All tasks 91 | x_tasks = [x_1, x_2, x_3, x_4] 92 | y_tasks = [y_1, y_2, y_3, y_4] 93 | 94 | K = len(x_tasks) 95 | sigmoid = torch.nn.Sigmoid() 96 | 97 | M_k = 3 # inducing points per side 98 | N_test = 80 # test points per side 99 | 100 | ########################### 101 | # # 102 | # DISTRIBUTED TASKS # 103 | # # 104 | ########################### 105 | 106 | models = [] 107 | for k, x_k in enumerate(x_tasks): 108 | 109 | print('- -') 110 | print('----- TASK k=' + str(k + 1) + ' ------') 111 | print('- -') 112 | 113 | y_k = y_tasks[k] 114 | kernel_k = RBF() 115 | likelihood_k = Bernoulli() 116 | model_k = SVGP(kernel_k, likelihood_k, M_k**2, input_dim=2) 117 | 118 | # initial grid of inducing-points 119 | mx = torch.mean(x_k[:, 0]) 120 | my = torch.mean(x_k[:, 1]) 121 | vx = torch.var(x_k[:, 0]) 122 | vy = torch.var(x_k[:, 1]) 123 | 124 | zy = np.linspace(my - 3*vy, my + 3*vy, M_k) 125 | zx = np.linspace(mx - 3*vx, mx + 3*vx, M_k) 126 | ZX, ZY = np.meshgrid(zx, zy) 127 | ZX = ZX.reshape(M_k ** 2, 1) 128 | ZY = ZY.reshape(M_k ** 2, 1) 129 | Z = np.hstack((ZX, ZY)) 130 | z_k = torch.from_numpy(Z).float() 131 | 132 | model_k.z = torch.nn.Parameter(z_k, requires_grad=True) 133 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_k, iters=7) 134 | 135 | vem_algorithm.ve_its = 20 136 | vem_algorithm.vm_its = 10 137 | vem_algorithm.lr_m = 1e-3 138 | vem_algorithm.lr_L = 1e-6 139 | vem_algorithm.lr_hyp = 1e-6 140 | vem_algorithm.lr_z = 1e-4 141 | 142 | vem_algorithm.fit() 143 | models.append(model_k) 144 | 145 | # NLPD -- Metrics 146 | nlpd = model_k.nlpd(x_test, y_test) 147 | 148 | print('Local Model ('+str(k+1)+')- NLPD: ', nlpd) 149 | print(' ') 150 | 151 | if plot_local: 152 | 153 | min_tx = x[:,0].min() - 0.15 154 | min_ty = x[:,1].min() - 0.15 155 | max_tx = x[:,0].max() + 0.15 156 | max_ty = x[:,1].max() + 0.15 157 | 158 | ty = np.linspace(min_ty, max_ty, N_test) 159 | tx = np.linspace(min_tx, max_tx, N_test) 160 | TX_grid, TY_grid = np.meshgrid(tx, ty) 161 | TX = TX_grid.reshape(N_test ** 2, 1) 162 | TY = TY_grid.reshape(N_test ** 2, 1) 163 | X_test = np.hstack((TX, TY)) 164 | x_test = torch.from_numpy(X_test).float() 165 | 166 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 167 | gp = sigmoid(torch.from_numpy(gp)) 168 | 169 | # Plot 170 | plt.figure(figsize=(7, 6)) 171 | ax = plt.axes() 172 | plt.plot(x_k[y_k[:, 0] == 0, 0], x_k[y_k[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5) 173 | plt.plot(x_k[y_k[:, 0] == 1, 0], x_k[y_k[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5) 174 | plt.plot(model_k.z[:,0].detach(), model_k.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 175 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 176 | levels=[0.25, 0.5, 0.75], zorder=10) 177 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 178 | 179 | 
plt.title(r'Banana Recyclable GP - '+ str(k + 1) ) 180 | plt.xlabel(r'$x_1$ input') 181 | plt.ylabel(r'$x_2$ input') 182 | plt.xlim(-2.5, 2.5) 183 | plt.ylim(-2.5, 2.5) 184 | 185 | if save: 186 | plt.savefig(fname='./figs/banana/banana_task_' + str(k + 1) + '.pdf', format='pdf') 187 | 188 | #plt.show() 189 | plt.close() 190 | 191 | ########################### 192 | # # 193 | # ENSEMBLE INFERENCE # 194 | # # 195 | ########################### 196 | 197 | print('- -') 198 | print('----- ENSEMBLE ------') 199 | print('- -') 200 | 201 | M_e = 5 202 | kernel = RBF() 203 | likelihood = Bernoulli() 204 | model_e = EnsembleGP(kernel, likelihood, models, M_e**2, input_dim=2) 205 | 206 | # initial grid of inducing-points 207 | mx = np.mean(x[:, 0]) 208 | my = np.mean(x[:, 1]) 209 | vx = np.var(x[:, 0]) 210 | vy = np.var(x[:, 1]) 211 | 212 | zy = np.linspace(my - 1.5*vy, my + 1.5*vy, M_e) 213 | zx = np.linspace(mx - 1.5*vx, mx + 1.5*vx, M_e) 214 | ZX, ZY = np.meshgrid(zx, zy) 215 | ZX = ZX.reshape(M_e ** 2, 1) 216 | ZY = ZY.reshape(M_e ** 2, 1) 217 | Z = np.hstack((ZX, ZY)) 218 | z_e = torch.from_numpy(Z).float() 219 | 220 | model_e.z = torch.nn.Parameter(z_e, requires_grad=True) 221 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=20) 222 | 223 | vem_algorithm.ve_its = 20 224 | vem_algorithm.vm_its = 10 225 | vem_algorithm.lr_m = 1e-3 226 | vem_algorithm.lr_L = 1e-5 227 | vem_algorithm.lr_hyp = 1e-6 228 | vem_algorithm.lr_z = 1e-5 229 | 230 | vem_algorithm.fit() 231 | 232 | # NLPD -- Metrics 233 | nlpd = model_e.nlpd(x_test, y_test) 234 | 235 | nlpd_metrics[0, trial] = nlpd 236 | 237 | print('Banana Ensemble NLPD: ', nlpd) 238 | print(' ') 239 | 240 | if plot_ensemble: 241 | 242 | min_tx = x[:,0].min() - 0.15 243 | min_ty = x[:,1].min() - 0.15 244 | max_tx = x[:,0].max() + 0.15 245 | max_ty = x[:,1].max() + 0.15 246 | 247 | ty = np.linspace(min_ty, max_ty, N_test) 248 | tx = np.linspace(min_tx, max_tx, N_test) 249 | TX_grid, TY_grid = np.meshgrid(tx, ty) 250 | TX = TX_grid.reshape(N_test ** 2, 1) 251 | TY = TY_grid.reshape(N_test ** 2, 1) 252 | X_test = np.hstack((TX, TY)) 253 | x_test = torch.from_numpy(X_test).float() 254 | 255 | gp, _, _ = model_e.predictive(x_test) 256 | gp = sigmoid(torch.from_numpy(gp)) 257 | 258 | # Plot 259 | plt.figure(figsize=(7, 6)) 260 | ax = plt.axes() 261 | plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5) 262 | plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5) 263 | plt.plot(model_e.z[:,0].detach(), model_e.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 264 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 265 | levels=[0.25, 0.5, 0.75], zorder=10) 266 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 267 | 268 | plt.title(r'Banana GP Ensemble') 269 | plt.xlabel(r'$x_1$ input') 270 | plt.ylabel(r'$x_2$ input') 271 | plt.xlim(-2.5, 2.5) 272 | plt.ylim(-2.5, 2.5) 273 | 274 | if save: 275 | plt.savefig(fname='./figs/banana/banana_task_ensemble.pdf', format='pdf') 276 | 277 | plt.show() 278 | #plt.close() 279 | 280 | -------------------------------------------------------------------------------- /experiments/baseline.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # 
----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | # 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Baselines 15 | # ----------------------------------------------------------------- 16 | 17 | import torch 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | 21 | plt.rc('text', usetex=True) 22 | plt.rc('font', family='serif') 23 | 24 | # COOLORS.CO palettes 25 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 26 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 27 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 28 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 29 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 30 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 31 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 32 | 33 | color_palette = color_palette_2 34 | 35 | from kernels.rbf import RBF 36 | from likelihoods.gaussian import Gaussian 37 | from models.svgp import SVGP 38 | from models.ensemblegp import EnsembleGP 39 | from baselines.distgp import DistGP 40 | from baselines.poegp import PoeGP 41 | from baselines.gpoegp import GenPoeGP 42 | from baselines.bcm import BayesianCM 43 | from baselines.rbcm import RobustBayesianCM 44 | from baselines.dvigp import DVIGP 45 | from optimization.algorithms import AlgorithmVEM 46 | from optimization.algorithms import GPR_Optimizer 47 | from util import smooth_function 48 | 49 | #experiment = '10k' 50 | experiment = '100k' 51 | #experiment = '1m' 52 | 53 | if experiment == '10k': 54 | node_overlapping = 1 55 | N_k = 200 56 | trials = 10 57 | elif experiment == '100k': 58 | node_overlapping = 5 59 | N_k = 400 60 | trials = 10 61 | elif experiment == '1m': 62 | node_overlapping = 100 63 | N_k = 800 64 | trials = 10 65 | else: 66 | raise ValueError('Experiment indicator not valid! 
Must be {10k, 100k or 1m}') 67 | 68 | M_k = 3 69 | M_e = 35 70 | plot_local = True 71 | plot_ensemble = True 72 | save = True 73 | 74 | recy_metrics = np.zeros((3,trials)) 75 | poe_metrics = np.zeros((3,trials)) 76 | gpoe_metrics = np.zeros((3,trials)) 77 | bcm_metrics = np.zeros((3,trials)) 78 | rbcm_metrics = np.zeros((3,trials)) 79 | 80 | for trial in range(trials): 81 | 82 | tasks = 50 83 | T = 50 84 | 85 | print('TRIAL = '+str(trial)+'/'+str(trials)) 86 | 87 | ########################### 88 | # # 89 | # DISTRIBUTED TASKS # 90 | # # 91 | ########################### 92 | 93 | min_x = 0.0 94 | max_x = T * 0.1 95 | segment_x = (max_x - min_x) / tasks 96 | x_tasks = [] 97 | y_tasks = [] 98 | for n in range(node_overlapping): 99 | for k in range(T): 100 | x_k = ((min_x + (k * segment_x)) - (min_x + ((k + 1) * segment_x))) * torch.rand(N_k, 1) + ( 101 | min_x + ((k + 1) * segment_x)) 102 | x_k, _ = torch.sort(x_k, dim=0) 103 | y_k = smooth_function(x_k) + 2.0 * torch.randn(N_k, 1) 104 | x_tasks.append(x_k) 105 | y_tasks.append(y_k) 106 | 107 | tasks = T * node_overlapping 108 | 109 | print('# of tasks: ', tasks) 110 | 111 | ########################### 112 | # # 113 | # PARALLEL INFERENCE # 114 | # # 115 | ########################### 116 | 117 | N_k_test = 400 118 | x_test = torch.linspace(min_x-0.5, max_x+0.5, N_k_test)[:, None] 119 | models = [] # for recyclable GPs 120 | models_dist = [] # for distributed GPs 121 | x_all = [] # for distributed GPs 122 | y_all = [] # for distributed GPs 123 | for k, x_k in enumerate(x_tasks): 124 | print('- -') 125 | print('----- TASK k='+str(k+1)+' ------') 126 | print('- -') 127 | ###################################################### 128 | # 1. RECYCLABLE GP 129 | ###################################################### 130 | kernel_k = RBF() 131 | likelihood_k = Gaussian(fit_noise=False) 132 | model_k = SVGP(kernel_k, likelihood_k, M_k) 133 | 134 | z_k_min = min_x + ((k%T)*segment_x) 135 | z_k_max = min_x + (((k%T)+1)*segment_x) 136 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True) 137 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=15) 138 | 139 | vem_algorithm.ve_its = 20 140 | vem_algorithm.vm_its = 10 141 | vem_algorithm.lr_m = 1e-6 142 | vem_algorithm.lr_L = 1e-10 143 | vem_algorithm.lr_hyp = 1e-10 144 | vem_algorithm.lr_z = 1e-10 145 | 146 | vem_algorithm.fit() 147 | models.append(model_k) 148 | 149 | ###################################################### 150 | # 2. 
DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 151 | ###################################################### 152 | 153 | kernel_j = RBF() 154 | likelihood_j = Gaussian(fit_noise=True) 155 | model_j = DistGP(kernel_j, likelihood_j) 156 | GPR_Optimizer(model_j, x_k, y_tasks[k]) 157 | 158 | models_dist.append(model_j) 159 | x_all.append(x_k) 160 | y_all.append(y_tasks[k]) 161 | 162 | if plot_local: 163 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 164 | disgp_m, disgp_v = model_j.predictive(x_k, y_tasks[k], x_test) 165 | 166 | disgp = disgp_m.detach().numpy() 167 | disgp_upper = (disgp_m + 2 * torch.sqrt(disgp_v)).detach().numpy() + 2 * model_j.likelihood.sigma.detach().numpy() 168 | disgp_lower = (disgp_m - 2 * torch.sqrt(disgp_v)).detach().numpy() - 2 * model_j.likelihood.sigma.detach().numpy() 169 | 170 | plt.figure(figsize=(12, 4)) 171 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k%len(color_palette)], markersize=2.5, markeredgewidth=0.75) 172 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k, 1), color=color_palette[k%len(color_palette)], linestyle='', marker='.',markersize=5) 173 | 174 | plt.plot(x_test, gp, 'k-', linewidth=1.5) 175 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5') 176 | plt.plot(x_test, gp_upper, 'k-', linewidth=2.5) 177 | plt.plot(x_test, gp_lower, 'k-', linewidth=2.5) 178 | 179 | plt.plot(x_test, disgp, 'b-', linewidth=1.5) 180 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5') 181 | plt.plot(x_test, disgp_upper, 'b-', linewidth=2.5) 182 | plt.plot(x_test, disgp_lower, 'b-', linewidth=2.5) 183 | 184 | plt.title(r'Variational Sparse GP -- (task=' + str(k+1) + ')') 185 | plt.xlabel(r'Input, $x$') 186 | plt.ylabel(r'Output, $y$') 187 | plt.xlim(min_x - 0.5, max_x + 0.5) 188 | plt.ylim(-22.0, 22.0) 189 | 190 | if save: 191 | plt.savefig(fname='./figs/baseline/distributed_task_'+str(k+1)+'.pdf',format='pdf') 192 | 193 | plt.close() 194 | #plt.show() 195 | 196 | ########################### 197 | # # 198 | # ENSEMBLE INFERENCE # 199 | # # 200 | ########################### 201 | print('- -') 202 | print('----- ENSEMBLE ------') 203 | print('- -') 204 | 205 | # TEST DATA FOR EVALUATION 206 | N_e_test = 400 207 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 208 | f_test_ensemble = smooth_function(x_test_ensemble) 209 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1) 210 | 211 | ###################################################### 212 | # 1. 
RECYCLABLE GP 213 | ###################################################### 214 | 215 | kernel = RBF() 216 | likelihood = Gaussian(fit_noise=False) 217 | model_e = EnsembleGP(kernel, likelihood, models, M_e) 218 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:,None], requires_grad=True) 219 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=10) 220 | 221 | vem_algorithm.ve_its = 30 222 | vem_algorithm.vm_its = 10 223 | vem_algorithm.lr_m = 1e-3 224 | vem_algorithm.lr_L = 1e-6 225 | vem_algorithm.lr_hyp = 1e-8 226 | vem_algorithm.lr_z = 1e-8 227 | 228 | vem_algorithm.fit() 229 | 230 | nlpd = model_e.nlpd(x_test_ensemble, y_test_ensemble) 231 | rmse = model_e.rmse(x_test_ensemble, f_test_ensemble) 232 | mae = model_e.mae(x_test_ensemble, f_test_ensemble) 233 | 234 | recy_metrics[0, trial] = nlpd 235 | recy_metrics[1, trial] = rmse 236 | recy_metrics[2, trial] = mae 237 | 238 | print('Recyclable - NLPD: ', nlpd) 239 | print('Recyclable - RMSE: ', rmse) 240 | print('Recyclable - MAE: ', mae) 241 | print(' ') 242 | 243 | ###################################################### 244 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 245 | ###################################################### 246 | 247 | # A. POE _________// 248 | 249 | poe_model = PoeGP(models_dist) 250 | 251 | nlpd = poe_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 252 | rmse = poe_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 253 | mae = poe_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 254 | 255 | poe_metrics[0, trial] = nlpd 256 | poe_metrics[1, trial] = rmse 257 | poe_metrics[2, trial] = mae 258 | 259 | print('POE-NLPD: ', nlpd) 260 | print('POE-RMSE: ', rmse) 261 | print('POE-MAE: ', mae) 262 | print(' ') 263 | 264 | # B. GPOE _________// 265 | 266 | gpoe_model = GenPoeGP(models_dist) 267 | 268 | nlpd = gpoe_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 269 | rmse = gpoe_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 270 | mae = gpoe_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 271 | 272 | gpoe_metrics[0, trial] = nlpd 273 | gpoe_metrics[1, trial] = rmse 274 | gpoe_metrics[2, trial] = mae 275 | 276 | print('GenPOE-NLPD: ', nlpd) 277 | print('GenPOE-RMSE: ', rmse) 278 | print('GenPOE-MAE: ', mae) 279 | print(' ') 280 | 281 | # C. BCM _________// 282 | 283 | bcm_model = BayesianCM(models_dist) 284 | 285 | nlpd = bcm_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 286 | rmse = bcm_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 287 | mae = bcm_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 288 | 289 | bcm_metrics[0, trial] = nlpd 290 | bcm_metrics[1, trial] = rmse 291 | bcm_metrics[2, trial] = mae 292 | 293 | print('BCM-NLPD: ', nlpd) 294 | print('BCM-RMSE: ', rmse) 295 | print('BCM-MAE: ', mae) 296 | print(' ') 297 | 298 | # D. 
RBCM _________// 299 | 300 | rbcm_model = RobustBayesianCM(models_dist) 301 | 302 | nlpd = rbcm_model.nlpd(x_all, y_all, x_test_ensemble, y_test_ensemble) 303 | rmse = rbcm_model.rmse(x_all, y_all, x_test_ensemble, f_test_ensemble) 304 | mae = rbcm_model.mae(x_all, y_all, x_test_ensemble, f_test_ensemble) 305 | 306 | rbcm_metrics[0, trial] = nlpd 307 | rbcm_metrics[1, trial] = rmse 308 | rbcm_metrics[2, trial] = mae 309 | 310 | print('RBCM-NLPD: ', nlpd) 311 | print('RBCM-RMSE: ', rmse) 312 | print('RBCM-MAE: ', mae) 313 | print(' ') 314 | 315 | # save to csv file 316 | np.savetxt('./metrics/recy_metrics_'+ experiment +'.csv', recy_metrics, delimiter=',') 317 | np.savetxt('./metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',') 318 | np.savetxt('./metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',') 319 | np.savetxt('./metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',') 320 | np.savetxt('./metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',') 321 | 322 | if plot_ensemble: 323 | gp, gp_upper, gp_lower = model_e.predictive(x_test_ensemble) 324 | 325 | poe_m, poe_v = poe_model.predictive(x_all, y_all, x_test_ensemble) 326 | gpoe_m, gpoe_v = gpoe_model.predictive(x_all, y_all, x_test_ensemble) 327 | bcm_m, bcm_v = bcm_model.predictive(x_all, y_all, x_test_ensemble) 328 | rbcm_m, rbcm_v = rbcm_model.predictive(x_all, y_all, x_test_ensemble) 329 | 330 | # Plot Ensemble 331 | plt.figure(figsize=(12, 4)) 332 | for k in range(50): 333 | #if k%10==0: 334 | plt.plot(x_tasks[k], y_tasks[k], ls='-', color=color_palette[k%len(color_palette)], markersize=2.5, markeredgewidth=0.75) 335 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k,1), color=color_palette[k%len(color_palette)], linestyle='', marker='.', markersize=5) 336 | 337 | plt.plot(model_e.z.detach(), -20.0 * torch.ones(M_e, 1), color='r', linestyle='', marker='x', markersize=5, markeredgewidth=1.0) 338 | plt.plot(x_test_ensemble, gp, 'k-', linewidth=1.5) 339 | plt.plot(x_test_ensemble, gp_upper, 'k-', linewidth=2.5) 340 | plt.plot(x_test_ensemble, gp_lower, 'k-', linewidth=2.5) 341 | 342 | poe = poe_m.detach().numpy() 343 | poe_upper = (poe_m + 2 * torch.sqrt(poe_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 344 | poe_lower = (poe_m - 2 * torch.sqrt(poe_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 345 | 346 | plt.plot(x_test, poe, 'g-', linewidth=1.5) 347 | plt.plot(x_test, poe_upper, 'g-', linewidth=2.5) 348 | plt.plot(x_test, poe_lower, 'g-', linewidth=2.5) 349 | 350 | gpoe = gpoe_m.detach().numpy() 351 | gpoe_upper = (gpoe_m + 2 * torch.sqrt(gpoe_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 352 | gpoe_lower = (gpoe_m - 2 * torch.sqrt(gpoe_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 353 | 354 | plt.plot(x_test, gpoe, 'm-', linewidth=1.5) 355 | plt.plot(x_test, gpoe_upper, 'm-', linewidth=2.5) 356 | plt.plot(x_test, gpoe_lower, 'm-', linewidth=2.5) 357 | 358 | bcm = bcm_m.detach().numpy() 359 | bcm_upper = (bcm_m + 2 * torch.sqrt(bcm_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 360 | bcm_lower = (bcm_m - 2 * torch.sqrt(bcm_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 361 | 362 | plt.plot(x_test, bcm, 'r-', linewidth=1.5) 363 | plt.plot(x_test, bcm_upper, 'r-', linewidth=2.5) 364 | plt.plot(x_test, bcm_lower, 'r-', linewidth=2.5) 365 | 366 | rbcm = rbcm_m.detach().numpy() 367 | rbcm_upper = (rbcm_m + 2 *
torch.sqrt(rbcm_v)).detach().numpy() # + 2*model_2.likelihood.sigma.detach().numpy() 368 | rbcm_lower = (rbcm_m - 2 * torch.sqrt(rbcm_v)).detach().numpy() # - 2*model_2.likelihood.sigma.detach().numpy() 369 | 370 | plt.plot(x_test, rbcm, 'b-', linewidth=1.5) 371 | plt.plot(x_test, rbcm_upper, 'b-', linewidth=2.5) 372 | plt.plot(x_test, rbcm_lower, 'b-', linewidth=2.5) 373 | 374 | plt.title(r'Ensemble GP Model -- (tasks='+str(tasks)+')') 375 | plt.xlabel(r'Input, $x$') 376 | plt.ylabel(r'Output, $y$') 377 | plt.xlim(min_x-0.5, max_x+0.5) 378 | plt.ylim(-22.0, 22.0) 379 | 380 | if save: 381 | plt.savefig(fname='./figs/baseline/distributed_ensemble.pdf',format='pdf') 382 | 383 | #plt.show() 384 | plt.close() -------------------------------------------------------------------------------- /experiments/dvigp_nlpd.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Baselines / Y. Gal et al. (2014) 15 | # ----------------------------------------------------------------- 16 | 17 | import torch 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | 21 | plt.rc('text', usetex=True) 22 | plt.rc('font', family='serif') 23 | 24 | # COOLORS.CO palettes 25 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 26 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 27 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 28 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 29 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 30 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 31 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 32 | 33 | color_palette = color_palette_2 34 | 35 | from kernels.rbf import RBF 36 | from likelihoods.gaussian import Gaussian 37 | from baselines.distgp import DistGP 38 | from baselines.dvigp import DVIGP 39 | from optimization.algorithms import GPR_Optimizer 40 | #from models.svgp import predictive 41 | from optimization.algorithms import vem_algorithm 42 | from util import smooth_function 43 | 44 | experiment = '10k' 45 | #experiment = '100k' 46 | #experiment = '1m' 47 | 48 | if experiment == '10k': 49 | node_overlapping = 1 50 | N_k = 200 51 | trials = 10 52 | N = 10000 53 | elif experiment == '100k': 54 | node_overlapping = 5 55 | N_k = 400 56 | trials = 10 57 | N = 100000 58 | elif experiment == '1m': 59 | node_overlapping = 100 60 | N_k = 800 61 | trials = 10 62 | N = 1000000 63 | else: 64 | raise ValueError('Experiment indicator not valid! 
Must be {10k, 100k or 1m}') 65 | 66 | M = 35 67 | plot_local = False 68 | plot_ensemble = False 69 | save = False 70 | 71 | dvigp_metrics = np.zeros((3,trials)) 72 | 73 | for trial in range(trials): 74 | 75 | tasks = 50 76 | T = 50 77 | 78 | print('TRIAL = '+str(trial)+'/'+str(trials)) 79 | 80 | ########################### 81 | # # 82 | # DISTRIBUTED TASKS # 83 | # # 84 | ########################### 85 | 86 | min_x = 0.0 87 | max_x = T * 0.1 88 | x = (min_x - max_x)*torch.rand(N, 1) + max_x 89 | x, _ = torch.sort(x, dim=0) 90 | y = smooth_function(x) + 2.0*torch.randn(N, 1) 91 | 92 | tasks = T * node_overlapping 93 | 94 | print('Number # of tasks: ', tasks) 95 | 96 | ###################################################### 97 | # 1. DISTRIBUTED VIGP (Gal 2014) 98 | ###################################################### 99 | 100 | kernel_j = RBF() 101 | likelihood_j = Gaussian(fit_noise=True) 102 | 103 | model = DVIGP(kernel_j, likelihood_j, M, nodes=tasks) 104 | model.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M)[:,None], requires_grad=True) 105 | vem_algorithm(model, x, y, em_iters=20, plot=False) 106 | 107 | # TEST DATA FOR EVALUATION 108 | N_e_test = 400 109 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 110 | f_test_ensemble = smooth_function(x_test_ensemble) 111 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1) 112 | 113 | nlpd = model.nlpd(x_test_ensemble, y_test_ensemble) 114 | rmse = model.rmse(x_test_ensemble, f_test_ensemble) 115 | mae = model.mae(x_test_ensemble, f_test_ensemble) 116 | 117 | dvigp_metrics[0, trial] = nlpd 118 | dvigp_metrics[1, trial] = rmse 119 | dvigp_metrics[2, trial] = mae 120 | 121 | print('Distributed VIGP - NLPD: ', nlpd) 122 | print('Distributed VIGP - RMSE: ', rmse) 123 | print('Distributed VIGP - MAE: ', mae) 124 | print(' ') 125 | 126 | 127 | -------------------------------------------------------------------------------- /experiments/image.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- MNIST 15 | # ----------------------------------------------------------------- 16 | 17 | 18 | import torch 19 | import torchvision 20 | import numpy as np 21 | import matplotlib.pyplot as plt 22 | 23 | from kernels.rbf import RBF 24 | from likelihoods.gaussian import Gaussian 25 | from likelihoods.bernoulli import Bernoulli 26 | from models.svgp import SVGP 27 | from models.ensemblegp import EnsembleGP 28 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel 29 | from optimization.algorithms import AlgorithmVEM 30 | 31 | plt.rc('text', usetex=True) 32 | plt.rc('font', family='serif') 33 | 34 | # COOLORS.CO palettes 35 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 36 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 37 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 38 | color_palette_4 = 
['#002642','#840032','#e59500','#e5dada','#02040e'] 39 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 40 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 41 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 42 | 43 | # For 0-number 44 | #color_palette = color_palette_5 45 | #color_0 = color_palette[0] 46 | #color_1 = color_palette[4] 47 | 48 | # For 1-number 49 | color_palette = color_palette_3 50 | color_0 = color_palette[1] 51 | color_1 = color_palette[4] 52 | 53 | mnist = torchvision.datasets.MNIST('../data/', train=True, download=False, transform=torchvision.transforms.Compose([ 54 | torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.1307,), (0.3081,))])) 55 | 56 | data = enumerate(torch.utils.data.DataLoader(mnist, batch_size=10, shuffle=False)) 57 | batch_id, (image, label) = next(data) 58 | 59 | number = 1 60 | if number > 0: 61 | i = 3 62 | else: 63 | i = 1 64 | 65 | y = image[i][0] 66 | y[y>0.0] = 1.0 67 | y[y<0.0] = 0.0 68 | 69 | pixel = y.size(0) 70 | 71 | y = y.view(1,pixel ** 2).t() 72 | 73 | x1 = np.linspace(-1.0, 1.0, pixel) 74 | x2 = np.linspace(-1.0, 1.0, pixel) 75 | X1, X2 = np.meshgrid(x1, x2) 76 | X1 = X1.reshape(pixel ** 2, 1) 77 | X2 = -X2.reshape(pixel ** 2, 1) 78 | X_np = np.hstack((X1, X2)) 79 | x = torch.from_numpy(X_np).float() 80 | 81 | # plot limits 82 | max_x = x[:,0].max() 83 | max_y = x[:,1].max() 84 | min_x = x[:,0].min() 85 | min_y = x[:,1].min() 86 | 87 | x_tasks = [] 88 | y_tasks = [] 89 | if number == 0: 90 | # Division into 4 regions 91 | x_1 = x[(x[:,0]<0.0) & (x[:,1]<0.0),:] 92 | y_1 = y[(x[:,0]<0.0) & (x[:,1]<0.0),:] 93 | 94 | x_2 = x[(x[:,0]>0.0) & (x[:,1]<0.0),:] 95 | y_2 = y[(x[:,0]>0.0) & (x[:,1]<0.0),:] 96 | 97 | x_3 = x[(x[:,0]>0.0) & (x[:,1]>0.0),:] 98 | y_3 = y[(x[:,0]>0.0) & (x[:,1]>0.0),:] 99 | 100 | x_4 = x[(x[:,0]<0.0) & (x[:,1]>0.0),:] 101 | y_4 = y[(x[:,0]<0.0) & (x[:,1]>0.0),:] 102 | 103 | # All tasks 104 | x_tasks += [x_1, x_2, x_3, x_4] 105 | y_tasks += [y_1, y_2, y_3, y_4] 106 | 107 | elif number == 1: 108 | # Division into 2 regions 109 | x_1 = x[(x[:, 1] < 0.0), :] 110 | y_1 = y[(x[:, 1] < 0.0), :] 111 | 112 | x_2 = x[(x[:, 1] > 0.0), :] 113 | y_2 = y[(x[:, 1] > 0.0), :] 114 | 115 | # All tasks 116 | x_tasks += [x_1, x_2] 117 | y_tasks += [y_1, y_2] 118 | 119 | 120 | K = len(x_tasks) 121 | sigmoid = torch.nn.Sigmoid() 122 | 123 | M_k = 4 # inducing points per side 124 | N_test = 80 # test points per side 125 | 126 | plot_local = True 127 | plot_ensemble = True 128 | save = True 129 | 130 | ########################### 131 | # # 132 | # DISTRIBUTED TASKS # 133 | # # 134 | ########################### 135 | 136 | models = [] 137 | for k, x_k in enumerate(x_tasks): 138 | 139 | print('- -') 140 | print('----- TASK k=' + str(k + 1) + ' ------') 141 | print('- -') 142 | 143 | y_k = y_tasks[k] 144 | kernel_k = RBF() 145 | likelihood_k = Bernoulli() 146 | model_k = SVGP(kernel_k, likelihood_k, M_k**2, input_dim=2) 147 | 148 | # initial grid of inducing-points 149 | mx = torch.mean(x_k[:, 0]) 150 | my = torch.mean(x_k[:, 1]) 151 | vx = torch.var(x_k[:, 0]) 152 | vy = torch.var(x_k[:, 1]) 153 | 154 | zy = np.linspace(my - 3*vy, my + 3*vy, M_k) 155 | zx = np.linspace(mx - 3*vx, mx + 3*vx, M_k) 156 | ZX, ZY = np.meshgrid(zx, zy) 157 | ZX = ZX.reshape(M_k ** 2, 1) 158 | ZY = ZY.reshape(M_k ** 2, 1) 159 | Z = np.hstack((ZX, ZY)) 160 | z_k = torch.from_numpy(Z).float() 161 | 162 | model_k.z = torch.nn.Parameter(z_k, requires_grad=True) 163 | vem_algorithm = 
AlgorithmVEM(model_k, x_k, y_k, iters=7) 164 | 165 | vem_algorithm.ve_its = 20 166 | vem_algorithm.vm_its = 10 167 | vem_algorithm.lr_m = 1e-3 168 | vem_algorithm.lr_L = 1e-6 169 | vem_algorithm.lr_hyp = 1e-6 170 | vem_algorithm.lr_z = 1e-4 171 | 172 | vem_algorithm.fit() 173 | models.append(model_k) 174 | 175 | if plot_local: 176 | 177 | min_tx = x[:,0].min() - 0.15 178 | min_ty = x[:,1].min() - 0.15 179 | max_tx = x[:,0].max() + 0.15 180 | max_ty = x[:,1].max() + 0.15 181 | 182 | ty = np.linspace(min_ty, max_ty, N_test) 183 | tx = np.linspace(min_tx, max_tx, N_test) 184 | TX_grid, TY_grid = np.meshgrid(tx, ty) 185 | TX = TX_grid.reshape(N_test ** 2, 1) 186 | TY = TY_grid.reshape(N_test ** 2, 1) 187 | X_test = np.hstack((TX, TY)) 188 | x_test = torch.from_numpy(X_test).float() 189 | 190 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 191 | gp = sigmoid(torch.from_numpy(gp)) 192 | 193 | # Plot 194 | plt.figure(figsize=(7, 7)) 195 | ax = plt.axes() 196 | plt.plot(x_k[y_k[:, 0] == 0, 0], x_k[y_k[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=12.0) 197 | plt.plot(x_k[y_k[:, 0] == 1, 0], x_k[y_k[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=12.0) 198 | plt.plot(model_k.z[:,0].detach(), model_k.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 199 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 200 | levels=[0.25, 0.5, 0.75], zorder=10) 201 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 202 | 203 | plt.title(r'MNIST Recyclable GP - '+ str(k + 1) ) 204 | plt.xlabel(r'$x_1$ input') 205 | plt.ylabel(r'$x_2$ input') 206 | plt.xlim(-1.2, 1.2) 207 | plt.ylim(-1.2, 1.2) 208 | 209 | if save: 210 | plt.savefig(fname='./figs/image/0_number_task_' + str(k + 1) + '.pdf', format='pdf') 211 | 212 | plt.show() 213 | #plt.close() 214 | 215 | ########################### 216 | # # 217 | # ENSEMBLE INFERENCE # 218 | # # 219 | ########################### 220 | 221 | print('- -') 222 | print('----- ENSEMBLE ------') 223 | print('- -') 224 | 225 | if number == 0: 226 | M_e = 5 227 | elif number == 1: 228 | M_e = 4 229 | 230 | kernel = RBF() 231 | likelihood = Bernoulli() 232 | model_e = EnsembleGP(kernel, likelihood, models, M_e**2, input_dim=2) 233 | 234 | # initial grid of inducing-points 235 | mx = torch.mean(x[:, 0]) 236 | my = torch.mean(x[:, 1]) 237 | vx = torch.var(x[:, 0]) 238 | vy = torch.var(x[:, 1]) 239 | 240 | zy = np.linspace(my - 1.5*vy, my + 1.5*vy, M_e) 241 | zx = np.linspace(mx - 1.5*vx, mx + 1.5*vx, M_e) 242 | ZX, ZY = np.meshgrid(zx, zy) 243 | ZX = ZX.reshape(M_e ** 2, 1) 244 | ZY = ZY.reshape(M_e ** 2, 1) 245 | Z = np.hstack((ZX, ZY)) 246 | z_e = torch.from_numpy(Z).float() 247 | 248 | model_e.z = torch.nn.Parameter(z_e, requires_grad=True) 249 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=20) 250 | 251 | vem_algorithm.ve_its = 20 252 | vem_algorithm.vm_its = 10 253 | vem_algorithm.lr_m = 1e-3 254 | vem_algorithm.lr_L = 1e-5 255 | vem_algorithm.lr_hyp = 1e-6 256 | vem_algorithm.lr_z = 1e-5 257 | 258 | vem_algorithm.fit() 259 | 260 | if plot_ensemble: 261 | 262 | min_tx = x[:,0].min() - 0.15 263 | min_ty = x[:,1].min() - 0.15 264 | max_tx = x[:,0].max() + 0.15 265 | max_ty = x[:,1].max() + 0.15 266 | 267 | ty = np.linspace(min_ty, max_ty, N_test) 268 | tx = np.linspace(min_tx, max_tx, N_test) 269 | TX_grid, TY_grid = np.meshgrid(tx, ty) 270 | TX = TX_grid.reshape(N_test ** 2, 1) 271 | TY = TY_grid.reshape(N_test ** 2, 1) 272 | X_test = np.hstack((TX, TY)) 273 | x_test = torch.from_numpy(X_test).float() 274 | 275 | gp, _, _ 
= model_e.predictive(x_test) 276 | gp = sigmoid(torch.from_numpy(gp)) 277 | 278 | # Plot 279 | plt.figure(figsize=(7, 7)) 280 | ax = plt.axes() 281 | plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=12.0) 282 | plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=12.0) 283 | plt.plot(model_e.z[:,0].detach(), model_e.z[:,1].detach(), 'kx', ms=10.0, mew=2.0) 284 | cs = ax.contour(TX_grid, TY_grid, np.reshape(gp, (N_test, N_test)), linewidths=3, colors='k', 285 | levels=[0.25, 0.5, 0.75], zorder=10) 286 | ax.clabel(cs, inline=1, fontsize=14, fmt='%1.1f') 287 | 288 | plt.title(r'MNIST GP Ensemble') 289 | plt.xlabel(r'$x_1$ input') 290 | plt.ylabel(r'$x_2$ input') 291 | plt.xlim(-1.2, 1.2) 292 | plt.ylim(-1.2, 1.2) 293 | 294 | if save: 295 | plt.savefig(fname='./figs/image/0_number_ensemble.pdf', format='pdf') 296 | 297 | plt.show() 298 | 299 | # plt.figure(figsize=(6, 6)) 300 | # plt.plot(x[y[:, 0] == 0, 0], x[y[:, 0] == 0, 1], 'o', color=color_1, alpha=0.5, ms=8.0) 301 | # plt.plot(x[y[:, 0] == 1, 0], x[y[:, 0] == 1, 1], 'o', color=color_0, alpha=0.5, ms=8.0) 302 | # plt.xlim(-1.5, 1.5) 303 | # plt.ylim(-1.5, 1.5) 304 | # plt.show() 305 | 306 | # fig = plt.figure() 307 | # for i in range(10): 308 | # plt.subplot(2,5,i+1) 309 | # #plt.tight_layout() 310 | # print(image[i][0]) 311 | # plt.imshow(image[i][0], cmap='gray', interpolation='none') 312 | # plt.title("Ground Truth: {}".format(label[i])) 313 | # 314 | # plt.show() -------------------------------------------------------------------------------- /experiments/million_rbcm.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment II -- Baselines (Million) 15 | # ----------------------------------------------------------------- 16 | 17 | 18 | import torch 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | 22 | plt.rc('text', usetex=True) 23 | plt.rc('font', family='serif') 24 | 25 | # COOLORS.CO palettes 26 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 27 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 28 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 29 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 30 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 31 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 32 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 33 | 34 | color_palette = color_palette_2 35 | 36 | from kernels.rbf import RBF 37 | from likelihoods.gaussian import Gaussian 38 | from models.svgp import SVGP 39 | from models.ensemblegp import EnsembleGP 40 | from baselines.distgp import DistGP 41 | from baselines.poegp import PoeGP 42 | from baselines.gpoegp import GenPoeGP 43 | from baselines.bcm import BayesianCM 44 | from baselines.rbcm import RobustBayesianCM 45 | from baselines.dvigp import DVIGP 46 | from 
optimization.algorithms import AlgorithmVEM 47 | from optimization.algorithms import GPR_Optimizer 48 | from util import smooth_function 49 | 50 | experiment = '1m' 51 | 52 | my_path = './../../../../Dropbox/PhD/Works/RecyclableGP/' 53 | 54 | N_k = 400 # 200 55 | M_k = 3 56 | M_e = 35 57 | 58 | T = 50 59 | tasks = 50 60 | layer_1_merge = 10 # 10 61 | layer_2_merge = 5 # 10 62 | trials = 5 63 | node_overlapping = 1 64 | 65 | plot_layer_0 = False 66 | plot_layer_1 = True 67 | plot_layer_2 = True 68 | plot_ensemble = True 69 | save = True 70 | 71 | recy_metrics = np.zeros((3,trials)) 72 | poe_metrics = np.zeros((3,trials)) 73 | gpoe_metrics = np.zeros((3,trials)) 74 | bcm_metrics = np.zeros((3,trials)) 75 | rbcm_metrics = np.zeros((3,trials)) 76 | 77 | N_test = 400 78 | min_x = 0.0 79 | max_x = T * 0.1 80 | segment_x = (max_x - min_x) / tasks 81 | x_test = torch.linspace(min_x - 0.5, max_x + 0.5, N_test)[:, None] 82 | f_test = smooth_function(x_test) 83 | y_test = f_test + 2.0 * torch.randn(N_test, 1) 84 | 85 | for trial in range(trials): 86 | print('TRIAL = '+str(trial+1)+'/'+str(trials)) 87 | layer_2 = [] 88 | layer_2_poe_gpm = [] # POE GPs (predictive) 89 | layer_2_poe_gpv = [] # POE GPs (predictive) 90 | layer_2_gpoe_gpm = [] # GPOE GPs (predictive) 91 | layer_2_gpoe_gpv = [] # GPOE GPs (predictive) 92 | layer_2_bcm_gpm = [] # BCM GPs (predictive) 93 | layer_2_bcm_gpv = [] # BCM GPs (predictive) 94 | layer_2_rbcm_gpm = [] # rBCM GPs (predictive) 95 | layer_2_rbcm_gpv = [] # rBCM GPs (predictive) 96 | 97 | for j in range(layer_2_merge): 98 | print('LAYER-2 = ' + str(j+1) + '/' + str(layer_2_merge)) 99 | layer_1 = [] 100 | layer_1_poe_gpm = [] # POE GPs (predictive) 101 | layer_1_poe_gpv = [] # POE GPs (predictive) 102 | layer_1_gpoe_gpm = [] # GPOE GPs (predictive) 103 | layer_1_gpoe_gpv = [] # GPOE GPs (predictive) 104 | layer_1_bcm_gpm = [] # BCM GPs (predictive) 105 | layer_1_bcm_gpv = [] # BCM GPs (predictive) 106 | layer_1_rbcm_gpm = [] # rBCM GPs (predictive) 107 | layer_1_rbcm_gpv = [] # rBCM GPs (predictive) 108 | 109 | for m in range(layer_1_merge): 110 | print('LAYER-1 = ' + str(m+1) + '/' + str(layer_1_merge)) 111 | 112 | ########################### 113 | # LAYER 0 # 114 | # ___________ # 115 | # DISTRIBUTED # 116 | ########################### 117 | 118 | x_tasks = [] 119 | y_tasks = [] 120 | 121 | # SYNTHETIC DATA 122 | for n in range(node_overlapping): 123 | for k in range(T): 124 | x_k = ((min_x + (k * segment_x)) - (min_x + ((k + 1) * segment_x))) * torch.rand(N_k, 1) + ( 125 | min_x + ((k + 1) * segment_x)) 126 | x_k, _ = torch.sort(x_k, dim=0) 127 | y_k = smooth_function(x_k) + 2.0 * torch.randn(N_k, 1) 128 | x_tasks.append(x_k) 129 | y_tasks.append(y_k) 130 | 131 | tasks = T * node_overlapping 132 | 133 | layer_0 = [] # recyclable GPs 134 | layer_0_dist = [] # distributed GPs (models) 135 | layer_0_dist_gpm = [] # distributed GPs (predictive) 136 | layer_0_dist_gpv = [] # distributed GPs (predictive) 137 | 138 | for k, x_k in enumerate(x_tasks): 139 | print(' ') 140 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 141 | print('LAYER-0 = ' + str(k+1) + '/' + str(T*node_overlapping)) 142 | print('LAYER-1 = ' + str(m+1) + '/' + str(layer_1_merge)) 143 | print('LAYER-2 = ' + str(j+1) + '/' + str(layer_2_merge)) 144 | print('\ -') 145 | print(' ---- TASK k=' + str(k + 1) + ' ------') 146 | print('/ -') 147 | print(' ') 148 | ###################################################### 149 | # 2. 
DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 150 | ###################################################### 151 | 152 | kernel_j = RBF() 153 | likelihood_j = Gaussian(fit_noise=True) 154 | model_j = DistGP(kernel_j, likelihood_j) 155 | GPR_Optimizer(model_j, x_k, y_tasks[k]) 156 | 157 | dis_gp_m, dis_gp_v = model_j.predictive(x_k, y_tasks[k], x_test) 158 | layer_0_dist.append(model_j) 159 | layer_0_dist_gpm.append(dis_gp_m) 160 | layer_0_dist_gpv.append(dis_gp_v) 161 | 162 | ########################### 163 | # LAYER 0 # 164 | # ________ # 165 | # ENSEMBLE # 166 | ########################### 167 | 168 | print(' ') 169 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 170 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping)) 171 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge)) 172 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge)) 173 | print('\ -') 174 | print(' ------ ENSEMBLE LAYER 0 ------') 175 | print('/ -') 176 | print(' ') 177 | 178 | ######################################################### 179 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 180 | ######################################################### 181 | # A. POE _________// 182 | # B. GPOE _________// 183 | # C. BCM _________// 184 | # D. RBCM _________// 185 | 186 | poe_model = PoeGP(models=layer_0_dist) 187 | gpoe_model = GenPoeGP(models=layer_0_dist) 188 | bcm_model = BayesianCM(models=layer_0_dist) 189 | rbcm_model = RobustBayesianCM(models=layer_0_dist) 190 | 191 | poe_m, poe_v = poe_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 192 | gpoe_m, gpoe_v = gpoe_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 193 | bcm_m, bcm_v = bcm_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 194 | rbcm_m, rbcm_v = rbcm_model.predictive_layer(layer_0_dist_gpm, layer_0_dist_gpv, x_test) 195 | 196 | layer_1_poe_gpm.append(poe_m) 197 | layer_1_poe_gpv.append(poe_v) 198 | layer_1_gpoe_gpm.append(gpoe_m) 199 | layer_1_gpoe_gpv.append(gpoe_v) 200 | layer_1_bcm_gpm.append(bcm_m) 201 | layer_1_bcm_gpv.append(bcm_v) 202 | layer_1_rbcm_gpm.append(rbcm_m) 203 | layer_1_rbcm_gpv.append(rbcm_v) 204 | 205 | ########################### 206 | # LAYER 1 # 207 | ########################### 208 | 209 | print(' ') 210 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 211 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping)) 212 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge)) 213 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge)) 214 | print('\ -') 215 | print(' ------ ENSEMBLE LAYER 1 ------') 216 | print('/ -') 217 | print(' ') 218 | 219 | ######################################################### 220 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 221 | ######################################################### 222 | # A. POE _________// 223 | # B. GPOE _________// 224 | # C. BCM _________// 225 | # D. 
RBCM _________// 226 | 227 | poe_model = PoeGP(models=layer_0_dist) 228 | gpoe_model = GenPoeGP(models=layer_0_dist) 229 | bcm_model = BayesianCM(models=layer_0_dist) 230 | rbcm_model = RobustBayesianCM(models=layer_0_dist) 231 | 232 | poe_m, poe_v = poe_model.predictive_layer(layer_1_poe_gpm, layer_1_poe_gpv, x_test) 233 | gpoe_m, gpoe_v = gpoe_model.predictive_layer(layer_1_gpoe_gpm, layer_1_gpoe_gpv, x_test) 234 | bcm_m, bcm_v = bcm_model.predictive_layer(layer_1_bcm_gpm, layer_1_bcm_gpv, x_test) 235 | rbcm_m, rbcm_v = rbcm_model.predictive_layer(layer_1_rbcm_gpm, layer_1_rbcm_gpv, x_test) 236 | 237 | layer_2_poe_gpm.append(poe_m) 238 | layer_2_poe_gpv.append(poe_v) 239 | layer_2_gpoe_gpm.append(gpoe_m) 240 | layer_2_gpoe_gpv.append(gpoe_v) 241 | layer_2_bcm_gpm.append(bcm_m) 242 | layer_2_bcm_gpv.append(bcm_v) 243 | layer_2_rbcm_gpm.append(rbcm_m) 244 | layer_2_rbcm_gpv.append(rbcm_v) 245 | 246 | ########################### 247 | # LAYER 2 # 248 | ########################### 249 | 250 | print(' ') 251 | print('TRIAL = ' + str(trial + 1) + '/' + str(trials)) 252 | print('LAYER-0 = ' + str(k + 1) + '/' + str(T * node_overlapping)) 253 | print('LAYER-1 = ' + str(m + 1) + '/' + str(layer_1_merge)) 254 | print('LAYER-2 = ' + str(j + 1) + '/' + str(layer_2_merge)) 255 | print('\ -') 256 | print(' ------ FINAL ENSEMBLE ------') 257 | print('/ -') 258 | print(' ') 259 | 260 | ######################################################### 261 | # 2. ENSEMBLE DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 262 | ######################################################### 263 | # A. POE _________// 264 | # B. GPOE _________// 265 | # C. BCM _________// 266 | # D. RBCM _________// 267 | 268 | poe_model = PoeGP(models=layer_0_dist) 269 | gpoe_model = GenPoeGP(models=layer_0_dist) 270 | bcm_model = BayesianCM(models=layer_0_dist) 271 | rbcm_model = RobustBayesianCM(models=layer_0_dist) 272 | 273 | ######################################################### 274 | # -- METRICS -------------------------------------------- 275 | ######################################################### 276 | 277 | # A. POE _________// 278 | 279 | nlpd = poe_model.nlpd_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, y_test) 280 | rmse = poe_model.rmse_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, f_test) 281 | mae = poe_model.mae_layer(layer_2_poe_gpm, layer_2_poe_gpv, x_test, f_test) 282 | 283 | poe_metrics[0, trial] = nlpd 284 | poe_metrics[1, trial] = rmse 285 | poe_metrics[2, trial] = mae 286 | 287 | print('POE-NLPD: ', nlpd) 288 | print('POE-RMSE: ', rmse) 289 | print('POE-MAE: ', mae) 290 | print(' ') 291 | 292 | # B. GPOE _________// 293 | 294 | nlpd = gpoe_model.nlpd_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, y_test) 295 | rmse = gpoe_model.rmse_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, f_test) 296 | mae = gpoe_model.mae_layer(layer_2_gpoe_gpm, layer_2_gpoe_gpv, x_test, f_test) 297 | 298 | gpoe_metrics[0, trial] = nlpd 299 | gpoe_metrics[1, trial] = rmse 300 | gpoe_metrics[2, trial] = mae 301 | 302 | print('GenPOE-NLPD: ', nlpd) 303 | print('GenPOE-RMSE: ', rmse) 304 | print('GenPOE-MAE: ', mae) 305 | print(' ') 306 | 307 | # C. 
BCM _________// 308 | 309 | nlpd = bcm_model.nlpd_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, y_test) 310 | rmse = bcm_model.rmse_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, f_test) 311 | mae = bcm_model.mae_layer(layer_2_bcm_gpm, layer_2_bcm_gpv, x_test, f_test) 312 | 313 | bcm_metrics[0, trial] = nlpd 314 | bcm_metrics[1, trial] = rmse 315 | bcm_metrics[2, trial] = mae 316 | 317 | print('BCM-NLPD: ', nlpd) 318 | print('BCM-RMSE: ', rmse) 319 | print('BCM-MAE: ', mae) 320 | print(' ') 321 | 322 | # D. RBCM _________// 323 | 324 | nlpd = rbcm_model.nlpd_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, y_test) 325 | rmse = rbcm_model.rmse_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, f_test) 326 | mae = rbcm_model.mae_layer(layer_2_rbcm_gpm, layer_2_rbcm_gpv, x_test, f_test) 327 | 328 | rbcm_metrics[0, trial] = nlpd 329 | rbcm_metrics[1, trial] = rmse 330 | rbcm_metrics[2, trial] = mae 331 | 332 | print('RBCM-NLPD: ', nlpd) 333 | print('RBCM-RMSE: ', rmse) 334 | print('RBCM-MAE: ', mae) 335 | print(' ') 336 | 337 | # save to csv file 338 | #np.savetxt(my_path + 'metrics/recy_metrics_' + experiment + '.csv', recy_metrics, delimiter=',') 339 | np.savetxt(my_path + 'metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',') 340 | np.savetxt(my_path + 'metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',') 341 | np.savetxt(my_path + 'metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',') 342 | np.savetxt(my_path + 'metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',') 343 | -------------------------------------------------------------------------------- /experiments/paralell.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Parallel Inference 15 | # ----------------------------------------------------------------- 16 | 17 | import torch 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | from tikzplotlib import save as tikz_save 21 | 22 | plt.rc('text', usetex=True) 23 | plt.rc('font', family='serif') 24 | 25 | # COOLORS.CO palettes 26 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 27 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 28 | color_palette_3 = ['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 29 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 30 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 31 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 32 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 33 | 34 | color_palette = color_palette_2 35 | 36 | from kernels.rbf import RBF 37 | from likelihoods.gaussian import Gaussian 38 | from models.svgp import SVGP 39 | from models.ensemblegp import EnsembleGP 40 | from optimization.algorithms import vem_algorithm, ensemble_vem, ensemble_vem_parallel 41 | from optimization.algorithms import AlgorithmVEM 42 | from util 
import smooth_function, smooth_function_bias 43 | 44 | tasks = 5 45 | N_k = 500 46 | M_k = 15 47 | M_e = 35 48 | plot_local = True 49 | plot_ensemble = True 50 | save = True 51 | 52 | ########################### 53 | # # 54 | # DISTRIBUTED TASKS # 55 | # # 56 | ########################### 57 | 58 | min_x = 0.0 59 | max_x = 5.5 60 | segment_x = (max_x - min_x)/tasks 61 | x_tasks = [] 62 | y_tasks = [] 63 | for k in range(tasks): 64 | x_k = ((min_x+(k*segment_x))-(min_x+((k+1)*segment_x)))*torch.rand(N_k,1) + (min_x+((k+1)*segment_x)) 65 | x_k, _ = torch.sort(x_k, dim=0) 66 | y_k = smooth_function_bias(x_k) + 2.0*torch.randn(N_k,1) 67 | x_tasks.append(x_k) 68 | y_tasks.append(y_k) 69 | 70 | ########################### 71 | # # 72 | # PARALLEL INFERENCE # 73 | # # 74 | ########################### 75 | 76 | N_k_test = 400 77 | x_test = torch.linspace(min_x-0.5, max_x+0.5, N_k_test)[:, None] 78 | models = [] 79 | for k, x_k in enumerate(x_tasks): 80 | print('- -') 81 | print('----- TASK k='+str(k+1)+' ------') 82 | print('- -') 83 | kernel_k = RBF() 84 | likelihood_k = Gaussian(fit_noise=False) 85 | model_k = SVGP(kernel_k, likelihood_k, M_k) 86 | 87 | z_k_min = min_x+(k*segment_x) 88 | z_k_max = min_x+((k+1)*segment_x) 89 | #model_k.z = torch.nn.Parameter((z_k_max - z_k_min)*torch.rand(M_k, 1) + z_k_min, requires_grad=True) 90 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True) 91 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=15) 92 | 93 | vem_algorithm.ve_its = 20 94 | vem_algorithm.vm_its = 10 95 | vem_algorithm.lr_m = 1e-6 96 | vem_algorithm.lr_L = 1e-10 97 | vem_algorithm.lr_hyp = 1e-10 98 | vem_algorithm.lr_z = 1e-10 99 | 100 | vem_algorithm.fit() 101 | 102 | models.append(model_k) 103 | 104 | if plot_local: 105 | gp, gp_upper, gp_lower = model_k.predictive(x_test) 106 | 107 | plt.figure(figsize=(12, 4)) 108 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k], lw=1.5) 109 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k, 1), color=color_palette[k], linestyle='', marker='.',markersize=5) 110 | 111 | plt.plot(x_test, gp, 'k-', linewidth=1.5) 112 | #plt.fill_between(x_test.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2,lw='0.5') 113 | plt.plot(x_test, gp_upper, 'k-', linewidth=3.0) 114 | plt.plot(x_test, gp_lower, 'k-', linewidth=3.0) 115 | 116 | plt.title(r'Variational Sparse GP -- (task=' + str(k+1) + ')') 117 | plt.xlabel(r'Input, $x$') 118 | plt.ylabel(r'Output, $y$') 119 | plt.xlim(min_x - 0.5, max_x + 0.5) 120 | plt.ylim(-22.0, 22.0) 121 | 122 | if save: 123 | plt.savefig(fname='./figs/ parallel_task_'+str(k+1)+'.pdf',format='pdf') 124 | 125 | plt.show() 126 | 127 | ########################### 128 | # # 129 | # ENSEMBLE INFERENCE # 130 | # # 131 | ########################### 132 | print('- -') 133 | print('----- ENSEMBLE ------') 134 | print('- -') 135 | 136 | kernel = RBF() 137 | likelihood = Gaussian(fit_noise=False) 138 | model_e = EnsembleGP(kernel, likelihood, models, M_e) 139 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:,None], requires_grad=True) 140 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=30) 141 | 142 | vem_algorithm.ve_its = 30 143 | vem_algorithm.vm_its = 10 144 | vem_algorithm.lr_m = 1e-3 145 | vem_algorithm.lr_L = 1e-6 146 | vem_algorithm.lr_hyp = 1e-8 147 | vem_algorithm.lr_z = 1e-8 148 | 149 | vem_algorithm.fit() 150 | 151 | N_e_test = 400 152 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 153 
| 154 | if plot_ensemble: 155 | gp, gp_upper, gp_lower = model_e.predictive(x_test_ensemble) 156 | 157 | # Plot Ensemble 158 | plt.figure(figsize=(12, 4)) 159 | for k, x_k in enumerate(x_tasks): 160 | #if k%10==0: 161 | plt.plot(x_k, y_tasks[k], ls='-', color=color_palette[k], lw=1.5) 162 | plt.plot(models[k].z.detach(), -20.0*torch.ones(M_k,1), color=color_palette[k], linestyle='', marker='.', markersize=5) 163 | 164 | plt.plot(model_e.z.detach(), -20.0 * torch.ones(M_e, 1), color='k', linestyle='', marker='x', markersize=7, markeredgewidth=1.1) 165 | plt.plot(x_test_ensemble, gp, 'k-', linewidth=1.5) 166 | #plt.fill_between(x_test_ensemble.flatten(), gp_lower.flatten(), gp_upper.flatten(), color='b', alpha=0.2, lw='0.5') 167 | plt.plot(x_test_ensemble, gp_upper, 'k-', linewidth=3.0) 168 | plt.plot(x_test_ensemble, gp_lower, 'k-', linewidth=3.0) 169 | 170 | plt.title(r'Ensemble GP Model -- (tasks='+str(tasks)+')') 171 | plt.xlabel(r'Input, $x$') 172 | plt.ylabel(r'Output, $y$') 173 | plt.xlim(min_x-0.5, max_x+0.5) 174 | plt.ylim(-22.0, 22.0) 175 | 176 | if save: 177 | plt.savefig(fname='./figs/parallel_ensemble.pdf',format='pdf') 178 | 179 | plt.show() 180 | 181 | N_e_test = 400 182 | x_test_ensemble = torch.linspace(min_x-0.5, max_x+0.5, N_e_test)[:, None] 183 | f_test_ensemble = smooth_function(x_test_ensemble) 184 | y_test_ensemble = f_test_ensemble + 2.0*torch.randn(N_e_test,1) 185 | 186 | nlpd = model_e.nlpd(x_test_ensemble, y_test_ensemble) 187 | rmse = model_e.rmse(x_test_ensemble, f_test_ensemble) 188 | mae = model_e.mae(x_test_ensemble, f_test_ensemble) 189 | 190 | print("NLPD: ", nlpd) 191 | print("RMSE: ", rmse) 192 | print("MAE: ", mae) -------------------------------------------------------------------------------- /experiments/solar.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | # ----------------------------------------------------------------- 14 | # Experiment -- Solar Dataset 15 | # ----------------------------------------------------------------- 16 | 17 | from kernels.rbf import RBF 18 | from likelihoods.gaussian import Gaussian 19 | from models.svgp import SVGP 20 | from models.ensemblegp import EnsembleGP 21 | from baselines.distgp import DistGP 22 | from baselines.poegp import PoeGP 23 | from baselines.gpoegp import GenPoeGP 24 | from baselines.bcm import BayesianCM 25 | from baselines.rbcm import RobustBayesianCM 26 | from baselines.dvigp import DVIGP 27 | from optimization.algorithms import AlgorithmVEM 28 | from optimization.algorithms import GPR_Optimizer 29 | from optimization.algorithms import AlgorithmVEM 30 | from sklearn.model_selection import train_test_split 31 | 32 | import torch 33 | import numpy as np 34 | import scipy.io as sio 35 | import matplotlib.pyplot as plt 36 | 37 | plt.rc('text', usetex=True) 38 | plt.rc('font', family='serif') 39 | 40 | # COOLORS.CO palettes 41 | color_palette_1 = ['#335c67','#fff3b0','#e09f3e','#9e2a2b','#540b0e'] 42 | color_palette_2 = ['#177e89','#084c61','#db3a34','#ef8354','#323031'] 43 | color_palette_3 = 
['#bce784','#5dd39e','#348aa7','#525274','#513b56'] 44 | color_palette_4 = ['#002642','#840032','#e59500','#e5dada','#02040e'] 45 | color_palette_5 = ['#202c39','#283845','#b8b08d','#f2d449','#f29559'] 46 | color_palette_6 = ['#21295c','#1b3b6f','#065a82','#1c7293','#9eb3c2'] 47 | color_palette_7 = ['#f7b267','#f79d65','#f4845f','#f27059','#f25c54'] 48 | 49 | palette = color_palette_4 50 | 51 | trials = 10 52 | experiment = 'solar' 53 | 54 | recy_metrics = np.zeros((3,trials)) 55 | poe_metrics = np.zeros((3,trials)) 56 | gpoe_metrics = np.zeros((3,trials)) 57 | bcm_metrics = np.zeros((3,trials)) 58 | rbcm_metrics = np.zeros((3,trials)) 59 | 60 | # Load Solar Data -- 61 | data = sio.loadmat('../data/nasa.mat') 62 | y = data['nasa'][:,2] 63 | y = np.log(y + 1) 64 | y = y[:,np.newaxis] 65 | y = (y - np.mean(y)) # mean normalization 66 | x = np.linspace(0,100, y.shape[0])[:,np.newaxis] 67 | 68 | print(y.shape) 69 | 70 | 71 | for trial in range(trials): 72 | 73 | print('TRIAL = ' + str(trial) + '/' + str(trials)) 74 | 75 | ########################### 76 | # # 77 | # DISTRIBUTED TASKS # 78 | # # 79 | ########################### 80 | 81 | tasks = 50 82 | min_x = 0.0 83 | max_x = 100.0 84 | segment_x = (max_x - min_x)/tasks 85 | x_tasks = [] # training x -- inputs 86 | y_tasks = [] # training y -- outputs 87 | 88 | x_test = torch.zeros(1,1) # test x -- inputs 89 | y_test = torch.zeros(1,1) # test y -- outputs 90 | 91 | n_training = 0 92 | n_test = 0 93 | for k in range(tasks): 94 | min_x_k = min_x + (k*segment_x) 95 | max_x_k = min_x + ((k+1)*segment_x) 96 | y_k = y[(x[:, 0] > min_x_k) & (x[:, 0] < max_x_k), :] 97 | x_k = x[(x[:, 0] > min_x_k) & (x[:, 0] < max_x_k), :] 98 | 99 | x_k_train, x_k_test, y_k_train, y_k_test = train_test_split(x_k, y_k, test_size = 0.2, random_state = 42) 100 | 101 | x_tasks.append(torch.from_numpy(x_k_train).float()) 102 | y_tasks.append(torch.from_numpy(y_k_train).float()) 103 | 104 | x_test = torch.cat((x_test, torch.from_numpy(x_k_test).float()), 0) 105 | y_test = torch.cat((y_test, torch.from_numpy(y_k_test).float()), 0) 106 | 107 | #x_k_test = x_k[::5, :] 108 | #y_k_test = y_k[::5, :] 109 | 110 | #x_tasks.append(torch.from_numpy(np.delete(x_k,np.s_[::5])[:,None]).float()) 111 | #y_tasks.append(torch.from_numpy(np.delete(y_k,np.s_[::5])[:,None]).float()) 112 | 113 | #x_test = torch.cat((x_test, torch.from_numpy(x_k_test).float()), 0) 114 | #y_test = torch.cat((y_test, torch.from_numpy(y_k_test).float()), 0) 115 | 116 | n_training += y_k_train.shape[0] 117 | n_test += y_k_test.shape[0] 118 | 119 | 120 | print('Total # of tasks: ', len(x_tasks)) 121 | print('Number # of training samples: ', n_training) 122 | print('Number # of test samples: ', n_test) 123 | 124 | ########################### 125 | # # 126 | # PARALLEL INFERENCE # 127 | # # 128 | ########################### 129 | 130 | M_k = 6 131 | models = [] # for recyclable GPs 132 | models_dist = [] # for distributed GPs 133 | x_all = [] # for distributed GPs 134 | y_all = [] # for distributed GPs 135 | for k, x_k in enumerate(x_tasks): 136 | print('- -') 137 | print('----- TASK k=' + str(k + 1) + ' ------') 138 | print('- -') 139 | ###################################################### 140 | # 1. 
RECYCLABLE GP 141 | ###################################################### 142 | kernel_k = RBF(length_scale=0.2, variance=1.0) 143 | likelihood_k = Gaussian(sigma=0.1, fit_noise=True) 144 | model_k = SVGP(kernel_k, likelihood_k, M_k) 145 | 146 | z_k_min = min_x + (k*segment_x) 147 | z_k_max = min_x + ((k+1)*segment_x) 148 | model_k.z = torch.nn.Parameter(torch.linspace(z_k_min, z_k_max, M_k)[:, None], requires_grad=True) 149 | 150 | vem_algorithm = AlgorithmVEM(model_k, x_k, y_tasks[k], iters=20) 151 | 152 | vem_algorithm.ve_its = 20 153 | vem_algorithm.vm_its = 20 154 | vem_algorithm.lr_m = 1e-5 155 | vem_algorithm.lr_L = 1e-8 156 | vem_algorithm.lr_hyp = 1e-10 157 | vem_algorithm.lr_z = 1e-10 158 | 159 | vem_algorithm.fit() 160 | 161 | ###################################################### 162 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 163 | ###################################################### 164 | 165 | kernel_j = RBF() 166 | likelihood_j = Gaussian(fit_noise=False) 167 | model_j = DistGP(kernel_j, likelihood_j) 168 | GPR_Optimizer(model_j, x_k, y_tasks[k]) 169 | 170 | models_dist.append(model_j) 171 | x_all.append(x_k) 172 | y_all.append(y_tasks[k]) 173 | 174 | ########################### 175 | # # 176 | # ENSEMBLE INFERENCE # 177 | # # 178 | ########################### 179 | print('- -') 180 | print('----- ENSEMBLE ------') 181 | print('- -') 182 | 183 | ###################################################### 184 | # 1. RECYCLABLE GP 185 | ###################################################### 186 | 187 | M_e = 90 188 | kernel = RBF() 189 | likelihood = Gaussian(fit_noise=False) 190 | model_e = EnsembleGP(kernel, likelihood, models, M_e) 191 | model_e.z = torch.nn.Parameter(torch.linspace(min_x, max_x, M_e)[:, None], requires_grad=True) 192 | vem_algorithm = AlgorithmVEM(model_e, config='ensemble', iters=10) 193 | 194 | vem_algorithm.ve_its = 30 195 | vem_algorithm.vm_its = 10 196 | vem_algorithm.lr_m = 1e-3 197 | vem_algorithm.lr_L = 1e-6 198 | vem_algorithm.lr_hyp = 1e-8 199 | vem_algorithm.lr_z = 1e-8 200 | 201 | vem_algorithm.fit() 202 | 203 | nlpd = model_e.nlpd(x_test, y_test) 204 | rmse = model_e.rmse(x_test, y_test) 205 | mae = model_e.mae(x_test, y_test) 206 | 207 | recy_metrics[0, trial] = nlpd 208 | recy_metrics[1, trial] = rmse 209 | recy_metrics[2, trial] = mae 210 | 211 | print('Recyclable - NLPD: ', nlpd) 212 | print('Recyclable - RMSE: ', rmse) 213 | print('Recyclable - MAE: ', mae) 214 | print(' ') 215 | 216 | ###################################################### 217 | # 2. DISTRIBUTED GP (FOR BCM, RBCM, POE & GPOE) 218 | ###################################################### 219 | 220 | # A. POE _________// 221 | 222 | poe_model = PoeGP(models_dist) 223 | 224 | nlpd = poe_model.nlpd(x_all, y_all, x_test, y_test) 225 | rmse = poe_model.rmse(x_all, y_all, x_test, y_test) 226 | mae = poe_model.mae(x_all, y_all, x_test, y_test) 227 | 228 | poe_metrics[0, trial] = nlpd 229 | poe_metrics[1, trial] = rmse 230 | poe_metrics[2, trial] = mae 231 | 232 | print('POE-NLPD: ', nlpd) 233 | print('POE-RMSE: ', rmse) 234 | print('POE-MAE: ', mae) 235 | print(' ') 236 | 237 | # B. 
GPOE _________// 238 | 239 | gpoe_model = GenPoeGP(models_dist) 240 | 241 | nlpd = gpoe_model.nlpd(x_all, y_all, x_test, y_test) 242 | rmse = gpoe_model.rmse(x_all, y_all, x_test, y_test) 243 | mae = gpoe_model.mae(x_all, y_all, x_test, y_test) 244 | 245 | gpoe_metrics[0, trial] = nlpd 246 | gpoe_metrics[1, trial] = rmse 247 | gpoe_metrics[2, trial] = mae 248 | 249 | print('GenPOE-NLPD: ', nlpd) 250 | print('GenPOE-RMSE: ', rmse) 251 | print('GenPOE-MAE: ', mae) 252 | print(' ') 253 | 254 | # C. BCM _________// 255 | 256 | bcm_model = BayesianCM(models_dist) 257 | 258 | nlpd = bcm_model.nlpd(x_all, y_all, x_test, y_test) 259 | rmse = bcm_model.rmse(x_all, y_all, x_test, y_test) 260 | mae = bcm_model.mae(x_all, y_all, x_test, y_test) 261 | 262 | bcm_metrics[0, trial] = nlpd 263 | bcm_metrics[1, trial] = rmse 264 | bcm_metrics[2, trial] = mae 265 | 266 | print('BCM-NLPD: ', nlpd) 267 | print('BCM-RMSE: ', rmse) 268 | print('BCM-MAE: ', mae) 269 | print(' ') 270 | 271 | # D. RBCM _________// 272 | 273 | rbcm_model = RobustBayesianCM(models_dist) 274 | 275 | nlpd = rbcm_model.nlpd(x_all, y_all, x_test, y_test) 276 | rmse = rbcm_model.rmse(x_all, y_all, x_test, y_test) 277 | mae = rbcm_model.mae(x_all, y_all, x_test, y_test) 278 | 279 | rbcm_metrics[0, trial] = nlpd 280 | rbcm_metrics[1, trial] = rmse 281 | rbcm_metrics[2, trial] = mae 282 | 283 | print('RBCM-NLPD: ', nlpd) 284 | print('RBCM-RMSE: ', rmse) 285 | print('RBCM-MAE: ', mae) 286 | print(' ') 287 | 288 | # save to csv file 289 | np.savetxt('./metrics/recy_metrics_' + experiment + '.csv', recy_metrics, delimiter=',') 290 | np.savetxt('./metrics/poe_metrics_' + experiment + '.csv', poe_metrics, delimiter=',') 291 | np.savetxt('./metrics/gpoe_metrics_' + experiment + '.csv', gpoe_metrics, delimiter=',') 292 | np.savetxt('./metrics/bcm_metrics_' + experiment + '.csv', bcm_metrics, delimiter=',') 293 | np.savetxt('./metrics/rbcm_metrics_' + experiment + '.csv', rbcm_metrics, delimiter=',') -------------------------------------------------------------------------------- /extra/modular_gp_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/extra/modular_gp_logo.png -------------------------------------------------------------------------------- /kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/kernels/__init__.py -------------------------------------------------------------------------------- /kernels/coregionalization.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from util import squared_distance 15 | from kernels.kernel import Kernel 16 | from kernels.rbf import RBF 17 | 18 | class LMC(Kernel): 19 | """ 20 | Class for Linear Model of Coregionalization / Kernel 21 | """ 22 | 23 | def __init__(self, kernels, output_dim, rank=1, W=None, kappa=None, 
variance=None, length_scale=None, input_dim=None): 24 | super().__init__(input_dim) 25 | 26 | # Dimensionality of coregionalization kernel 27 | self.Q = len(kernels) 28 | self.output_dim = output_dim 29 | self.rank = rank 30 | if self.rank > output_dim: 31 | print("Warning: Unusual choice of rank, rank should be less than output dim.") 32 | 33 | # Coregionalization kernel / mixing hyper-parameters 34 | if W is None: 35 | self.W = torch.nn.Parameter(torch.randn(self.output_dim, self.Q), requires_grad=True) 36 | else: 37 | assert W.shape == (self.output_dim, self.Q) 38 | self.W = torch.nn.Parameter(W, requires_grad=True)  # user-provided mixing matrix, one column per latent function 39 | # Registration of coregionalization parameters 40 | self.register_parameter('coregionalization_W', self.W) 41 | 42 | # Independent kernels 43 | self.kernels = kernels 44 | 45 | def B_coefficients(self): 46 | B_coeff = [] 47 | for q in range(self.Q): 48 | B_q = torch.mm(self.W[:,q:q+1], self.W[:,q:q+1].t()) 49 | B_coeff.append(B_q) 50 | return B_coeff 51 | 52 | def Kff(self, X, k): 53 | """ 54 | Builds the covariance matrix Kff = cov[f_k(x), f_k(x)] of a Multi-output GP 55 | :param X: Input data 56 | :param k: Output function index 57 | """ 58 | N,_ = X.shape 59 | Kff = torch.zeros(N,N) 60 | B = self.B_coefficients() 61 | for q, B_q in enumerate(B): 62 | Kff += B_q[k,k] * self.kernels[q].K(X, X) 63 | 64 | return Kff 65 | 66 | def Kfu(self, X, Z, k): 67 | """ 68 | Builds the cross-covariance cov[f_k(x), u(z)] of a Multi-output GP 69 | :param X: Input data 70 | :param Z: Inducing points (M, D, Q) 71 | :param k: Output function index 72 | """ 73 | N, _ = X.shape 74 | M, Xdim, _ = Z.shape 75 | 76 | B = self.B_coefficients() 77 | Kfu = torch.empty(N, M, self.Q) 78 | for q, B_q in enumerate(B): 79 | Kfu[:,:,q] = self.W[k,q] * self.kernels[q].K(X, Z[:,:,q]) 80 | 81 | return Kfu 82 | 83 | -------------------------------------------------------------------------------- /kernels/kernel.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | 15 | import torch 16 | import numpy as np 17 | from util import squared_distance 18 | 19 | class Kernel(torch.nn.Module): 20 | """ 21 | Base class for kernels 22 | """ 23 | def __init__(self, input_dim=None): 24 | super(Kernel, self).__init__() 25 | 26 | # Input dimension -- x 27 | if input_dim is None: 28 | input_dim = 1 29 | else: 30 | input_dim = int(input_dim) 31 | 32 | self.input_dim = input_dim -------------------------------------------------------------------------------- /kernels/rbf.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | import numpy as np 16 | from
kernels.stationary import Stationary 17 | 18 | class RBF(Stationary): 19 | """ 20 | The Radial Basis Function (RBF) or Squared Exponential / Gaussian Kernel 21 | """ 22 | 23 | def K(self, X, X2=None): 24 | variance = self.variance.abs().clamp(min=0.0, max=5.0) 25 | r2 = torch.clamp(self.squared_dist(X, X2),min=0.0, max=np.inf) 26 | K = variance*torch.exp(-r2 / 2.0) 27 | 28 | # Assure that is PSD 29 | if X2 is None: 30 | try: 31 | _ = torch.cholesky(K) 32 | except RuntimeError: 33 | print('Jitter added') 34 | jitter = 1e-5 35 | idx = torch.arange(K.shape[-1]) 36 | Kprime = K.clone() 37 | Kprime[idx, idx] += jitter 38 | K = Kprime 39 | 40 | return K -------------------------------------------------------------------------------- /kernels/stationary.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from util import squared_distance 16 | from kernels.kernel import Kernel 17 | 18 | class Stationary(Kernel): 19 | """ 20 | Class for Stationary Kernel 21 | """ 22 | 23 | def __init__(self, variance=None, length_scale=None, input_dim=None, ARD=False): 24 | super().__init__(input_dim) 25 | 26 | if input_dim is None: 27 | self.input_dim = 1 28 | else: 29 | self.input_dim = input_dim 30 | 31 | self.ARD = ARD # Automatic relevance determination 32 | # Length-scale/smoothness of the kernel -- l 33 | if self.ARD: 34 | if length_scale is None: 35 | length_scale = 0.1 * torch.ones(self.input_dim) 36 | else: 37 | if length_scale is None: 38 | length_scale = 0.1 39 | 40 | # Variance/amplitude of the kernel - /sigma 41 | if variance is None: 42 | variance = 2.0 43 | 44 | self.length_scale = torch.nn.Parameter(length_scale*torch.ones(1), requires_grad=True) 45 | self.variance = torch.nn.Parameter(variance*torch.ones(1), requires_grad=True) 46 | self.register_parameter('length_scale', self.length_scale) 47 | self.register_parameter('variance', self.variance) 48 | 49 | def squared_dist(self, X, X2): 50 | """ 51 | Returns the SCALED squared distance between X and X2. 
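    The inputs are scaled element-wise by the length-scale before the distance is computed,
    i.e. r2(x, x') = ||x / l - x' / l||^2, which RBF.K then maps to k(x, x') = variance * exp(-r2 / 2).
    If X2 is None, the pairwise distances between the rows of X and themselves are returned.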
52 | """ 53 | length_scale = self.length_scale.abs().clamp(min=0.0, max=10.0) 54 | 55 | if not self.ARD: 56 | if X2 is None: 57 | dist = squared_distance(X/length_scale) 58 | else: 59 | dist = squared_distance(X/length_scale, X2/length_scale) 60 | else: 61 | if X2 is None: 62 | dist = squared_distance(X / length_scale) 63 | else: 64 | dist = squared_distance(X / length_scale, X2 / length_scale) 65 | 66 | return dist 67 | 68 | def Kdiag(self, X): 69 | variance = torch.abs(self.variance) 70 | return variance.expand(X.size(0)) -------------------------------------------------------------------------------- /likelihoods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/likelihoods/__init__.py -------------------------------------------------------------------------------- /likelihoods/bernoulli.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | from likelihoods.likelihood import Likelihood 16 | from torch.distributions.normal import Normal 17 | from torch.distributions.bernoulli import Bernoulli as Ber 18 | 19 | class Bernoulli(Likelihood): 20 | """ 21 | Class for Gaussian Likelihood 22 | """ 23 | def __init__(self): 24 | super(Bernoulli, self).__init__() 25 | 26 | 27 | def pdf(self, f, y): 28 | 29 | sigmoid = torch.nn.Sigmoid() 30 | p = sigmoid(f)#.flatten() 31 | bernoulli = Ber(probs=p) 32 | pdf = torch.exp(bernoulli.log_prob(y)) 33 | return pdf 34 | 35 | def logpdf(self, f, y): 36 | sigmoid = torch.nn.Sigmoid() 37 | p = sigmoid(f).flatten() 38 | bernoulli = Ber(probs=p) 39 | logpdf = bernoulli.log_prob(y) 40 | return logpdf 41 | 42 | def variational_expectation(self, y, m, v): 43 | # Gauss-Hermite Quadrature 44 | gh_p, gh_w = self.gh_points() 45 | gh_w = torch.div(gh_w, np.sqrt(np.pi)) 46 | 47 | m, v, y = m.flatten(), v.flatten(), y.flatten() 48 | f = gh_p[None, :] * torch.sqrt(2. 
* v[:, None]) + m[:, None] 49 | y = y[:,None].repeat(1,f.size(1)) 50 | 51 | logp = self.logpdf(f.view(-1), y.view(-1)) 52 | logp = logp.view(f.size()).double() 53 | gh_w = gh_w[:, None] 54 | 55 | var_exp = logp.mm(gh_w) 56 | return var_exp 57 | 58 | def log_predictive(self, y_test, mu_gp, v_gp, num_samples=1000): 59 | N_test = y_test.size(0) 60 | # function samples: 61 | normal = Normal(loc=mu_gp.flatten(), scale=torch.sqrt(v_gp).flatten()) 62 | f_samples = torch.reshape(normal.sample(sample_shape=(1,num_samples))[0,:,:], (-1,)) 63 | 64 | # monte-carlo: 65 | logpdf = self.logpdf(f_samples, y_test.repeat(num_samples,1).flatten()) 66 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0) 67 | return -log_pred 68 | 69 | -------------------------------------------------------------------------------- /likelihoods/gaussian.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | from likelihoods.likelihood import Likelihood 16 | from torch.distributions.normal import Normal 17 | 18 | class Gaussian(Likelihood): 19 | """ 20 | Class for Gaussian Likelihood 21 | """ 22 | def __init__(self, sigma=None, fit_noise=False): 23 | super(Gaussian, self).__init__() 24 | 25 | if sigma is None: 26 | sigma=1.0 27 | 28 | self.sigma = torch.nn.Parameter(sigma*torch.ones(1), requires_grad=fit_noise) 29 | 30 | 31 | def pdf(self, f, y): 32 | normal = Normal(loc=f, scale=self.sigma) 33 | pdf = torch.exp(normal.log_prob(y)) 34 | return pdf 35 | 36 | def logpdf(self, f, y): 37 | normal = Normal(loc=f, scale=self.sigma) 38 | logpdf = normal.log_prob(y) 39 | return logpdf 40 | 41 | def variational_expectation(self, y, m, v): 42 | # Variational Expectation of log-likelihood -- Analytical 43 | lik_variance = self.sigma.pow(2) 44 | expectation = - np.log(2*np.pi) - torch.log(lik_variance) \ 45 | - (y.pow(2) + m.pow(2) + v - (2*m*y)).div(lik_variance) 46 | 47 | return 0.5*expectation 48 | 49 | def log_predictive(self, y_test, mu_gp, v_gp, num_samples=1000): 50 | # function samples: 51 | normal = Normal(loc=mu_gp.flatten(), scale=torch.sqrt(v_gp).flatten()) 52 | f_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:] 53 | 54 | # monte-carlo: 55 | logpdf = self.logpdf(f_samples, y_test.flatten()) 56 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0) 57 | return log_pred -------------------------------------------------------------------------------- /likelihoods/hetgaussian.py: -------------------------------------------------------------------------------- 1 | 2 | # ----------------------------------------------------------------- 3 | # This script belongs to the ModularGP repo 4 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 5 | # Copyright (c) 2021 Pablo Moreno-Munoz 6 | # ----------------------------------------------------------------- 7 | # 8 | # 9 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 10 | # Section for Cognitive Systems 11 | # Technical University of Denmark (DTU) 12 | # October 2021 13 | 14 | 15 | import torch 16 | import numpy 
as np 17 | from likelihoods.likelihood import Likelihood 18 | from torch.distributions.normal import Normal 19 | from util import safe_exp, safe_square 20 | 21 | class HetGaussian(Likelihood): 22 | """ 23 | Class for Heteroscedastic Gaussian Likelihood 24 | -- 25 | -- Adaptation to Pytorch+GP framework 26 | -- Based on M. Lázaro-Gredilla et al. "Variational Heteroscedastic Gaussian Process Regression" @ ICML 2011 27 | -- Reference: https://icml.cc/Conferences/2011/papers/456_icmlpaper.pdf 28 | """ 29 | def __init__(self): 30 | super(HetGaussian, self).__init__() 31 | 32 | def pdf(self, f, g, y): 33 | normal = Normal(loc=f, scale=safe_exp(g)) 34 | pdf = safe_exp(normal.log_prob(y)) 35 | return pdf 36 | 37 | def logpdf(self, f, g, y): 38 | normal = Normal(loc=f, scale=safe_exp(g)) 39 | logpdf = normal.log_prob(y) 40 | return logpdf 41 | 42 | def variational_expectation(self, y, m_f, v_f, m_g, v_g): 43 | # Variational Expectation of log-likelihood -- Analytical 44 | precision = torch.clamp(safe_exp(-m_g + (0.5*v_g)), min=-1e9, max=1e9) 45 | #squares = torch.clamp(safe_square(y) + safe_square(m_f) + v_f - (2*m_f*y), min=-1e9, max=1e9) 46 | squares = torch.clamp(y**2 + m_f**2 + v_f - (2 * m_f * y), min=-1e9, max=1e9) 47 | expectation = -np.log(2*np.pi) - m_g - (precision*squares) 48 | return 0.5*expectation 49 | 50 | def log_predictive(self, y_test, mu_f_gp, v_f_gp, mu_g_gp, v_g_gp, num_samples=1000): 51 | # function samples f: 52 | normal = Normal(loc=mu_f_gp.flatten(), scale=torch.sqrt(v_f_gp).flatten()) 53 | f_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:] 54 | 55 | # function samples g: 56 | normal = Normal(loc=mu_g_gp.flatten(), scale=torch.sqrt(v_g_gp).flatten()) 57 | g_samples = normal.sample(sample_shape=(1,num_samples))[0,:,:] 58 | 59 | # monte-carlo: 60 | logpdf = self.logpdf(f_samples, g_samples, y_test.flatten()) 61 | log_pred = -np.log(num_samples) + torch.logsumexp(logpdf, dim=0) 62 | return log_pred -------------------------------------------------------------------------------- /likelihoods/likelihood.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | 16 | class Likelihood(torch.nn.Module): 17 | """ 18 | Base class for likelihoods 19 | """ 20 | def __init__(self): 21 | super(Likelihood, self).__init__() 22 | 23 | def gh_points(self, T=20): 24 | # Gaussian-Hermite Quadrature points 25 | gh_p, gh_w = np.polynomial.hermite.hermgauss(T) 26 | gh_p, gh_w = torch.from_numpy(gh_p), torch.from_numpy(gh_w) 27 | return gh_p, gh_w 28 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/models/__init__.py -------------------------------------------------------------------------------- /models/chainedgp.py: -------------------------------------------------------------------------------- 1 | # 
----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from torch.distributions import MultivariateNormal as Normal 16 | from likelihoods.hetgaussian import HetGaussian 17 | from torch.distributions import kl_divergence 18 | 19 | import numpy as np 20 | from GPy.inference.latent_function_inference import LatentFunctionInference 21 | from GPy.inference.latent_function_inference.posterior import Posterior 22 | 23 | 24 | class ChainedGP(torch.nn.Module): 25 | """ 26 | -- Chained Gaussian Process with Heteroscedastic Gaussian Likelihood -- 27 | -- 28 | -- Adaptation to Pytorch+GP framework 29 | -- Based on A. Saul et al. "Chained Gaussian Processes" @ AISTATS 2016 30 | -- Reference: http://proceedings.mlr.press/v51/saul16.pdf 31 | """ 32 | def __init__(self, kernel_f, kernel_g, M, input_dim=None, batch_rate=1.0): 33 | super(ChainedGP, self).__init__() 34 | 35 | if input_dim is None: 36 | input_dim = 1 37 | 38 | # Dimensions -- 39 | self.M = M # num. inducing 40 | self.input_dim = int(input_dim) # dimension of x 41 | self.batch_rate = batch_rate # rate of mini-batch/dataset 42 | 43 | # GP Elements -- 44 | self.likelihood = HetGaussian() # type of likelihood 45 | self.kernel_f = kernel_f # type of kernel for f 46 | self.kernel_g = kernel_g # type of kernel for g 47 | 48 | self.logZ = 0.0 49 | 50 | if self.input_dim > 1: 51 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False) 52 | else: 53 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 54 | 55 | # Variational distribution f -- 56 | self.q_m_f = torch.nn.Parameter(0.5*torch.randn(M,1), requires_grad=True) # variational: mean parameter 57 | self.q_L_f = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 58 | 59 | # Variational distribution g -- 60 | self.q_m_g = torch.nn.Parameter(0.5*torch.randn(M,1), requires_grad=True) # variational: mean parameter 61 | self.q_L_g = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 62 | 63 | def forward(self, x, y): 64 | 65 | # Variational parameters f -- 66 | q_m_f = self.q_m_f 67 | q_L_f = torch.tril(self.q_L_f) 68 | q_S_f = torch.mm(q_L_f, q_L_f.t()) 69 | 70 | # Variational parameters g -- 71 | q_m_g = self.q_m_g 72 | q_L_g = torch.tril(self.q_L_g) 73 | q_S_g = torch.mm(q_L_g, q_L_g.t()) 74 | 75 | # Prior parameters (uses kernel) -- 76 | Kuu_f = self.kernel_f.K(self.z) 77 | Kuu_g = self.kernel_g.K(self.z) 78 | 79 | # Distributions -- q(u), p(u) 80 | q_u_f = Normal(q_m_f.flatten(), q_S_f) 81 | p_u_f = Normal(torch.zeros(self.M), Kuu_f) 82 | 83 | q_u_g = Normal(q_m_g.flatten(), q_S_g) 84 | p_u_g = Normal(torch.zeros(self.M), Kuu_g) 85 | 86 | # Calculus of q(f) -- 87 | Kff = self.kernel_f.K(x,x) 88 | Kfu = self.kernel_f.K(x, self.z) 89 | Kuf = torch.transpose(Kfu,0,1) 90 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu_f) # is pseudo-inverse? 
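# NOTE: torch.solve(B, A) is deprecated in newer PyTorch releases; the same inverse can be
# obtained with torch.linalg.solve(Kuu_f, torch.eye(self.M)), or more stably via
# torch.cholesky_solve on a Cholesky factor of Kuu_f.
# The block below computes the sparse-GP marginal q(f) in the standard way (and q(g) analogously further down):
#   m_f = Kfu Kuu^{-1} q_m,   v_f = diag( Kff + Kfu Kuu^{-1} (q_S - Kuu) Kuu^{-1} Kuf ).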
91 | 92 | A = Kfu.mm(iKuu) 93 | AT = iKuu.mm(Kuf) 94 | 95 | m_f = A.mm(q_m_f) 96 | v_f = torch.diag(Kff + A.mm(q_S_f - Kuu_f).mm(AT)) 97 | 98 | # Calculus of q(g) -- 99 | Kff = self.kernel_g.K(x,x) 100 | Kfu = self.kernel_g.K(x, self.z) 101 | Kuf = torch.transpose(Kfu,0,1) 102 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu_g) # is pseudo-inverse? 103 | 104 | A = Kfu.mm(iKuu) 105 | AT = iKuu.mm(Kuf) 106 | 107 | m_g = A.mm(q_m_g) 108 | v_g = torch.diag(Kff + A.mm(q_S_g - Kuu_g).mm(AT)) 109 | 110 | # Expectation term -- 111 | expectation = self.likelihood.variational_expectation(y, m_f, v_f, m_g, v_g) 112 | 113 | # KL divergence -- 114 | kl = kl_divergence(q_u_f, p_u_f) + kl_divergence(q_u_g, p_u_g) 115 | 116 | # Lower bound (ELBO) -- 117 | elbo = self.batch_rate*expectation.sum() - kl 118 | return -elbo 119 | 120 | def predictive(self, x_new, lik_noise=False): 121 | # Matrices f 122 | q_m_f = self.q_m_f.detach().numpy() 123 | q_L_f = torch.tril(self.q_L_f) 124 | q_S_f = torch.mm(q_L_f, q_L_f.t()).detach().numpy() 125 | Kuu_f = self.kernel_f.K(self.z, self.z).detach().numpy() 126 | 127 | # Matrices g 128 | q_m_g = self.q_m_g.detach().numpy() 129 | q_L_g = torch.tril(self.q_L_g) 130 | q_S_g = torch.mm(q_L_g, q_L_g.t()).detach().numpy() 131 | Kuu_g = self.kernel_g.K(self.z, self.z).detach().numpy() 132 | 133 | # GP function f ------ 134 | posterior = Posterior(mean=q_m_f, cov=q_S_f, K=Kuu_f, prior_mean=np.zeros(q_m_f.shape)) 135 | Kx = self.kernel_f.K(self.z, x_new).detach().numpy() 136 | Kxx = self.kernel_f.K(x_new, x_new).detach().numpy() 137 | 138 | # GP Predictive Posterior - mean + variance 139 | gp_mu_f = np.dot(Kx.T, posterior.woodbury_vector) 140 | Kxx = np.diag(Kxx) 141 | gp_var_f = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 142 | 143 | gp_f = gp_mu_f 144 | gp_v_f = gp_var_f 145 | 146 | # GP function g ------ 147 | posterior = Posterior(mean=q_m_g, cov=q_S_g, K=Kuu_g, prior_mean=np.zeros(q_m_g.shape)) 148 | Kx = self.kernel_g.K(self.z, x_new).detach().numpy() 149 | Kxx = self.kernel_g.K(x_new, x_new).detach().numpy() 150 | 151 | # GP Predictive Posterior - mean + variance 152 | gp_mu_g = np.dot(Kx.T, posterior.woodbury_vector) 153 | Kxx = np.diag(Kxx) 154 | gp_var_g = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 155 | 156 | gp_g = gp_mu_g 157 | gp_v_g = gp_var_g 158 | 159 | return gp_f, gp_v_f, gp_g, gp_v_g 160 | 161 | def rmse(self, x_new, f_new): 162 | f_gp,_,_,_ = self.predictive(x_new) 163 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 164 | return rmse 165 | 166 | def mae(self, x_new, f_new): 167 | f_gp,_,_,_ = self.predictive(x_new) 168 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 169 | return mae 170 | 171 | def nlpd(self, x_new, y_new): 172 | f_gp, v_f_gp, g_gp, v_g_gp = self.predictive(x_new) 173 | f_gp = torch.from_numpy(f_gp) 174 | v_f_gp = torch.from_numpy(v_f_gp) 175 | g_gp = torch.from_numpy(g_gp) 176 | v_g_gp = torch.from_numpy(v_g_gp) 177 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_f_gp, g_gp, v_g_gp)).detach().numpy() 178 | return nlpd 179 | 180 | def evidence(self, x, y, N_samples=None): 181 | # Approximation CI 182 | if N_samples is None: 183 | N_samples = 1000 184 | 185 | N,_ = x.shape 186 | v_f = torch.zeros(N) 187 | for i in range(N): 188 | v_f[i] = self.kernel.K(x[i:i+1,:],x[i:i+1,:]) 189 | 190 | m_f = torch.zeros(v_f.shape) 191 | p_f = Normal(m_f, torch.diag(v_f)) 192 | f_samples = p_f.sample([N_samples]).t() # 
N x N_samples 193 | mc_pdf = self.likelihood.pdf(f_samples, torch.tile(y, (1,N_samples))) 194 | 195 | mc_expectations = 1/N_samples * torch.sum(torch.clamp(mc_pdf, min=1e-100),1) 196 | print(mc_expectations) 197 | logZ = torch.sum(torch.log(mc_expectations)) 198 | 199 | self.logZ = logZ 200 | return logZ 201 | 202 | 203 | -------------------------------------------------------------------------------- /models/ensemblegp.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from torch.distributions import MultivariateNormal as Normal 16 | from torch.distributions import kl_divergence 17 | from GPy.inference.latent_function_inference.posterior import Posterior 18 | import numpy as np 19 | 20 | class EnsembleGP(torch.nn.Module): 21 | """ 22 | -- Ensemble Variational Inference for Gaussian Processes -- 23 | """ 24 | def __init__(self, kernel, likelihood, models, M, input_dim=None): 25 | super(EnsembleGP, self).__init__() 26 | 27 | if input_dim is None: 28 | input_dim = 1 29 | 30 | # Dimensions -- 31 | self.M = M # num. inducing 32 | self.input_dim = int(input_dim) # dimension of x 33 | 34 | # Ensemble GP Elements -- 35 | self.likelihood = likelihood 36 | self.kernel = kernel 37 | 38 | if self.input_dim > 1: 39 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False) 40 | else: 41 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 42 | 43 | # Adjacent GP Models 44 | self.models = models # is a list 45 | 46 | # Ensemble Variational distribution -- 47 | self.q_m = torch.nn.Parameter(torch.randn(M, 1), requires_grad=True) # variational: mean parameter 48 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 49 | 50 | def ensemble(self): 51 | # GP prior 52 | Kuu = self.kernel.K(self.z, self.z) 53 | iKuu, _ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse? 54 | 55 | q_m = self.q_m 56 | q_L = torch.tril(self.q_L) 57 | q_S = torch.mm(q_L, q_L.t()) 58 | 59 | ensemble_m = [] 60 | ensemble_S = [] 61 | 62 | # Ensemble GP Distributions 63 | for model_k in self.models: 64 | Kkk = self.kernel.K(model_k.z, model_k.z) 65 | Kuk = self.kernel.K(self.z, model_k.z) 66 | Kku = torch.transpose(Kuk,0,1) 67 | 68 | A = Kku.mm(iKuu) 69 | AT = iKuu.mm(Kuk) 70 | 71 | m_k = Kku.mm(iKuu).mm(q_m) 72 | S_k = Kkk + A.mm(q_S - Kuu).mm(AT) 73 | 74 | ensemble_m.append(m_k) 75 | ensemble_S.append(S_k) 76 | 77 | return ensemble_m, ensemble_S 78 | 79 | def expectation(self): 80 | E = 0.0 81 | ensemble_m, ensemble_S = self.ensemble() 82 | 83 | # Expectation of k ensembles -- 84 | for k,model_k in enumerate(self.models): 85 | # Ensemble GP -- q_e() 86 | m_e = ensemble_m[k] 87 | S_e = ensemble_S[k] 88 | 89 | # Past GP variational distribution -- q_k() 90 | m_k = model_k.q_m 91 | L_k = torch.tril(model_k.q_L) 92 | S_k = torch.mm(L_k, L_k.t()) 93 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse? 
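# The two terms below are (twice) the Gaussian expectations taken under the ensemble
# marginal q_e(u_k) = N(m_e, S_e):
#   E[log q_k(u_k)] = -0.5 * ( tr(S_k^{-1} S_e) + (m_e - m_k)^T S_k^{-1} (m_e - m_k) + logdet(2*pi*S_k) )
#   E[log p_k(u_k)] = -0.5 * ( tr(Kkk^{-1} S_e) + m_e^T Kkk^{-1} m_e + logdet(2*pi*Kkk) )
# The factor 0.5 is applied when the terms are accumulated into E further down.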
94 | 95 | # Past GP prior -- p_k() 96 | z_k = model_k.z 97 | Kkk = model_k.kernel.K(z_k, z_k) 98 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse? 99 | 100 | # Expectation on terms -- E[log_p()] and E[log_q()] 101 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k) 102 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk) 103 | 104 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]] 105 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ 106 | 107 | return E 108 | 109 | def divergence(self, p, q): 110 | kl = kl_divergence(q,p) 111 | return kl 112 | 113 | def forward(self): 114 | 115 | # Variational parameters -- 116 | q_m = self.q_m 117 | q_L = torch.tril(self.q_L) 118 | q_S = torch.mm(q_L, q_L.t()) 119 | 120 | # Prior parameters (uses kernel) -- 121 | Kuu = self.kernel.K(self.z, self.z) 122 | 123 | # Distributions -- q(u), p(u) 124 | q_u = Normal(q_m.flatten(), q_S) 125 | p_u = Normal(torch.zeros(self.M), Kuu) 126 | 127 | # Expectation -- 128 | expectation = self.expectation() 129 | 130 | # KL divergence -- 131 | kl = self.divergence(q_u, p_u) 132 | 133 | # Calls ELBO 134 | elbo = expectation - kl 135 | return -elbo 136 | 137 | def predictive(self, x_new): 138 | # Matrices 139 | q_m = self.q_m.detach().numpy() 140 | q_L = torch.tril(self.q_L) 141 | q_S = torch.mm(q_L, q_L.t()).detach().numpy() 142 | Kuu = self.kernel.K(self.z, self.z).detach().numpy() 143 | 144 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape)) 145 | Kx = self.kernel.K(self.z, x_new).detach().numpy() 146 | Kxx = self.kernel.K(x_new, x_new).detach().numpy() 147 | 148 | # GP Predictive Posterior - mean + variance 149 | gp_mu = np.dot(Kx.T, posterior.woodbury_vector) 150 | Kxx = np.diag(Kxx) 151 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 152 | 153 | gp = gp_mu 154 | gp_upper = gp_mu + 2*np.sqrt(gp_var) #+ 2*self.likelihood.sigma.detach().numpy() 155 | gp_lower = gp_mu - 2*np.sqrt(gp_var) #- 2*self.likelihood.sigma.detach().numpy() 156 | 157 | return gp, gp_upper, gp_lower 158 | 159 | def rmse(self, x_new, f_new): 160 | f_gp,_,_ = self.predictive(x_new) 161 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 162 | return rmse 163 | 164 | def mae(self, x_new, f_new): 165 | f_gp,_,_ = self.predictive(x_new) 166 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 167 | return mae 168 | 169 | def nlpd(self, x_new, y_new): 170 | f_gp, u_gp, _ = self.predictive(x_new) 171 | f_gp = torch.from_numpy(f_gp) 172 | u_gp = torch.from_numpy(u_gp) 173 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 174 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 175 | return nlpd 176 | 177 | -------------------------------------------------------------------------------- /models/hetmoensemble.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from torch.distributions 
import MultivariateNormal as Normal 15 | from torch.distributions import kl_divergence 16 | from kernels.coregionalization import LMC 17 | from GPy.inference.latent_function_inference.posterior import Posterior 18 | import numpy as np 19 | 20 | class HetMultiOutputEnsembleGP(torch.nn.Module): 21 | """ 22 | -- Heterogeneous Multi Output Ensemble for Gaussian Processes -- 23 | -- Accepts one channel x,y of data. -- 24 | """ 25 | 26 | def __init__(self, models, likelihood, kernels, Q, M, input_dim=None, batch_rate=1.0): 27 | super(HetMultiOutputEnsembleGP, self).__init__() 28 | 29 | if input_dim is None: 30 | input_dim = 1 31 | self.batch_rate = batch_rate # rate of mini-batch/dataset 32 | 33 | # Dimensions -- 34 | self.M = M # num. inducing 35 | self.K = len(models) # num. models 36 | self.input_dim = int(input_dim) # dimension of x 37 | 38 | # Multi-output GP Ensemble Elements -- 39 | self.Q = Q 40 | self.likelihood = likelihood 41 | self.D = self.K + 1 # the number of modules + data channel 42 | 43 | # Kernels -- 44 | self.kernels = torch.nn.ModuleList() 45 | for q in range(self.Q): 46 | self.kernels.append(kernels[q]) 47 | self.coregionalization = LMC(self.kernels, self.D) # is a list 48 | 49 | if self.input_dim > 1: 50 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False) 51 | else: 52 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False) 53 | 54 | # Adjacent GP Models 55 | self.models = models # is a list 56 | 57 | # Ensemble Variational distribution -- 58 | self.q_m = torch.nn.Parameter(torch.randn(M, Q), requires_grad=True) # variational: mean parameter 59 | self.q_L = torch.nn.Parameter(torch.tile(torch.eye(M)[:,:,None], (1, 1, self.Q)), requires_grad=True) # variational: covariance 60 | 61 | 62 | def ensemble(self): 63 | # MOGP prior + Variational parameters 64 | q_m = self.q_m 65 | q_S = torch.zeros(self.M, self.M, self.Q) 66 | Kvv = torch.zeros(self.M, self.M, self.Q) 67 | iKvv = torch.zeros(self.M, self.M, self.Q) 68 | for q in range(self.Q): 69 | Kvv_q = self.kernels[q].K(self.z[:,:,q], self.z[:,:,q]) 70 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 71 | Kvv[:,:,q] = Kvv_q 72 | iKvv[:,:,q] = iKvv_q 73 | 74 | q_L = torch.tril(self.q_L[:,:,q]) 75 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 76 | 77 | ensemble_m = [] 78 | ensemble_S = [] 79 | 80 | # Ensemble MOGP Distributions 81 | for k, model_k in enumerate(self.models): 82 | 83 | Kuu = self.coregionalization.Kff(model_k.z, k) 84 | Kuv = self.coregionalization.Kfu(model_k.z, self.z, k) 85 | 86 | m_k = 0.0 87 | S_k = Kuu 88 | 89 | # TODO: Make the following faster 90 | for q in range(self.Q): 91 | 92 | A = Kuv[:,:,q].mm(iKvv[:,:,q]) 93 | AT = iKvv[:,:,q].mm(Kuv[:,:,q].t()) 94 | 95 | m_k += A.mm(q_m[:,q:q+1]) 96 | S_k += A.mm(q_S[:,:,q]).mm(AT) - A.mm(Kuv[:,:,q].t()) 97 | 98 | ensemble_m.append(m_k) 99 | ensemble_S.append(S_k) 100 | 101 | return ensemble_m, ensemble_S 102 | 103 | 104 | def expectation(self, x, y): 105 | E = 0.0 106 | ensemble_m, ensemble_S = self.ensemble() 107 | 108 | # Expectation of k ensembles -- 109 | for k,model_k in enumerate(self.models): 110 | # Ensemble GP -- q_e() 111 | m_e = ensemble_m[k] 112 | S_e = ensemble_S[k] 113 | 114 | # Past GP variational distribution -- q_k() 115 | m_k = model_k.q_m 116 | L_k = torch.tril(model_k.q_L) 117 | S_k = torch.mm(L_k, L_k.t()) 118 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse? 
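# Each module k contributes E_{q_e}[log q_k(u_k) - log p_k(u_k)] + log Z_k to the bound,
# where q_e(u_k) = N(m_e, S_e) is the ensemble marginal evaluated at that module's inducing
# inputs (computed in ensemble() through the LMC coregionalization) and log Z_k is the
# module's stored evidence estimate (model_k.logZ, set by its evidence() method).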
119 | 120 | # Past GP prior -- p_k() 121 | z_k = model_k.z 122 | Kkk = model_k.kernel.K(z_k, z_k) 123 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse? 124 | 125 | # Expectation on terms -- E[log_p()] and E[log_q()] 126 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k) 127 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk) 128 | 129 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]] 130 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ 131 | 132 | # Expectation of data channel -- 133 | q_m = self.q_m 134 | q_S = torch.zeros(self.M, self.M, self.Q) 135 | Kuu = torch.zeros(self.M, self.M, self.Q) 136 | iKuu = torch.zeros(self.M, self.M, self.Q) 137 | 138 | for q in range(self.Q): 139 | # MOGP latent functions prior 140 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 141 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse? 142 | Kuu[:, :, q] = Kuu_q 143 | iKuu[:, :, q] = iKuu_q 144 | 145 | # Variational parameters + Gaussian integration 146 | q_L = torch.tril(self.q_L[:, :, q]) 147 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 148 | Kff = self.coregionalization.Kff(x, self.D-1) 149 | Kfu = self.coregionalization.Kfu(x, self.z, self.D-1) 150 | 151 | m_f = 0.0 152 | S_f = Kff 153 | 154 | for q in range(self.Q): 155 | A = Kfu[:, :, q].mm(iKuu[:, :, q]) 156 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t()) 157 | 158 | m_f += A.mm(q_m[:, q:q + 1]) 159 | S_f += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t()) 160 | 161 | v_f = torch.diag(S_f) 162 | expectation_y = self.likelihood.variational_expectation(y, m_f, v_f) 163 | 164 | return E, expectation_y 165 | 166 | def divergence(self, p_v, q_v): 167 | kl = 0.0 168 | for q in range(self.Q): 169 | kl += kl_divergence(q_v[q], p_v[q]) 170 | return kl 171 | 172 | def forward(self, x, y): 173 | 174 | q_u = [] 175 | p_u = [] 176 | q_m = self.q_m 177 | q_S = torch.zeros(self.M, self.M, self.Q) 178 | Kuu = torch.zeros(self.M, self.M, self.Q) 179 | for q in range(self.Q): 180 | 181 | # Variational parameters -- 182 | q_L = torch.tril(self.q_L[:,:,q]) 183 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 184 | 185 | # Prior parameters (uses kernel) -- 186 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 187 | Kuu[:, :, q] = Kuu_q 188 | 189 | # Distributions -- q(u), p(u) 190 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q])) 191 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q])) 192 | 193 | # Expectation -- 194 | expectation, exp_y = self.expectation(x, y) 195 | expectation_y = self.batch_rate * exp_y.sum() 196 | 197 | # KL divergence -- 198 | kl = self.divergence(q_u, p_u) 199 | 200 | # Calls ELBO 201 | elbo = expectation + expectation_y - kl 202 | return -elbo 203 | 204 | def predictive(self, xnew, k): 205 | # MOGP prior + Variational parameters 206 | q_m = self.q_m 207 | q_S = torch.zeros(self.M, self.M, self.Q) 208 | Kvv = torch.zeros(self.M, self.M, self.Q) 209 | iKvv = torch.zeros(self.M, self.M, self.Q) 210 | 211 | # Posterior distribution on new input data 212 | Kuu = self.coregionalization.Kff(xnew, k) 213 | Kuv = self.coregionalization.Kfu(xnew, self.z, k) 214 | 215 | m_k = 0.0 216 | S_k = Kuu 217 | for q in range(self.Q): 218 | # MOGP latent functions prior 219 | Kvv_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 220 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 
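# Accumulated over the Q latent functions below, the LMC predictive at xnew is
#   m_*(x) = sum_q Kfu_q Kvv_q^{-1} m_q,
#   S_*(x) = Kff + sum_q Kfu_q Kvv_q^{-1} (S_q - Kvv_q) Kvv_q^{-1} Kuf_q,
# and the returned bands are mean +/- 2*sqrt(diag(S_*)).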
221 | Kvv[:, :, q] = Kvv_q 222 | iKvv[:, :, q] = iKvv_q 223 | 224 | # Variational parameters + Gaussian integration 225 | q_L = torch.tril(self.q_L[:, :, q]) 226 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 227 | 228 | A = Kuv[:, :, q].mm(iKvv[:, :, q]) 229 | AT = iKvv[:, :, q].mm(Kuv[:, :, q].t()) 230 | 231 | m_k += A.mm(q_m[:, q:q + 1]) 232 | S_k += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kuv[:, :, q].t()) 233 | 234 | m_k = m_k.detach().numpy() 235 | S_k = S_k.detach().numpy() 236 | 237 | gp_mu = m_k.flatten() 238 | gp_var = np.diagonal(S_k) 239 | 240 | gp = gp_mu 241 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy() 242 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy() 243 | 244 | return gp, gp_upper, gp_lower 245 | 246 | def rmse(self, x_new, f_new, k): 247 | f_gp,_,_ = self.predictive(x_new, k) 248 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 249 | return rmse 250 | 251 | def mae(self, x_new, f_new, k): 252 | f_gp,_,_ = self.predictive(x_new, k) 253 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 254 | return mae 255 | 256 | def nlpd(self, x_new, y_new, k): 257 | f_gp, u_gp, _ = self.predictive(x_new, k) 258 | f_gp = torch.from_numpy(f_gp) 259 | u_gp = torch.from_numpy(u_gp) 260 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 261 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 262 | return nlpd -------------------------------------------------------------------------------- /models/moensemble.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | 14 | import torch 15 | from torch.distributions import MultivariateNormal as Normal 16 | from torch.distributions import kl_divergence 17 | from kernels.coregionalization import LMC 18 | from GPy.inference.latent_function_inference.posterior import Posterior 19 | import numpy as np 20 | 21 | class MultiOutputEnsembleGP(torch.nn.Module): 22 | """ 23 | -- Multi Output Ensemble for Gaussian Processes -- 24 | """ 25 | 26 | def __init__(self, models, kernels, Q, M, input_dim=None): 27 | super(MultiOutputEnsembleGP, self).__init__() 28 | 29 | if input_dim is None: 30 | input_dim = 1 31 | 32 | # Dimensions -- 33 | self.M = M # num. inducing 34 | self.K = len(models) # num. 
models 35 | self.input_dim = int(input_dim) # dimension of x 36 | 37 | # Multi-output GP Ensemble Elements -- 38 | self.Q = Q 39 | 40 | # Kernels -- 41 | self.kernels = torch.nn.ModuleList() 42 | for q in range(self.Q): 43 | self.kernels.append(kernels[q]) 44 | self.coregionalization = LMC(self.kernels, self.K) # is a list 45 | 46 | if self.input_dim > 1: 47 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False) 48 | else: 49 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False) 50 | 51 | # Adjacent GP Models 52 | self.models = models # is a list 53 | 54 | # Ensemble Variational distribution -- 55 | self.q_m = torch.nn.Parameter(2*torch.randn(M, Q), requires_grad=True) # variational: mean parameter 56 | self.q_L = torch.nn.Parameter(0.5*torch.tile(torch.eye(M)[:,:,None], (1, 1, self.Q)), requires_grad=True) # variational: covariance 57 | 58 | 59 | def ensemble(self): 60 | # MOGP prior + Variational parameters 61 | q_m = self.q_m 62 | q_S = torch.zeros(self.M, self.M, self.Q) 63 | Kvv = torch.zeros(self.M, self.M, self.Q) 64 | iKvv = torch.zeros(self.M, self.M, self.Q) 65 | for q in range(self.Q): 66 | Kvv_q = self.kernels[q].K(self.z[:,:,q], self.z[:,:,q]) 67 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 68 | Kvv[:,:,q] = Kvv_q 69 | iKvv[:,:,q] = iKvv_q 70 | 71 | q_L = torch.tril(self.q_L[:,:,q]) 72 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 73 | 74 | ensemble_m = [] 75 | ensemble_S = [] 76 | 77 | # Ensemble MOGP Distributions 78 | for k, model_k in enumerate(self.models): 79 | 80 | Kuu = self.coregionalization.Kff(model_k.z, k) 81 | Kuv = self.coregionalization.Kfu(model_k.z, self.z, k) 82 | 83 | m_k = 0.0 84 | S_k = Kuu 85 | 86 | for q in range(self.Q): 87 | 88 | A = Kuv[:,:,q].mm(iKvv[:,:,q]) 89 | AT = iKvv[:,:,q].mm(Kuv[:,:,q].t()) 90 | 91 | m_k += A.mm(q_m[:,q:q+1]) 92 | S_k += A.mm(q_S[:,:,q]).mm(AT) - A.mm(Kuv[:,:,q].t()) 93 | 94 | ensemble_m.append(m_k) 95 | ensemble_S.append(S_k) 96 | 97 | return ensemble_m, ensemble_S 98 | 99 | 100 | def expectation(self): 101 | E = 0.0 102 | ensemble_m, ensemble_S = self.ensemble() 103 | 104 | # Expectation of k ensembles -- 105 | for k,model_k in enumerate(self.models): 106 | # Ensemble GP -- q_e() 107 | m_e = ensemble_m[k] 108 | S_e = ensemble_S[k] 109 | 110 | # Past GP variational distribution -- q_k() 111 | m_k = model_k.q_m 112 | L_k = torch.tril(model_k.q_L) 113 | S_k = torch.mm(L_k, L_k.t()) 114 | iS_k, _ = torch.solve(torch.eye(model_k.M), S_k) # is pseudo-inverse? 115 | 116 | # Past GP prior -- p_k() 117 | z_k = model_k.z 118 | Kkk = model_k.kernel.K(z_k, z_k) 119 | iKkk, _ = torch.solve(torch.eye(model_k.M), Kkk) # is pseudo-inverse? 
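# Numerically, logdet(2*pi*S_k) below equals M_k*log(2*pi) + logdet(S_k); writing it that
# way (or using torch.slogdet) avoids scaling the whole matrix first and can be slightly
# more stable when S_k is ill-conditioned.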
120 | 121 | # Expectation on terms -- E[log_p()] and E[log_q()] 122 | E_log_q = -torch.trace(iS_k.mm(S_e)) - (m_e - m_k).t().mm(iS_k).mm(m_e - m_k) - torch.logdet(2*np.pi*S_k) 123 | E_log_p = -torch.trace(iKkk.mm(S_e)) - m_e.t().mm(iKkk).mm(m_e) - torch.logdet(2*np.pi*Kkk) 124 | 125 | # General Expectation -- E[sum_k E[log_q_k] - E[log_p_k]] 126 | E += 0.5*(E_log_q - E_log_p) + model_k.logZ 127 | 128 | return E 129 | 130 | def divergence(self, p_v, q_v): 131 | kl = 0.0 132 | for q in range(self.Q): 133 | kl += kl_divergence(q_v[q], p_v[q]) 134 | return kl 135 | 136 | def forward(self): 137 | 138 | q_u = [] 139 | p_u = [] 140 | q_m = self.q_m 141 | q_S = torch.zeros(self.M, self.M, self.Q) 142 | Kuu = torch.zeros(self.M, self.M, self.Q) 143 | for q in range(self.Q): 144 | 145 | # Variational parameters -- 146 | q_L = torch.tril(self.q_L[:,:,q]) 147 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 148 | 149 | # Prior parameters (uses kernel) -- 150 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 151 | Kuu[:, :, q] = Kuu_q 152 | 153 | # Distributions -- q(u), p(u) 154 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q])) 155 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q])) 156 | 157 | # Expectation -- 158 | expectation = self.expectation() 159 | 160 | # KL divergence -- 161 | kl = self.divergence(q_u, p_u) 162 | 163 | # Calls ELBO 164 | elbo = expectation - kl 165 | return -elbo 166 | 167 | def predictive(self, xnew, k): 168 | # MOGP prior + Variational parameters 169 | q_m = self.q_m 170 | q_S = torch.zeros(self.M, self.M, self.Q) 171 | Kvv = torch.zeros(self.M, self.M, self.Q) 172 | iKvv = torch.zeros(self.M, self.M, self.Q) 173 | 174 | # Posterior distribution on new input data 175 | Kuu = self.coregionalization.Kff(xnew, k) 176 | Kuv = self.coregionalization.Kfu(xnew, self.z, k) 177 | 178 | m_k = 0.0 179 | S_k = Kuu 180 | for q in range(self.Q): 181 | # MOGP latent functions prior 182 | Kvv_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 183 | iKvv_q, _ = torch.solve(torch.eye(self.M), Kvv_q) # is pseudo-inverse? 
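# predictive(xnew, k) mirrors ensemble(): the same per-q projection is applied below, but
# with the test inputs xnew in place of module k's inducing inputs, and the result is
# detached to numpy as (mean, mean + 2*sqrt(var), mean - 2*sqrt(var)).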
184 | Kvv[:, :, q] = Kvv_q 185 | iKvv[:, :, q] = iKvv_q 186 | 187 | # Variational parameters + Gaussian integration 188 | q_L = torch.tril(self.q_L[:, :, q]) 189 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 190 | 191 | A = Kuv[:, :, q].mm(iKvv[:, :, q]) 192 | AT = iKvv[:, :, q].mm(Kuv[:, :, q].t()) 193 | 194 | m_k += A.mm(q_m[:, q:q + 1]) 195 | S_k += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kuv[:, :, q].t()) 196 | 197 | m_k = m_k.detach().numpy() 198 | S_k = S_k.detach().numpy() 199 | 200 | gp_mu = m_k.flatten() 201 | gp_var = np.diagonal(S_k) 202 | 203 | gp = gp_mu 204 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy() 205 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy() 206 | 207 | return gp, gp_upper, gp_lower 208 | 209 | def rmse(self, x_new, f_new, k): 210 | f_gp,_,_ = self.predictive(x_new, k) 211 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 212 | return rmse 213 | 214 | def mae(self, x_new, f_new, k): 215 | f_gp,_,_ = self.predictive(x_new, k) 216 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 217 | return mae 218 | 219 | def nlpd(self, likelihood, x_new, y_new, k): 220 | f_gp, u_gp, _ = self.predictive(x_new, k) 221 | f_gp = torch.from_numpy(f_gp) 222 | u_gp = torch.from_numpy(u_gp) 223 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 224 | nlpd = - torch.mean(likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 225 | return nlpd -------------------------------------------------------------------------------- /models/svgp.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from torch.distributions import MultivariateNormal as Normal 15 | from torch.distributions import kl_divergence 16 | 17 | import numpy as np 18 | from GPy.inference.latent_function_inference import LatentFunctionInference 19 | from GPy.inference.latent_function_inference.posterior import Posterior 20 | 21 | 22 | class SVGP(torch.nn.Module): 23 | """ 24 | -- Sparse Variational Gaussian Process -- 25 | -- 26 | -- Adaptation to Pytorch + GP framework 27 | -- Based on Hensman et al. "Scalable Variational Gaussian Process Classification" AISTATS 2015 28 | -- Reference: http://proceedings.mlr.press/v38/hensman15.pdf 29 | """ 30 | def __init__(self, kernel, likelihood, M, input_dim=None, batch_rate=1.0): 31 | super(SVGP, self).__init__() 32 | 33 | if input_dim is None: 34 | input_dim = 1 35 | 36 | # Dimensions -- 37 | self.M = M #num. 
inducing 38 | self.input_dim = int(input_dim) #dimension of x 39 | self.batch_rate = batch_rate #rate of mini-batch/dataset 40 | 41 | # GP Elements -- 42 | self.likelihood = likelihood #type of likelihood 43 | self.kernel = kernel #type of kernel 44 | 45 | self.logZ = 0.0 46 | 47 | if self.input_dim > 1: 48 | self.z = torch.nn.Parameter(2*torch.rand(self.M, self.input_dim) - 1.0, requires_grad=False) 49 | else: 50 | self.z = torch.nn.Parameter(torch.linspace(-0.9, 0.9, self.M)[:,None], requires_grad=False) 51 | 52 | # Variational distribution -- 53 | self.q_m = torch.nn.Parameter(torch.randn(M,1), requires_grad=True) # variational: mean parameter 54 | self.q_L = torch.nn.Parameter(torch.eye(M), requires_grad=True) # variational: covariance 55 | 56 | def forward(self, x, y): 57 | 58 | # Variational parameters -- 59 | q_m = self.q_m 60 | q_L = torch.tril(self.q_L) 61 | q_S = torch.mm(q_L, q_L.t()) 62 | 63 | # Prior parameters (uses kernel) -- 64 | Kuu = self.kernel.K(self.z) 65 | 66 | # Distributions -- q(u), p(u) 67 | q_u = Normal(q_m.flatten(), q_S) 68 | p_u = Normal(torch.zeros(self.M), Kuu) 69 | 70 | # Calculus of q(f) -- 71 | Kff = self.kernel.K(x,x) 72 | Kfu = self.kernel.K(x, self.z) 73 | Kuf = torch.transpose(Kfu,0,1) 74 | iKuu,_ = torch.solve(torch.eye(self.M), Kuu) # is pseudo-inverse? 75 | 76 | A = Kfu.mm(iKuu) 77 | AT = iKuu.mm(Kuf) 78 | 79 | m_f = A.mm(q_m) 80 | v_f = torch.diag(Kff + A.mm(q_S - Kuu).mm(AT)) 81 | 82 | # Expectation term -- 83 | expectation = self.likelihood.variational_expectation(y, m_f, v_f) 84 | 85 | # KL divergence -- 86 | kl = kl_divergence(q_u, p_u) 87 | 88 | # Lower bound (ELBO) -- 89 | elbo = self.batch_rate*expectation.sum() - kl 90 | return -elbo 91 | 92 | def predictive(self, x_new, lik_noise=False): 93 | # Matrices 94 | q_m = self.q_m.detach().numpy() 95 | q_L = torch.tril(self.q_L) 96 | q_S = torch.mm(q_L, q_L.t()).detach().numpy() 97 | Kuu = self.kernel.K(self.z, self.z).detach().numpy() 98 | 99 | posterior = Posterior(mean=q_m, cov=q_S, K=Kuu, prior_mean=np.zeros(q_m.shape)) 100 | Kx = self.kernel.K(self.z, x_new).detach().numpy() 101 | Kxx = self.kernel.K(x_new, x_new).detach().numpy() 102 | 103 | # GP Predictive Posterior - mean + variance 104 | gp_mu = np.dot(Kx.T, posterior.woodbury_vector) 105 | Kxx = np.diag(Kxx) 106 | gp_var = (Kxx - np.sum(np.dot(np.atleast_3d(posterior.woodbury_inv).T, Kx) * Kx[None, :, :], 1)).T 107 | 108 | gp = gp_mu 109 | if lik_noise: 110 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) + 2 * self.likelihood.sigma.detach().numpy() 111 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) - 2 * self.likelihood.sigma.detach().numpy() 112 | else: 113 | gp_upper = gp_mu + 2*np.sqrt(gp_var) 114 | gp_lower = gp_mu - 2*np.sqrt(gp_var) 115 | 116 | return gp, gp_upper, gp_lower 117 | 118 | def rmse(self, x_new, f_new): 119 | f_gp,_,_ = self.predictive(x_new) 120 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 121 | return rmse 122 | 123 | def mae(self, x_new, f_new): 124 | f_gp,_,_ = self.predictive(x_new) 125 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 126 | return mae 127 | 128 | def nlpd(self, x_new, y_new): 129 | f_gp, u_gp, _ = self.predictive(x_new) 130 | f_gp = torch.from_numpy(f_gp) 131 | u_gp = torch.from_numpy(u_gp) 132 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 133 | nlpd = - torch.mean(self.likelihood.log_predictive(y_new, f_gp, v_gp)).detach().numpy() 134 | return nlpd 135 | 136 | def evidence(self, x, y, N_samples=None): 137 | # Approximation CI 138 | if N_samples is None: 139 | N_samples = 1000 
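# Monte-Carlo approximation of the evidence under a factorised GP prior:
#   log Z ~= sum_n log( (1/S) * sum_s p(y_n | f_n^(s)) ),   f_n^(s) ~ N(0, k(x_n, x_n)),
# i.e. only the diagonal prior variances k(x_n, x_n) are used below. The result is stored
# in self.logZ and later consumed by the ensemble models.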
140 | 141 | N,_ = x.shape 142 | v_f = torch.zeros(N) 143 | for i in range(N): 144 | v_f[i] = self.kernel.K(x[i:i+1,:],x[i:i+1,:]) 145 | #v_f = torch.diag(self.kernel.K(x,x), 0) 146 | m_f = torch.zeros(v_f.shape) 147 | p_f = Normal(m_f, torch.diag(v_f)) 148 | f_samples = p_f.sample([N_samples]).t() # N x N_samples 149 | mc_pdf = self.likelihood.pdf(f_samples, torch.tile(y, (1,N_samples))) 150 | 151 | mc_expectations = 1/N_samples * torch.sum(torch.clamp(mc_pdf, min=1e-100),1) 152 | print(mc_expectations) 153 | logZ = torch.sum(torch.log(mc_expectations)) 154 | 155 | self.logZ = logZ 156 | return logZ 157 | 158 | 159 | -------------------------------------------------------------------------------- /models/svmogp.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | from torch.distributions import MultivariateNormal as Normal 15 | from torch.distributions import kl_divergence 16 | from kernels.coregionalization import LMC 17 | 18 | import numpy as np 19 | from GPy.inference.latent_function_inference import LatentFunctionInference 20 | from GPy.inference.latent_function_inference.posterior import Posterior 21 | 22 | 23 | class SVMOGP(torch.nn.Module): 24 | """ 25 | -- Sparse Variational Multi-output Gaussian Process -- 26 | -- 27 | -- Adaptation to Pytorch + GP framework -- 28 | -- Based on M. A. Álvarez and N. Lawrence, "Sparse convolved Gaussian processes for multi-output regression" NIPS'08 29 | -- Reference: http://papers.neurips.cc/paper/3553-sparse-convolved-gaussian-processes-for-multi-output-regression.pdf 30 | """ 31 | def __init__(self, kernels, likelihoods, Q, M, input_dim=None, batch_rates=None): 32 | super(SVMOGP, self).__init__() 33 | 34 | if input_dim is None: 35 | input_dim = 1 36 | 37 | 38 | # Dimensions -- 39 | self.M = M # num. inducing 40 | self.Q = Q # num. latent functions 41 | self.input_dim = int(input_dim) # dimension of x 42 | 43 | # Likelihoods -- 44 | self.likelihoods = likelihoods # list of likelihoods 45 | self.D = len(self.likelihoods) # num. 
output channels 46 | 47 | if batch_rates is None: 48 | self.batch_rates = self.D*[1.0] 49 | else: 50 | self.batch_rates = batch_rates 51 | 52 | # Kernels -- 53 | self.kernels = torch.nn.ModuleList() 54 | for q in range(self.Q): 55 | self.kernels.append(kernels[q]) 56 | self.coregionalization = LMC(self.kernels, self.D) # is a list 57 | 58 | # Inducing points -- 59 | if self.input_dim > 1: 60 | self.z = torch.nn.Parameter(torch.rand(self.M, self.input_dim, self.Q), requires_grad=False) 61 | else: 62 | self.z = torch.nn.Parameter(torch.tile(torch.linspace(0.1, 0.9, self.M)[:,None, None], (1, 1, self.Q)), requires_grad=False) 63 | 64 | 65 | # Variational distributions -- 66 | self.q_m = torch.nn.Parameter(2*torch.randn(M, Q), requires_grad=True) # variational: mean parameter 67 | self.q_L = torch.nn.Parameter(torch.tile(torch.eye(M)[:, :, None], (1, 1, self.Q)), requires_grad=True) # variational: covariance 68 | 69 | def expectation(self, x, y): 70 | # Check length of input+output lists 71 | assert len(x) == self.D 72 | assert len(y) == self.D 73 | 74 | # MOGP prior + Variational parameters 75 | q_m = self.q_m 76 | q_S = torch.zeros(self.M, self.M, self.Q) 77 | Kuu = torch.zeros(self.M, self.M, self.Q) 78 | iKuu = torch.zeros(self.M, self.M, self.Q) 79 | 80 | for q in range(self.Q): 81 | # MOGP latent functions prior 82 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 83 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse? 84 | Kuu[:, :, q] = Kuu_q 85 | iKuu[:, :, q] = iKuu_q 86 | 87 | # Variational parameters + Gaussian integration 88 | q_L = torch.tril(self.q_L[:, :, q]) 89 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 90 | 91 | # Expectation values (NxD) 92 | expectation = [] 93 | for d in range(self.D): 94 | Kff = self.coregionalization.Kff(x[d], d) 95 | Kfu = self.coregionalization.Kfu(x[d], self.z, d) 96 | 97 | m_f = 0.0 98 | S_f = Kff 99 | 100 | for q in range(self.Q): 101 | A = Kfu[:, :, q].mm(iKuu[:, :, q]) 102 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t()) 103 | 104 | m_f += A.mm(q_m[:, q:q + 1]) 105 | S_f += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t()) 106 | 107 | v_f = torch.diag(S_f) 108 | expectation.append(self.likelihoods[d].variational_expectation(y[d], m_f, v_f)) 109 | 110 | return expectation 111 | 112 | def divergence(self, p_u, q_u): 113 | kl = 0.0 114 | for q in range(self.Q): 115 | kl += kl_divergence(q_u[q], p_u[q]) 116 | return kl 117 | 118 | def forward(self, x, y): 119 | 120 | # Empty variables for filling in 1:Q 121 | q_u = [] 122 | p_u = [] 123 | q_m = self.q_m 124 | q_S = torch.zeros(self.M, self.M, self.Q) 125 | Kuu = torch.zeros(self.M, self.M, self.Q) 126 | for q in range(self.Q): 127 | 128 | # Variational parameters -- 129 | q_L = torch.tril(self.q_L[:,:,q]) 130 | q_S[:,:,q] = torch.mm(q_L, q_L.t()) 131 | 132 | # Prior parameters (uses kernel) -- 133 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 134 | Kuu[:, :, q] = Kuu_q 135 | 136 | # Distributions -- q(u), p(u) 137 | q_u.append(Normal(q_m[:,q].flatten(), q_S[:,:,q])) 138 | p_u.append(Normal(torch.zeros(self.M), Kuu[:,:,q])) 139 | 140 | # Expectation term -- 141 | expectation = 0.0 142 | expectation_mo = self.expectation(x, y) 143 | for d, exp in enumerate(expectation_mo): 144 | expectation += self.batch_rates[d] * exp.sum() 145 | 146 | # KL divergence -- 147 | kl = self.divergence(q_u, p_u) 148 | 149 | # Lower bound (ELBO) -- 150 | elbo = expectation - kl 151 | return -elbo 152 | 153 | def predictive(self, xnew, d): 154 | # MOGP prior + Variational parameters 155 
| q_m = self.q_m 156 | q_S = torch.zeros(self.M, self.M, self.Q) 157 | Kuu = torch.zeros(self.M, self.M, self.Q) 158 | iKuu = torch.zeros(self.M, self.M, self.Q) 159 | 160 | # Posterior distribution on new input data 161 | Kff = self.coregionalization.Kff(xnew, d) 162 | Kfu = self.coregionalization.Kfu(xnew, self.z, d) 163 | 164 | m_pred = 0.0 165 | S_pred = Kff 166 | for q in range(self.Q): 167 | # MOGP latent functions prior 168 | Kuu_q = self.kernels[q].K(self.z[:, :, q], self.z[:, :, q]) 169 | iKuu_q, _ = torch.solve(torch.eye(self.M), Kuu_q) # is pseudo-inverse? 170 | Kuu[:, :, q] = Kuu_q 171 | iKuu[:, :, q] = iKuu_q 172 | 173 | # Variational parameters + Gaussian integration 174 | q_L = torch.tril(self.q_L[:, :, q]) 175 | q_S[:, :, q] = torch.mm(q_L, q_L.t()) 176 | 177 | A = Kfu[:, :, q].mm(iKuu[:, :, q]) 178 | AT = iKuu[:, :, q].mm(Kfu[:, :, q].t()) 179 | 180 | m_pred += A.mm(q_m[:, q:q + 1]) 181 | S_pred += A.mm(q_S[:, :, q]).mm(AT) - A.mm(Kfu[:, :, q].t()) 182 | 183 | # Detach and numpy easier for plotting. 184 | m_pred = m_pred.detach().numpy() 185 | S_pred = S_pred.detach().numpy() 186 | 187 | gp_mu = m_pred.flatten() 188 | gp_var = np.diagonal(S_pred) 189 | 190 | gp = gp_mu 191 | gp_upper = gp_mu + 2 * np.sqrt(gp_var) # + 2*self.likelihood.sigma.detach().numpy() 192 | gp_lower = gp_mu - 2 * np.sqrt(gp_var) # - 2*self.likelihood.sigma.detach().numpy() 193 | 194 | return gp, gp_upper, gp_lower 195 | 196 | def rmse(self, x_new, f_new, d): 197 | f_gp,_,_ = self.predictive(x_new, d) 198 | rmse = torch.sqrt(torch.mean((f_new - f_gp)**2.0)).detach().numpy() 199 | return rmse 200 | 201 | def mae(self, x_new, f_new, d): 202 | f_gp,_,_ = self.predictive(x_new, d) 203 | mae = torch.mean(torch.abs(f_new - f_gp)).detach().numpy() 204 | return mae 205 | 206 | def nlpd(self, x_new, y_new, d): 207 | f_gp, u_gp, _ = self.predictive(x_new, d) 208 | f_gp = torch.from_numpy(f_gp) 209 | u_gp = torch.from_numpy(u_gp) 210 | v_gp = torch.pow(0.5*(u_gp - f_gp), 2.0) 211 | nlpd = - torch.mean(self.likelihoods[d].log_predictive(y_new, f_gp, v_gp)).detach().numpy() 212 | return nlpd 213 | -------------------------------------------------------------------------------- /optimization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pmorenoz/ModularGP/b0c7a4116ef6b42376c1ba75834b45926f5c2767/optimization/__init__.py -------------------------------------------------------------------------------- /optimization/algorithms.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------- 2 | # This script belongs to the ModularGP repo 3 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 4 | # Copyright (c) 2021 Pablo Moreno-Munoz 5 | # ----------------------------------------------------------------- 6 | # 7 | # 8 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 9 | # Section for Cognitive Systems 10 | # Technical University of Denmark (DTU) 11 | # October 2021 12 | 13 | import torch 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | 17 | from likelihoods.gaussian import Gaussian 18 | from likelihoods.bernoulli import Bernoulli 19 | 20 | class AlgorithmVEM(): 21 | def __init__(self, model, x=None, y=None, config='svgp', iters=20): 22 | super(AlgorithmVEM, self).__init__() 23 | 24 | self.model = model 25 | if x is not None: 26 | self.x = x 27 | if y is not None: 28 | self.y = y 29 | self.iters = iters 30 | 31 | if config 
== 'svgp' or config == 'ensemble': 32 | self.config = config 33 | else: 34 | raise ValueError('Not valid model type for Algorithm VEM, choose \'svgp\' or \'ensemble\'') 35 | 36 | if self.config == 'svgp': 37 | # Learning rates per param. 38 | self.lr_m = 1e-6 39 | self.lr_L = 1e-12 40 | self.lr_hyp = 1e-10 41 | self.lr_z = 1e-10 42 | 43 | # VE + VM iterations. 44 | self.ve_its = 20 45 | self.vm_its = 10 46 | self.z_its = 10 47 | 48 | elif self.config == 'ensemble': 49 | # Learning rates per param. 50 | self.lr_m = 1e-3 51 | self.lr_L = 1e-6 52 | self.lr_hyp = 1e-8 53 | self.lr_z = 1e-6 54 | 55 | # VE + VM iterations. 56 | self.ve_its = 30 57 | self.vm_its = 10 58 | self.z_its = 10 59 | 60 | def fit(self, opt='sgd', plot=False): 61 | if opt == 'sgd': 62 | ve_optimizer = torch.optim.SGD([{'params':self.model.q_m, 'lr':self.lr_m},{'params':self.model.q_L,'lr':self.lr_L}], lr=1e-12, momentum=0.9) 63 | 64 | if isinstance(self.model, Gaussian): 65 | vm_optimizer = torch.optim.SGD([{'params':self.model.kernel.parameters(), 'lr':self.lr_hyp},{'params':self.model.likelihood.sigma,'lr':self.lr_hyp}], lr=1e-12, momentum=0.9) 66 | else: 67 | vm_optimizer = torch.optim.SGD([{'params': self.model.kernel.parameters(), 'lr': self.lr_hyp}], lr=1e-12, momentum=0.9) 68 | 69 | z_optimizer = torch.optim.SGD([{'params':self.model.z, 'lr':self.lr_z}], lr=1e-10, momentum=0.9) 70 | 71 | elbo_its = np.empty((self.iters, 1)) 72 | for em_it in range(self.iters): 73 | 74 | # VE STEP 75 | for it in range(self.ve_its): 76 | if self.config == 'svgp': 77 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO 78 | elif self.config == 'ensemble': 79 | elbo_it = self.model() # Forward pass -> computes ELBO 80 | 81 | ve_optimizer.zero_grad() 82 | elbo_it.backward() # Backward pass <- computes gradients 83 | ve_optimizer.step() 84 | 85 | # Overfitting avoidance 86 | if self.config == 'ensemble': 87 | if self.model().item() < 10.0: 88 | break 89 | 90 | # VM STEP 91 | # 1. hyper-parameters 92 | for it in range(self.vm_its): 93 | if self.config == 'svgp': 94 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO 95 | elif self.config == 'ensemble': 96 | elbo_it = self.model() # Forward pass -> computes ELBO 97 | 98 | vm_optimizer.zero_grad() 99 | elbo_it.backward() # Backward pass <- computes gradients 100 | vm_optimizer.step() 101 | 102 | # Overfitting avoidance 103 | if self.config == 'ensemble': 104 | if self.model().item() < 10.0: 105 | break 106 | 107 | # 2. 
inducing-points 108 | for it in range(self.z_its): 109 | if self.config == 'svgp': 110 | elbo_it = self.model(self.x,self.y) # Forward pass -> computes ELBO 111 | elif self.config == 'ensemble': 112 | elbo_it = self.model() # Forward pass -> computes ELBO 113 | 114 | z_optimizer.zero_grad() 115 | elbo_it.backward() # Backward pass <- computes gradients 116 | z_optimizer.step() 117 | 118 | # Overfitting avoidance 119 | if self.config == 'ensemble': 120 | if self.model().item() < 10.0: 121 | break 122 | 123 | print('Variational EM step (it=' + str(em_it) + ')') 124 | if self.config == 'svgp': 125 | print(' \__ elbo =', self.model(self.x, self.y).item()) 126 | elbo_its[em_it] = - self.model(self.x, self.y).item() 127 | elif self.config == 'ensemble': 128 | print(' \__ elbo =', self.model().item()) 129 | elbo_its[em_it] = - self.model().item() 130 | 131 | # Overfitting avoidance 132 | if self.model().item() < 10.0: 133 | break 134 | 135 | elif opt == 'lbfgs': 136 | optim_param= torch.optim.LBFGS([self.model.q_m, self.model.q_L], lr=self.lr_m, max_iter=self.ve_its) 137 | optim_hyper = torch.optim.LBFGS(list(self.model.kernel.parameters()) + [self.model.likelihood.sigma], lr=self.lr_hyp, max_iter=self.vm_its) 138 | optim_z = torch.optim.LBFGS([self.model.z], lr=self.lr_z, max_iter=self.vm_its) 139 | 140 | elbo_its = np.empty((self.iters, 1)) 141 | for em_it in range(self.iters): 142 | 143 | # VE STEP 144 | def closure(): 145 | optim_param.zero_grad() 146 | if self.config == 'svgp': 147 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO 148 | elif self.config == 'ensemble': 149 | elbo_it = self.model() # Forward pass -> computes ELBO 150 | 151 | elbo_it.backward() 152 | return elbo_it 153 | 154 | optim_param.step(closure) 155 | if self.config == 'svgp': 156 | print(' param >>> elbo =', self.model(self.x, self.y).item()) 157 | elif self.config == 'ensemble': 158 | print(' param >>> elbo =', self.model().item()) 159 | 160 | # VM STEP 161 | # 1. hyper-parameters 162 | def closure(): 163 | optim_hyper.zero_grad() 164 | if self.config == 'svgp': 165 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO 166 | elif self.config == 'ensemble': 167 | elbo_it = self.model() # Forward pass -> computes ELBO 168 | 169 | elbo_it.backward() 170 | return elbo_it 171 | 172 | optim_hyper.step(closure) 173 | if self.config == 'svgp': 174 | print(' hyper >>> elbo =', self.model(self.x, self.y).item()) 175 | elif self.config == 'ensemble': 176 | print(' hyper >>> elbo =', self.model().item()) 177 | 178 | # 2. 
inducing-points 179 | def closure(): 180 | optim_z.zero_grad() 181 | if self.config == 'svgp': 182 | elbo_it = self.model(self.x, self.y) # Forward pass -> computes ELBO 183 | elif self.config == 'ensemble': 184 | elbo_it = self.model() # Forward pass -> computes ELBO 185 | 186 | elbo_it.backward() 187 | return elbo_it 188 | 189 | optim_z.step(closure) 190 | if self.config == 'svgp': 191 | print(' z pts >>> elbo =', self.model(self.x, self.y).item()) 192 | elif self.config == 'ensemble': 193 | print(' z pts >>> elbo =', self.model().item()) 194 | 195 | 196 | print('Variational EM step (it=' + str(em_it) + ')') 197 | if self.config == 'svgp': 198 | print(' \__ elbo =', self.model(self.x, self.y).item()) 199 | elbo_its[em_it] = - self.model(self.x, self.y).item() 200 | elif self.config == 'ensemble': 201 | print(' \__ elbo =', self.model().item()) 202 | elbo_its[em_it] = - self.model().item() 203 | 204 | else: 205 | print('Not valid optimizer') 206 | 207 | if plot: 208 | plt.figure() 209 | plt.plot(elbo_its, 'k-') 210 | plt.title('Ensemble GP Inference (ELBO)') 211 | plt.xlabel('Iterations') 212 | plt.show() 213 | 214 | def GPR_Optimizer(model, x, y, its=50, lr=1e-2): 215 | optimizer = torch.optim.LBFGS(model.parameters(), lr=lr, max_iter=10) 216 | elbo_its = np.empty((its, 1)) 217 | for it in range(its): 218 | def closure(): 219 | optimizer.zero_grad() 220 | elbo_opt = model(x, y) 221 | elbo_opt.backward() 222 | return elbo_opt 223 | 224 | optimizer.step(closure) 225 | 226 | print('Optimization step (it=' + str(it) + ')') 227 | print(' \__ log_marginal =', model(x, y).item()) 228 | elbo_its[it] = -model(x, y).item() 229 | 230 | 231 | def vem_algorithm(model, x, y, em_iters=10, optimizer='sgd',plot=False): 232 | if optimizer=='sgd': 233 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-6},{'params':model.q_L,'lr':1e-12}], lr=1e-12, momentum=0.9) 234 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-10, momentum=0.9) 235 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-10}], lr=1e-10, momentum=0.9) 236 | 237 | VE_iters = 20 238 | VM_iters = 10 239 | Z_iters = 10 240 | 241 | elbo_its = np.empty((em_iters, 1)) 242 | for em_it in range(em_iters): 243 | 244 | # VE STEP 245 | for it in range(VE_iters): 246 | elbo_it = model(x,y) # Forward pass -> computes ELBO 247 | ve_optimizer.zero_grad() 248 | elbo_it.backward() # Backward pass <- computes gradients 249 | ve_optimizer.step() 250 | 251 | # VM STEP 252 | # 1. hyper-parameters 253 | for it in range(VM_iters): 254 | elbo_it = model(x,y) # Forward pass -> computes ELBO 255 | vm_optimizer.zero_grad() 256 | elbo_it.backward() # Backward pass <- computes gradients 257 | vm_optimizer.step() 258 | 259 | # 2. 
inducing-points
260 |             for it in range(Z_iters):
261 |                 elbo_it = model(x,y)   # Forward pass -> computes ELBO
262 |                 z_optimizer.zero_grad()
263 |                 elbo_it.backward()      # Backward pass <- computes gradients
264 |                 z_optimizer.step()
265 | 
266 |             print('Variational EM step (it=' + str(em_it) + ')')
267 |             print(' \__ elbo =', model(x, y).item())
268 |             elbo_its[em_it] = -model(x, y).item()
269 | 
270 | 
271 |     elif optimizer=='lbfgs':
272 |         ve_optimizer = torch.optim.LBFGS([model.q_m, model.q_L], max_iter=50)
273 |         vm_optimizer = torch.optim.LBFGS(model.kernel.parameters(), lr=1e-3, max_iter=10)
274 | 
275 | 
276 |         elbo_its = np.empty((em_iters,1))
277 |         for em_it in range(em_iters):
278 |             # VE STEP
279 |             for name, param in model.kernel.named_parameters():
280 |                 param.requires_grad = False
281 | 
282 |             def closure():
283 |                 ve_optimizer.zero_grad()
284 |                 elbo_opt = model(x, y)
285 |                 #print('ELBO:', elbo_opt.item())
286 |                 elbo_opt.backward()
287 |                 return elbo_opt
288 | 
289 |             ve_optimizer.step(closure)
290 | 
291 |             # VM STEP
292 |             for name, param in model.kernel.named_parameters():
293 |                 param.requires_grad = True
294 | 
295 |             def closure():
296 |                 vm_optimizer.zero_grad()
297 |                 elbo_opt = model(x, y)
298 |                 #print('ELBO:', elbo_opt.item())
299 |                 elbo_opt.backward()
300 |                 return elbo_opt
301 | 
302 |             vm_optimizer.step(closure)
303 | 
304 |             print('Variational EM step (it=' + str(em_it) + ')')
305 |             print(' \__ elbo =', model(x, y).item())
306 |             elbo_its[em_it] = -model(x, y).item()
307 | 
308 |     if plot:
309 |         plt.figure()
310 |         plt.plot(elbo_its, 'k-')
311 |         plt.title('Sparse GP Regression (ELBO)')
312 |         plt.xlabel('Iterations')
313 |         plt.show()
314 | 
315 | def ensemble_vem(model, em_iters=20, optimizer='sgd',plot=False):
316 |     if optimizer=='sgd':
317 |         ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9)
318 |         vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9)
319 |         z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-6}], lr=1e-8, momentum=0.9)
320 | 
321 |         VE_iters = 30
322 |         VM_iters = 10
323 |         Z_iters = 10
324 | 
325 |         elbo_its = np.empty((em_iters, 1))
326 |         for em_it in range(em_iters):
327 |             # VE STEP
328 |             # 1. Variational parameters
329 |             for it in range(VE_iters):
330 |                 elbo_it = model()        # Forward pass -> computes ELBO
331 |                 ve_optimizer.zero_grad()
332 |                 elbo_it.backward()       # Backward pass <- computes gradients
333 |                 ve_optimizer.step()
334 | 
335 |             # VM STEP
336 |             # 1. hyper-parameters
337 |             for it in range(VM_iters):
338 |                 elbo_it = model()        # Forward pass -> computes ELBO
339 |                 vm_optimizer.zero_grad()
340 |                 elbo_it.backward()       # Backward pass <- computes gradients
341 |                 vm_optimizer.step()
342 | 
343 |             # 2. 
inducing-points 344 | for it in range(Z_iters): 345 | elbo_it = model() # Forward pass -> computes ELBO 346 | z_optimizer.zero_grad() 347 | elbo_it.backward() # Backward pass <- computes gradients 348 | z_optimizer.step() 349 | 350 | print('Variational EM step (it=' + str(em_it) + ')') 351 | print(' \__ elbo =', model().item()) 352 | elbo_its[em_it] = -model().item() 353 | 354 | if -model().item() > 0.0: 355 | break 356 | 357 | if plot: 358 | plt.figure() 359 | plt.plot(elbo_its, 'k-') 360 | plt.title('Ensemble GP Inference (ELBO)') 361 | plt.xlabel('Iterations') 362 | plt.show() 363 | 364 | 365 | def ensemble_vem_parallel(model, em_iters=30, optimizer='sgd',plot=False): 366 | if optimizer=='sgd': 367 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9) 368 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9) 369 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-8}], lr=1e-8, momentum=0.9) 370 | 371 | VE_iters = 30 372 | VM_iters = 10 373 | Z_iters = 10 374 | 375 | elbo_its = np.zeros((em_iters, 1)) 376 | for em_it in range(em_iters): 377 | # VE STEP 378 | # 1. Variational parameters 379 | for it in range(VE_iters): 380 | elbo_it = model() # Forward pass -> computes ELBO 381 | ve_optimizer.zero_grad() 382 | elbo_it.backward() # Backward pass <- computes gradients 383 | ve_optimizer.step() 384 | 385 | # VM STEP 386 | # 1. hyper-parameters 387 | for it in range(VM_iters): 388 | elbo_it = model() # Forward pass -> computes ELBO 389 | vm_optimizer.zero_grad() 390 | elbo_it.backward() # Backward pass <- computes gradients 391 | vm_optimizer.step() 392 | 393 | # 2. inducing-points 394 | for it in range(Z_iters): 395 | elbo_it = model() # Forward pass -> computes ELBO 396 | z_optimizer.zero_grad() 397 | elbo_it.backward() # Backward pass <- computes gradients 398 | z_optimizer.step() 399 | 400 | print('Variational EM step (it=' + str(em_it) + ')') 401 | print(' \__ elbo =', model().item()) 402 | elbo_its[em_it] = -model().item() 403 | 404 | if -model().item() > 0.0: 405 | break 406 | 407 | if plot: 408 | plt.figure() 409 | plt.plot(elbo_its, 'k-') 410 | plt.title('Ensemble GP Inference (ELBO)') 411 | plt.xlabel('Iterations') 412 | plt.show() 413 | 414 | def vem_algorithm_infographic(model, x, y, em_iters=10, plot=False): 415 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-5},{'params':model.q_L,'lr':1e-8}], lr=1e-12, momentum=0.9) 416 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-10, momentum=0.9) 417 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-10}], lr=1e-10, momentum=0.9) 418 | 419 | VE_iters = 20 420 | VM_iters = 20 421 | Z_iters = 10 422 | 423 | elbo_its = np.empty((em_iters, 1)) 424 | for em_it in range(em_iters): 425 | 426 | # VE STEP 427 | for it in range(VE_iters): 428 | elbo_it = model(x,y) # Forward pass -> computes ELBO 429 | ve_optimizer.zero_grad() 430 | elbo_it.backward() # Backward pass <- computes gradients 431 | ve_optimizer.step() 432 | 433 | # VM STEP 434 | # 1. hyper-parameters 435 | for it in range(VM_iters): 436 | elbo_it = model(x,y) # Forward pass -> computes ELBO 437 | vm_optimizer.zero_grad() 438 | elbo_it.backward() # Backward pass <- computes gradients 439 | vm_optimizer.step() 440 | 441 | # 2. 
inducing-points 442 | for it in range(Z_iters): 443 | elbo_it = model(x,y) # Forward pass -> computes ELBO 444 | z_optimizer.zero_grad() 445 | elbo_it.backward() # Backward pass <- computes gradients 446 | z_optimizer.step() 447 | 448 | print('Variational EM step (it=' + str(em_it) + ')') 449 | print(' \__ elbo =', model(x, y).item()) 450 | elbo_its[em_it] = -model(x, y).item() 451 | 452 | 453 | def ensemble_vem_infographic(model, em_iters=30, optimizer='sgd',plot=False): 454 | if optimizer=='sgd': 455 | ve_optimizer = torch.optim.SGD([{'params':model.q_m, 'lr':1e-3},{'params':model.q_L,'lr':1e-6}], lr=1e-6, momentum=0.9) 456 | vm_optimizer = torch.optim.SGD(model.kernel.parameters(), lr=1e-8, momentum=0.9) 457 | z_optimizer = torch.optim.SGD([{'params':model.z, 'lr':1e-8}], lr=1e-8, momentum=0.9) 458 | 459 | VE_iters = 30 460 | VM_iters = 20 461 | Z_iters = 10 462 | 463 | elbo_its = np.zeros((em_iters, 1)) 464 | for em_it in range(em_iters): 465 | # VE STEP 466 | # 1. Variational parameters 467 | for it in range(VE_iters): 468 | elbo_it = model() # Forward pass -> computes ELBO 469 | ve_optimizer.zero_grad() 470 | elbo_it.backward() # Backward pass <- computes gradients 471 | ve_optimizer.step() 472 | 473 | # VM STEP 474 | # 1. hyper-parameters 475 | for it in range(VM_iters): 476 | elbo_it = model() # Forward pass -> computes ELBO 477 | vm_optimizer.zero_grad() 478 | elbo_it.backward() # Backward pass <- computes gradients 479 | vm_optimizer.step() 480 | 481 | # 2. inducing-points 482 | for it in range(Z_iters): 483 | elbo_it = model() # Forward pass -> computes ELBO 484 | z_optimizer.zero_grad() 485 | elbo_it.backward() # Backward pass <- computes gradients 486 | z_optimizer.step() 487 | 488 | print('Variational EM step (it=' + str(em_it) + ')') 489 | print(' \__ elbo =', model().item()) 490 | elbo_its[em_it] = -model().item() 491 | 492 | if -model().item() > 0.0: 493 | break 494 | 495 | def moensemble_vem(model, em_iters=20, optimizer='sgd',plot=False): 496 | if optimizer=='sgd': 497 | ve_optimizer = torch.optim.SGD([{'params': model.q_m, 'lr': 1e-3}, 498 | {'params': model.q_L,'lr': 1e-6}], lr=1e-6, momentum=0.9) 499 | vm_optimizer = torch.optim.SGD([{'params': model.kernels.parameters(), 'lr': 1e-8}, 500 | {'params': model.coregionalization.W, 'lr': 1e-6}], lr=1e-8, momentum=0.9) 501 | z_optimizer = torch.optim.SGD([{'params': model.z, 'lr':1e-7}], lr=1e-8, momentum=0.9) 502 | 503 | VE_iters = 30 504 | VM_iters = 20 505 | Z_iters = 5 506 | 507 | elbo_its = np.empty((em_iters, 1)) 508 | for em_it in range(em_iters): 509 | # VE STEP 510 | # 1. Variational parameters 511 | for it in range(VE_iters): 512 | elbo_it = model() # Forward pass -> computes ELBO 513 | ve_optimizer.zero_grad() 514 | elbo_it.backward() # Backward pass <- computes gradients 515 | ve_optimizer.step() 516 | 517 | # VM STEP 518 | # 1. hyper-parameters 519 | for it in range(VM_iters): 520 | elbo_it = model() # Forward pass -> computes ELBO 521 | vm_optimizer.zero_grad() 522 | elbo_it.backward() # Backward pass <- computes gradients 523 | vm_optimizer.step() 524 | 525 | # 2. 
inducing-points 526 | for it in range(Z_iters): 527 | elbo_it = model() # Forward pass -> computes ELBO 528 | z_optimizer.zero_grad() 529 | elbo_it.backward() # Backward pass <- computes gradients 530 | z_optimizer.step() 531 | 532 | print('Variational EM step (it=' + str(em_it) + ')') 533 | print(' \__ elbo =', model().item()) 534 | elbo_its[em_it] = -model().item() 535 | 536 | if -model().item() > 0.0: 537 | break 538 | 539 | if plot: 540 | plt.figure() 541 | plt.plot(elbo_its, 'k-') 542 | plt.title('Ensemble GP Inference (ELBO)') 543 | plt.xlabel('Iterations') 544 | plt.show() 545 | 546 | class AlgorithmMOVEM(): 547 | def __init__(self, model, iters=20, plot=False): 548 | super(AlgorithmMOVEM, self).__init__() 549 | 550 | self.model = model 551 | self.iters = iters 552 | 553 | # Learning rates per param. 554 | self.lr_m = 1e-3 555 | self.lr_L = 1e-6 556 | self.lr_B = 1e-6 557 | self.lr_hyp = 1e-8 558 | self.lr_z = 1e-7 559 | 560 | # VE + VM iterations. 561 | self.ve_iters = 30 562 | self.vm_iters = 20 563 | self.z_iters = 10 564 | 565 | def fit(self, plot=False): 566 | 567 | ve_optimizer = torch.optim.SGD([{'params': self.model.q_m, 'lr': self.lr_m}, 568 | {'params': self.model.q_L,'lr': self.lr_L}], lr=1e-6, momentum=0.9) 569 | vm_optimizer = torch.optim.SGD([{'params': self.model.kernels.parameters(), 'lr': self.lr_hyp}, 570 | {'params': self.model.coregionalization.W, 'lr': self.lr_B}], lr=1e-8, momentum=0.9) 571 | z_optimizer = torch.optim.SGD([{'params': self.model.z, 'lr': self.lr_z}], lr=1e-8, momentum=0.9) 572 | 573 | elbo_its = np.empty((self.iters, 1)) 574 | for em_it in range(self.iters): 575 | # VE STEP 576 | # 1. Variational parameters 577 | for it in range(self.ve_iters): 578 | elbo_it = self.model() # Forward pass -> computes ELBO 579 | ve_optimizer.zero_grad() 580 | elbo_it.backward() # Backward pass <- computes gradients 581 | ve_optimizer.step() 582 | 583 | # VM STEP 584 | # 1. hyper-parameters 585 | for it in range(self.vm_iters): 586 | elbo_it = self.model() # Forward pass -> computes ELBO 587 | vm_optimizer.zero_grad() 588 | elbo_it.backward() # Backward pass <- computes gradients 589 | vm_optimizer.step() 590 | 591 | # 2. 
inducing-points 592 | for it in range(self.z_iters): 593 | elbo_it = self.model() # Forward pass -> computes ELBO 594 | z_optimizer.zero_grad() 595 | elbo_it.backward() # Backward pass <- computes gradients 596 | z_optimizer.step() 597 | 598 | print('Variational EM step (it=' + str(em_it) + ')') 599 | print(' \__ elbo =', self.model().item()) 600 | elbo_its[em_it] = -self.model().item() 601 | 602 | if -self.model().item() > 0.0: 603 | break 604 | 605 | if plot: 606 | plt.figure() 607 | plt.plot(elbo_its, 'k-') 608 | plt.title('Ensemble GP Inference (ELBO)') 609 | plt.xlabel('Iterations') 610 | plt.show() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==1.4.1 2 | anyio==3.2.1 3 | appnope==0.1.2 4 | argon2-cffi==20.1.0 5 | async-generator==1.10 6 | attrs==21.2.0 7 | Babel==2.9.1 8 | backcall==0.2.0 9 | bleach==3.3.0 10 | certifi==2020.6.20 11 | cffi==1.14.5 12 | chardet==4.0.0 13 | click==8.0.1 14 | cloudpickle==2.0.0 15 | cycler==0.10.0 16 | Cython==0.29.23 17 | databricks-cli==0.15.0 18 | decorator==5.0.7 19 | defusedxml==0.7.1 20 | docker==5.0.2 21 | entrypoints==0.3 22 | Flask==2.0.1 23 | gitdb==4.0.7 24 | GitPython==3.1.23 25 | GPy==1.10.0 26 | gpytorch==1.5.1 27 | greenlet==1.1.1 28 | gunicorn==20.1.0 29 | idna==2.10 30 | importlib-metadata==4.0.1 31 | ipykernel==5.5.5 32 | ipython==7.24.1 33 | ipython-genutils==0.2.0 34 | itsdangerous==2.0.1 35 | jedi==0.18.0 36 | Jinja2==3.0.1 37 | joblib==1.0.1 38 | json5==0.9.6 39 | jsonschema==3.2.0 40 | jupyter-client==6.1.12 41 | jupyter-core==4.7.1 42 | jupyter-server==1.9.0 43 | jupyterlab==3.0.16 44 | jupyterlab-pygments==0.1.2 45 | jupyterlab-server==2.6.0 46 | kiwisolver==1.3.1 47 | Mako==1.1.5 48 | MarkupSafe==2.0.1 49 | matplotlib==3.4.2 50 | matplotlib-inline==0.1.2 51 | matplotlib2tikz==0.7.6 52 | mistune==0.8.4 53 | mlflow==1.20.2 54 | nbclassic==0.3.1 55 | nbclient==0.5.3 56 | nbconvert==6.1.0 57 | nbformat==5.1.3 58 | nest-asyncio==1.5.1 59 | networkx @ file:///tmp/build/80754af9/networkx_1627459939258/work 60 | notebook==6.4.0 61 | numpy==1.20.3 62 | opt-einsum==3.3.0 63 | packaging==20.9 64 | pandas==1.2.4 65 | pandocfilters==1.4.3 66 | paramz==0.9.5 67 | parso==0.8.2 68 | pexpect==4.8.0 69 | pickleshare==0.7.5 70 | Pillow==8.2.0 71 | prometheus-client==0.11.0 72 | prometheus-flask-exporter==0.18.2 73 | prompt-toolkit==3.0.19 74 | protobuf==3.17.3 75 | ptyprocess==0.7.0 76 | pycparser==2.20 77 | Pygments==2.9.0 78 | pyparsing==2.4.7 79 | pyreadstat==1.1.2 80 | pyro-api==0.1.2 81 | pyro-ppl==1.7.0 82 | pyrsistent==0.17.3 83 | python-dateutil==2.8.1 84 | python-editor==1.0.4 85 | pytz==2021.1 86 | PyYAML==5.4.1 87 | pyzmq==22.1.0 88 | querystring-parser==1.2.4 89 | requests==2.25.1 90 | requests-unixsocket==0.2.0 91 | scikit-learn==0.24.2 92 | scipy==1.6.3 93 | Send2Trash==1.7.1 94 | six==1.16.0 95 | sklearn==0.0 96 | smmap==4.0.0 97 | sniffio==1.2.0 98 | SQLAlchemy==1.4.23 99 | sqlparse==0.4.2 100 | tabulate==0.8.9 101 | terminado==0.10.1 102 | testpath==0.5.0 103 | threadpoolctl==2.1.0 104 | tikzplotlib==0.9.8 105 | torch==1.9.0 106 | torchplot==0.2.0 107 | torchvision==0.9.1 108 | tornado==6.1 109 | tqdm==4.62.2 110 | traitlets==5.0.5 111 | typing-extensions==3.10.0.0 112 | urllib3==1.26.5 113 | wcwidth==0.2.5 114 | webencodings==0.5.1 115 | websocket-client==1.1.0 116 | Werkzeug==2.0.1 117 | zipp==3.4.1 118 | 
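The pinned versions above would typically be installed into a clean environment with `pip install -r requirements.txt`. Referring back to optimization/algorithms.py: the VEM routines only assume the model exposes `q_m`, `q_L`, `z`, a `kernel` with `parameters()`, and a forward pass returning the scalar objective being minimised (the negative ELBO). A minimal sketch of that interface, using a hypothetical stand-in module (this is not the repository's SVGP class, and the import assumes the repo root is on the Python path):

    import torch
    from optimization.algorithms import vem_algorithm

    class ToyModel(torch.nn.Module):
        """Hypothetical stand-in exposing the attributes vem_algorithm expects."""
        def __init__(self, m=5):
            super().__init__()
            self.q_m = torch.nn.Parameter(torch.zeros(m, 1))      # variational mean
            self.q_L = torch.nn.Parameter(torch.eye(m))           # variational covariance factor
            self.z = torch.nn.Parameter(torch.linspace(0., 1., m)[:, None])  # inducing inputs
            self.kernel = torch.nn.Linear(1, 1)                   # placeholder hyper-parameters

        def forward(self, x, y):
            # Scalar objective to minimise; a real model would return the negative ELBO.
            return ((y - x @ self.kernel.weight.t() - self.q_m.mean()) ** 2).sum()

    x = torch.rand(50, 1)
    y = 2.0 * x + 0.1 * torch.randn(50, 1)
    vem_algorithm(ToyModel(), x, y, em_iters=2, optimizer='sgd')

Any object with this interface can be handed to the SGD path, which alternates the VE updates (variational parameters) with the VM updates (hyper-parameters, then inducing points) exactly as in the listing above.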
-------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | # squared distance is based on the gptorch code 2 | # by Steven Atkinson (steven@atkinson.mn) 3 | # ----------------------------------------------------------------- 4 | # This script belongs to the ModularGP repo 5 | # "Modular Gaussian Processes for Transfer Learning" @ NeurIPS 2021 6 | # Copyright (c) 2021 Pablo Moreno-Munoz 7 | # ----------------------------------------------------------------- 8 | # 9 | # 10 | # Pablo Moreno-Munoz (pabmo@dtu.dk) 11 | # Section for Cognitive Systems 12 | # Technical University of Denmark (DTU) 13 | # October 2021 14 | 15 | import os 16 | import torch 17 | import numpy as np 18 | import pandas as pd 19 | from torch.utils.data import Dataset, DataLoader 20 | 21 | _lim_val = np.finfo(np.float64).max 22 | _lim_val_exp = np.log(_lim_val) 23 | _lim_val_square = np.sqrt(_lim_val) 24 | #_lim_val_cube = cbrt(_lim_val) 25 | _lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf) 26 | _lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf) 27 | _lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf) 28 | 29 | 30 | def safe_exp(f): 31 | clamp_f = torch.clamp(f, min=-np.inf, max=_lim_val_exp) 32 | return torch.exp(clamp_f) 33 | 34 | def safe_square(f): 35 | f = torch.clamp(f, min=-np.inf, max=_lim_val_square) 36 | return f**2 37 | 38 | def safe_cube(f): 39 | f = torch.clamp(f, min=-np.inf, max=_lim_val_cube) 40 | return f**3 41 | 42 | def safe_quad(f): 43 | f = torch.clamp(f, min=-np.inf, max=_lim_val_quad) 44 | return f**4 45 | 46 | def true_function(x): 47 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \ 48 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) + \ 49 | 5*torch.cos(7*np.pi*x + 2.4*np.pi) 50 | return y 51 | 52 | def smooth_function(x): 53 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \ 54 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) 55 | return y 56 | 57 | def smooth_function_bias(x): 58 | y = 4.5*torch.cos(2*np.pi*x + 1.5*np.pi) - \ 59 | 3*torch.sin(4.3*np.pi*x + 0.3*np.pi) + \ 60 | 3.0*x - 7.5 61 | return y 62 | 63 | 64 | def true_u_functions(x_list, Q): 65 | u_functions = [] 66 | amplitude = (1.5 - 0.5) * torch.rand(Q, 3) + 0.5 67 | freq = (3 - 1) * torch.rand(Q, 3) + 1 68 | shift = 2 * torch.rand(Q, 3) 69 | for x in x_list: 70 | u_function = torch.empty(x.shape[0], Q) 71 | for q in range(Q): 72 | u_function[:,q,None] = 3.0 * amplitude[q, 0] * np.cos(freq[q, 0] * np.pi * x + shift[q, 0] * np.pi) - \ 73 | 2.0 * amplitude[q, 1] * np.sin(2 * freq[q, 1] * np.pi * x + shift[q, 1] * np.pi) + \ 74 | amplitude[q, 2] * np.cos(4 * freq[q, 2] * np.pi * x + shift[q, 2] * np.pi) 75 | u_functions.append(u_function) 76 | return u_functions 77 | 78 | 79 | def true_f_functions(x_list, Q): 80 | K = len(x_list) 81 | W = 0.5 * torch.randn(K, Q) 82 | f_functions = [] 83 | u_functions = true_u_functions(x_list, Q) 84 | for k, u_function in enumerate(u_functions): 85 | Nk = u_function.shape[0] 86 | f_function = torch.zeros(Nk, 1) 87 | for q in range(Q): 88 | f_function += torch.tile(W[k:k+1, q:q+1], (Nk, 1)) * u_function[:, q:q+1] 89 | 90 | f_functions.append(f_function) 91 | 92 | return f_functions 93 | 94 | 95 | def squared_distance(x1, x2=None): 96 | """ 97 | Given points x1 [n1 x d1] and x2 [n2 x d2], return a [n1 x n2] matrix with 98 | the pairwise squared distances between the points. 
99 |     Entry (i, j) is sum_{k=1}^{d} (x1[i, k] - x2[j, k])^2
100 |     """
101 |     if x2 is None:
102 |         return squared_distance(x1, x1)
103 | 
104 |     x1s = x1.pow(2).sum(1, keepdim=True)
105 |     x2s = x2.pow(2).sum(1, keepdim=True)
106 | 
107 |     r2 = x1s + x2s.t() - 2.0 * x1 @ x2.t()
108 | 
109 |     # Clamp away small negative entries caused by floating-point round-off.
110 |     # The correction term is detached so it adds no gradient of its own;
111 |     # gradients still flow through r2 itself.
112 |     return r2 - (torch.clamp(r2, max=0.0)).detach()
113 | 
114 | 
115 | class DataGP(Dataset):
116 |     def __init__(self, x, y):
117 |         # Accept numpy arrays or torch tensors for both inputs.
118 |         self.x = torch.from_numpy(x) if not torch.is_tensor(x) else x
119 |         self.y = torch.from_numpy(y) if not torch.is_tensor(y) else y
120 | 
121 | 
122 |     def __len__(self):
123 |         return len(self.x)
124 | 
125 |     def __getitem__(self, item):
126 |         return self.x[item], self.y[item]
127 | 
128 | 
129 | class DataMOGP(Dataset):
130 |     def __init__(self, x, y):
131 |         self.x = x  # x is a list of inputs, one per output/task
132 |         self.y = y  # y is a list of targets, one per output/task
133 | 
134 |     def __len__(self):
135 |         return min(len(x_d) for x_d in self.x)
136 | 
137 |     def __getitem__(self, item):
138 |         x_tuple = tuple(x_d[item] for x_d in self.x)
139 |         y_tuple = tuple(y_d[item] for y_d in self.y)
140 |         return x_tuple, y_tuple
141 | 
142 | 
143 | 
144 | 
--------------------------------------------------------------------------------
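As a closing usage sketch (the toy data here is an illustrative assumption, and the `util` import assumes the repo root is on the Python path): DataGP wraps numpy arrays or tensors so a standard torch DataLoader can mini-batch them, and squared_distance returns the non-negative pairwise squared-distance matrix.

    import numpy as np
    import torch
    from torch.utils.data import DataLoader
    from util import DataGP, smooth_function, squared_distance

    # Hypothetical toy data: noisy observations of the smooth test function.
    x = np.random.rand(100, 1)
    y = smooth_function(torch.from_numpy(x)).numpy() + 0.1 * np.random.randn(100, 1)

    dataset = DataGP(x, y)                                    # numpy in, tensors out
    loader = DataLoader(dataset, batch_size=20, shuffle=True)

    for xb, yb in loader:
        r2 = squared_distance(xb, xb)                         # [20 x 20], entries >= 0
        print(xb.shape, yb.shape, float(r2.max()))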