├── .gitignore ├── LICENSE ├── README.md ├── experiments ├── banana │ ├── README.md │ ├── banana_utils.py │ ├── streaming_banana_bui.ipynb │ ├── streaming_banana_maddox.ipynb │ └── streaming_banana_tsvgp.ipynb ├── configs │ ├── dataset │ │ ├── adult.yaml │ │ ├── bank.yaml │ │ ├── bike.yaml │ │ ├── elevators.yaml │ │ ├── magnetometer.yaml │ │ ├── mammographic.yaml │ │ ├── mnist.yaml │ │ ├── mushroom.yaml │ │ └── split_mnist.yaml │ ├── magnetometer_offline_experiment.yaml │ ├── magnetometer_online_experiment.yaml │ ├── magnetometer_streaming_experiment.yaml │ ├── model │ │ ├── kernel │ │ │ ├── matern52.yaml │ │ │ ├── rbf.yaml │ │ │ └── sum_constant_matern52.yaml │ │ ├── likelihood │ │ │ ├── bernoulli.yaml │ │ │ ├── gaussian.yaml │ │ │ └── softmax.yaml │ │ ├── ovc.yaml │ │ ├── sgpr.yaml │ │ ├── svgp.yaml │ │ ├── tsvgp.yaml │ │ ├── tsvgp_continual.yaml │ │ └── tsvgp_continual_classification.yaml │ ├── offline_experiment.yaml │ ├── offline_mnist_experiment.yaml │ ├── online_experiment.yaml │ ├── online_mnist_experiment.yaml │ ├── optimizer │ │ ├── adam.yaml │ │ └── scipy.yaml │ ├── streaming_experiment.yaml │ └── streaming_mnist_experiment.yaml ├── data │ ├── adult.csv │ ├── banana_test_x.txt │ ├── banana_test_y.txt │ ├── banana_train_x.txt │ ├── banana_train_y.txt │ ├── bank.csv │ ├── bike.csv │ ├── elevators.csv │ ├── invensense │ │ ├── 1-loc.csv │ │ ├── 1-mag.csv │ │ ├── 1-time.csv │ │ ├── 2-loc.csv │ │ ├── 2-mag.csv │ │ ├── 2-time.csv │ │ ├── 3-loc.csv │ │ ├── 3-mag.csv │ │ ├── 3-time.csv │ │ ├── 4-loc.csv │ │ ├── 4-mag.csv │ │ ├── 4-time.csv │ │ ├── 5-loc.csv │ │ ├── 5-mag.csv │ │ ├── 5-time.csv │ │ ├── 6-loc.csv │ │ ├── 6-mag.csv │ │ ├── 6-time.csv │ │ ├── 7-loc.csv │ │ ├── 7-mag.csv │ │ ├── 7-time.csv │ │ ├── 8-loc.csv │ │ ├── 8-mag.csv │ │ ├── 8-time.csv │ │ ├── 9-loc.csv │ │ ├── 9-mag.csv │ │ └── 9-time.csv │ ├── mammographic.csv │ └── mushroom.csv ├── exp_utils.py ├── hotspots │ ├── README.md │ ├── env.yaml │ ├── extract_results.py │ ├── hotspots.py │ ├── our_tsvgp.py │ ├── results │ │ ├── hotspots-results-acc.tex │ │ ├── hotspots-results-mse.tex │ │ ├── hotspots_results.npz │ │ └── timings.dat │ ├── submit_ours.sh │ ├── submit_ovc.sh │ ├── submit_random.sh │ └── visualize_results.py ├── magnetometer │ ├── .ipynb_checkpoints │ │ └── debug-checkpoint.ipynb │ ├── README.md │ ├── __pycache__ │ │ └── magnetometer_utils.cpython-38.pyc │ ├── magnetometer_utils.py │ ├── offline_model.py │ ├── online_fc_plots.py │ ├── online_model.py │ ├── online_model_predictions.py │ └── streaming_gp_model.py ├── split_mnist │ ├── README.md │ ├── mnist_utils.py │ ├── notebooks │ │ ├── leverage_score.ipynb │ │ ├── leverage_score_vs_random.ipynb │ │ ├── plot_accuracy.ipynb │ │ └── plot_memory_z.ipynb │ ├── offline_model.py │ ├── online_model.py │ └── streaming_gp_model.py └── uci │ ├── README.md │ ├── offline_model.py │ ├── online_fc_model.py │ ├── online_model.py │ ├── streaming_sgpr.py │ ├── streaming_sgpr_fc.py │ └── uci_utils.py ├── requirements.txt └── src ├── __init__.py ├── __pycache__ ├── __init__.cpython-38.pyc ├── sites.cpython-38.pyc └── util.cpython-38.pyc ├── models ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── tsvgp.cpython-38.pyc │ ├── tsvgp_cont.cpython-38.pyc │ ├── tsvgp_white.cpython-38.pyc │ └── utils.cpython-38.pyc ├── tsvgp.py ├── tsvgp_cont.py ├── tsvgp_sites.py ├── tsvgp_white.py ├── tvgp.py └── utils.py ├── sites.py ├── streaming_sparse_gp ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── osvgpc.cpython-38.pyc ├── osgpr.py ├── osvgpc.py └── 
readme.md └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.svg 2 | *.jpg 3 | 4 | .DS_Store 5 | *.idea* 6 | 7 | # Python 8 | .ipynb_checkpoints 9 | *.coverage 10 | *egg* 11 | __pycache__ 12 | 13 | # LaTeX 14 | .auctex-auto 15 | *.aux 16 | *.bbl 17 | *.blg 18 | *.out 19 | *.log 20 | *.snm 21 | *.toc 22 | *.fdb_latexmk 23 | *.fls 24 | 25 | *.gz 26 | *.pdf 27 | 28 | .hydra 29 | outputs/ 30 | wandb/ 31 | multirun/ 32 | code/experiments/data/uci/ 33 | tmp/ 34 | raw.githubusercontent.com/ 35 | final_outputs/ 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 AaltoML 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Memory-based dual Gaussian processes for sequential learning 2 | 3 | This repository is the official implementation of the methods in the publication: 4 | 5 | * P.E. Chang, P. Verma, S.T. John, A. Solin, and M.E. Khan (2023). **Memory-based dual Gaussian processes for sequential learning**. In *International Conference on Machine Learning (ICML)*. [[arXiv]](https://arxiv.org/abs/2306.03566) 6 | 7 | Sequential learning with Gaussian processes (GPs) is challenging when access to past data is limited, for example, in continual and active learning. In such cases, errors can accumulate over time due to inaccuracies in the posterior, hyperparameters, and inducing points, making accurate learning challenging. Here, we present a method to keep all such errors in check using the recently proposed dual sparse variational GP. Our method enables accurate inference for generic likelihoods and improves learning by actively building and updating a memory of past data. We demonstrate its effectiveness in several applications involving Bayesian optimization, active learning, and continual learning. 8 | 9 | ## Environment 10 | 11 | We recommend setting up a [conda](https://docs.conda.io/projects/conda/en/latest/index.html) environment for running the experiments. The code base is tested on a machine with an Ubuntu 22.04 distribution, CUDA 11.6, and conda 23.1.0. 
12 | ```shell 13 | conda create -n sequential-gp python==3.8 14 | conda activate sequential-gp 15 | ``` 16 | 17 | Within the virtual environment, install the dependencies by running 18 | ```shell 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | (Note that the `hotspots` experiment has its own environment and setup instructions.) 23 | 24 | ## Experiments 25 | 26 | The experiments are organized as separate sub-folders inside the `experiments` folder. 27 | Each experiment sub-folder has its own README file with instructions on how to run that particular experiment. 28 | 29 | ## Data sets 30 | 31 | The data sets used for the banana, UCI, and magnetometer experiments are available in the `experiments/data/` directory. 32 | The original sources of the data sets are: 33 | 34 | * **Banana:** https://github.com/thangbui/streaming_sparse_gp 35 | * **UCI:** https://archive.ics.uci.edu/datasets 36 | * **Magnetometer:** https://github.com/AaltoML/magnetic-data 37 | 38 | ## Contributing 39 | 40 | For all correspondence, please contact [paul.chang@aalto.fi](mailto:paul.chang@aalto.fi) 41 | or [prakhar.verma@aalto.fi](mailto:prakhar.verma@aalto.fi). 42 | 43 | 44 | ## License 45 | 46 | This software is provided under the [MIT license](LICENSE). 47 | -------------------------------------------------------------------------------- /experiments/banana/README.md: -------------------------------------------------------------------------------- 1 | ## Streaming Banana Experiment 2 | 3 | The `streaming_banana_maddox.ipynb` notebook (to run the OVC model by Maddox *et al.* (2021)) requires the following packages: 4 | ``` 5 | torch == 1.12.1 6 | botorch == 0.7.2 7 | gpytorch == 1.4.1 8 | volatilitygp 9 | ``` 10 | 11 | (You can also use the conda environment in `../hotspots/env.yaml`) 12 | -------------------------------------------------------------------------------- /experiments/banana/banana_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import matplotlib 5 | import matplotlib.pyplot as plt 6 | import gpflow 7 | 8 | import sys 9 | 10 | sys.path.append("../../") 11 | from src.streaming_sparse_gp.osvgpc import OSVGPC 12 | 13 | 14 | def load_banana_dataset() -> [np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 15 | train_x = np.loadtxt( 16 | "../data/banana_train_x.txt", 17 | delimiter="," 18 | ) 19 | train_y = np.loadtxt( 20 | "../data/banana_train_y.txt", delimiter="," 21 | ) 22 | train_y[train_y == -1] = 0 23 | 24 | test_x = np.loadtxt( 25 | "../data/banana_test_x.txt", 26 | delimiter="," 27 | ) 28 | test_y = np.loadtxt( 29 | "../data/banana_test_y.txt", delimiter="," 30 | ) 31 | test_y[test_y == -1] = 0 32 | 33 | return train_x, train_y, test_x, test_y 34 | 35 | 36 | def plot_banana(pred_mu, pred_var, pred_prob, inducing_pnts, data, xtest, ytest, vmin=0., vmax=1., 37 | plot_inducing=False, plot_probability=False, plot_colorbar=False, previous_data=None): 38 | if plot_probability: 39 | camp0_color = ["C1", "white"] 40 | camp1_color = ["white", "C0"] 41 | else: 42 | camp0_color = ["white", "C1"] 43 | camp1_color = ["C0", "white"] 44 | 45 | cmap0 = matplotlib.colors.LinearSegmentedColormap.from_list("", camp0_color) 46 | cmap1 = matplotlib.colors.LinearSegmentedColormap.from_list("", camp1_color) 47 | colors0 = cmap0(np.linspace(0, 1., 128)) 48 | colors1 = cmap1(np.linspace(0, 1., 128)) 49 | colors = np.append(colors0, colors1, axis=0) 50 | cmap = 
matplotlib.colors.LinearSegmentedColormap.from_list('mycmap', colors) 51 | 52 | X, Y = data 53 | 54 | fig, ax = plt.subplots(1, 1, figsize=(6, 6)) 55 | 56 | for i, mark, color in [[1, 'o', 'C0'], [0, 's', 'C1']]: 57 | ind = Y[:, 0] == i 58 | ax.scatter(X[ind, 0], X[ind, 1], s=100, alpha=.5, edgecolor='k', marker=mark, color=color) 59 | 60 | # Plotting prevous data ghosted out 61 | if previous_data is not None: 62 | X_prev, Y_prev = previous_data 63 | 64 | for i, mark, color in [[1, 'o', 'C0'], [0, 's', 'C1']]: 65 | ind = Y_prev[:, 0] == i 66 | ax.scatter(X_prev[ind, 0], X_prev[ind, 1], s=100, alpha=.1, edgecolor='k', marker=mark, color=color) 67 | 68 | if plot_inducing and inducing_pnts is not None: 69 | ax.scatter(inducing_pnts[:, 0], inducing_pnts[:, 1], s=40, color='k') 70 | 71 | # Scale background 72 | if plot_probability: 73 | foo = pred_prob.numpy() 74 | else: 75 | foo = pred_mu.numpy() > 0.5 76 | foo = foo.astype(float) 77 | foo = (2. * foo - 1.) * np.sqrt(pred_var.numpy()) 78 | if vmax is None: 79 | vmax = np.max(np.sqrt(pred_var.numpy())) 80 | vmin = -vmax 81 | im = ax.imshow(foo.reshape(100, 100).transpose(), extent=[-2.8, 2.8, -2.8, 2.8], 82 | origin='lower', cmap=cmap, vmin=vmin, vmax=vmax) 83 | 84 | ax.axis('equal') 85 | 86 | plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) 87 | plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False) 88 | ax.set_xlim(-2.8, 2.8) 89 | ax.set_ylim(-2.8, 2.8) 90 | ax.contour(xtest, ytest, pred_mu.numpy().reshape(100, 100), levels=[.5], 91 | colors='k', linewidths=4.) 92 | 93 | if plot_colorbar: 94 | plt.colorbar(im, ax=ax) 95 | 96 | plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) 97 | plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False) 98 | ax.set_xlim(-2.8, 2.8) 99 | ax.set_ylim(-2.8, 2.8) 100 | 101 | 102 | def optimize_streaming_model(optimizer, model, train_data: Tuple[np.ndarray, np.ndarray], 103 | iterations: int = 100, mu=None, Su=None, Kaa=None, Zopt=None, first_init=True): 104 | """ 105 | Optimize Bui model 106 | """ 107 | 108 | def optimization_step_adam(): 109 | optimizer.minimize(model.training_loss, model.trainable_variables) 110 | 111 | def optimization_step_scipy(): 112 | optimizer.minimize(model.training_loss, model.trainable_variables, options={'maxiter': iterations}) 113 | 114 | def optimization_step(): 115 | if isinstance(optimizer, gpflow.optimizers.Scipy): 116 | optimization_step_scipy() 117 | else: 118 | for _ in range(iterations): 119 | optimization_step_adam() 120 | 121 | def init_Z(cur_Z, new_X, use_old_Z=True): 122 | if use_old_Z: 123 | Z = np.copy(cur_Z) 124 | else: 125 | M = cur_Z.shape[0] 126 | M_old = int(0.7 * M) 127 | M_new = M - M_old 128 | old_Z = cur_Z[np.random.permutation(M)[0:M_old], :] 129 | new_Z = new_X[np.random.permutation(new_X.shape[0])[0:M_new], :] 130 | Z = np.vstack((old_Z, new_Z)) 131 | return Z 132 | 133 | use_old_z = True 134 | 135 | X, y = train_data 136 | 137 | if first_init: 138 | if isinstance(optimizer, gpflow.optimizers.Scipy): 139 | gpflow.optimizers.Scipy().minimize( 140 | model.training_loss_closure((X, y)), model.trainable_variables, options={'maxiter': iterations}) 141 | else: 142 | for _ in range(iterations): 143 | optimizer.minimize(model.training_loss_closure((X, y)), model.trainable_variables) 144 | else: 145 | Zinit = init_Z(Zopt, X, use_old_z) 146 | model = OSVGPC((X, y), gpflow.kernels.Matern52(), gpflow.likelihoods.Bernoulli(), mu, Su, Kaa, 147 | Zopt, 
Zinit) 148 | optimization_step() 149 | 150 | Zopt = model.inducing_variable.Z.numpy() 151 | mu, Su = model.predict_f(Zopt, full_cov=True) 152 | if len(Su.shape) == 3: 153 | Su = Su[0, :, :] + 1e-4 * np.eye(mu.shape[0]) 154 | Kaa = model.kernel(model.inducing_variable.Z) 155 | 156 | return mu, Su, Kaa, Zopt, model 157 | -------------------------------------------------------------------------------- /experiments/configs/dataset/adult.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 15 2 | output_dim: 1 3 | num_classes: 2 4 | 5 | dataloader: 6 | _target_: uci_utils.load_data 7 | normalize: true 8 | train_split_percentage: 0.8 9 | data_path: "../data/adult.csv" 10 | seed: ${seed} 11 | n_k_folds: null 12 | random_state: null 13 | dataset_type: "classification" 14 | -------------------------------------------------------------------------------- /experiments/configs/dataset/bank.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 17 2 | output_dim: 1 3 | num_classes: 2 4 | dataloader: 5 | _target_: uci_utils.load_data 6 | normalize: true 7 | train_split_percentage: 0.8 8 | data_path: "../data/bank.csv" 9 | seed: ${seed} 10 | n_k_folds: null 11 | random_state: null 12 | dataset_type: "classification" -------------------------------------------------------------------------------- /experiments/configs/dataset/bike.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 17 2 | output_dim: 1 3 | dataloader: 4 | _target_: uci_utils.load_data 5 | normalize: true 6 | train_split_percentage: 0.8 7 | data_path: "../data/bike.csv" 8 | seed: ${seed} 9 | n_k_folds: null 10 | random_state: null 11 | dataset_type: "regression" 12 | -------------------------------------------------------------------------------- /experiments/configs/dataset/elevators.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 18 2 | output_dim: 1 3 | dataloader: 4 | _target_: uci_utils.load_data 5 | normalize: true 6 | train_split_percentage: 0.8 7 | data_path: "../data/elevators.csv" 8 | seed: ${seed} 9 | n_k_folds: null 10 | random_state: null 11 | dataset_type: "regression" 12 | -------------------------------------------------------------------------------- /experiments/configs/dataset/magnetometer.yaml: -------------------------------------------------------------------------------- 1 | dataloader: 2 | _partial_: true 3 | _target_: magnetometer_utils.load_data 4 | main_dir: "../data/invensense" -------------------------------------------------------------------------------- /experiments/configs/dataset/mammographic.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 6 2 | output_dim: 1 3 | num_classes: 2 4 | 5 | dataloader: 6 | _target_: uci_utils.load_data 7 | normalize: true 8 | train_split_percentage: 0.8 9 | data_path: "../data/mammographic.csv" 10 | seed: ${seed} 11 | n_k_folds: null 12 | random_state: null 13 | dataset_type: "classification" 14 | -------------------------------------------------------------------------------- /experiments/configs/dataset/mnist.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 784 2 | output_dim: 1 3 | dataloader: 4 | _target_: mnist_utils.load_mnist 5 | seed: ${seed} 6 | -------------------------------------------------------------------------------- /experiments/configs/dataset/mushroom.yaml: 
-------------------------------------------------------------------------------- 1 | input_dim: 22 2 | output_dim: 1 3 | num_classes: 2 4 | 5 | dataloader: 6 | _target_: uci_utils.load_data 7 | normalize: true 8 | train_split_percentage: 0.8 9 | data_path: "../data/mushroom.csv" 10 | seed: ${seed} 11 | n_k_folds: null 12 | random_state: null 13 | dataset_type: "classification" 14 | -------------------------------------------------------------------------------- /experiments/configs/dataset/split_mnist.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 784 2 | output_dim: 1 3 | dataloader: 4 | _target_: mnist_utils.load_split_mnist 5 | seed: ${seed} 6 | -------------------------------------------------------------------------------- /experiments/configs/magnetometer_offline_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: svgp 5 | - dataset: magnetometer 6 | - optimizer: adam 7 | 8 | optimize: 9 | _partial_: true 10 | _target_: exp_utils.optimize_full_model 11 | minibatch_size: 500 12 | iterations: 20000 13 | lambda_lr: 0.8 14 | 15 | wandb: 16 | username: null 17 | 18 | n_inducing_variable: 100 19 | num_latent_gps: 1 20 | -------------------------------------------------------------------------------- /experiments/configs/magnetometer_online_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp_continual 5 | - dataset: magnetometer 6 | - optimizer: adam 7 | 8 | online_gp: 9 | _partial_: true 10 | _target_: src.models.tsvgp_cont.OnlineGP 11 | n_steps: 2 12 | lambda_lr: 0.9 13 | num_mem: 100 14 | 15 | optimize: 16 | _partial_: true 17 | _target_: exp_utils.optimize_online_model 18 | train_hyperparams: True 19 | hyperparams_step: 20000 20 | train_memory: True 21 | debug: True 22 | 23 | wandb: 24 | username: null 25 | 26 | n_inducing_variable: 100 27 | num_latent_gps: 1 28 | streaming: false 29 | -------------------------------------------------------------------------------- /experiments/configs/magnetometer_streaming_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: sgpr 5 | - dataset: magnetometer 6 | - optimizer: scipy 7 | 8 | optimize: 9 | _partial_: true 10 | _target_: uci.uci_utils.optimize_streaming_model 11 | iterations: 20000 12 | task: "regression" 13 | 14 | wandb: 15 | username: null 16 | 17 | n_inducing_variable: 100 18 | -------------------------------------------------------------------------------- /experiments/configs/model/kernel/matern52.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.kernels.Matern52 2 | lengthscales: 1. 3 | variance: 1. -------------------------------------------------------------------------------- /experiments/configs/model/kernel/rbf.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.kernels.RBF 2 | lengthscales: 1. 3 | variance: 1. 
-------------------------------------------------------------------------------- /experiments/configs/model/kernel/sum_constant_matern52.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.kernels.Sum 2 | kernels: 3 | - _target_: gpflow.kernels.Constant 4 | - _target_: gpflow.kernels.Matern52 5 | -------------------------------------------------------------------------------- /experiments/configs/model/likelihood/bernoulli.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.likelihoods.Bernoulli -------------------------------------------------------------------------------- /experiments/configs/model/likelihood/gaussian.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.likelihoods.Gaussian 2 | variance: 0.1 -------------------------------------------------------------------------------- /experiments/configs/model/likelihood/softmax.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.likelihoods.Softmax 2 | num_classes: ${num_classes} -------------------------------------------------------------------------------- /experiments/configs/model/ovc.yaml: -------------------------------------------------------------------------------- 1 | _target_: volatilitygp.models.SingleTaskVariationalGP 2 | _partial_: true 3 | 4 | num_inducing: ${n_inducing_variable} 5 | use_piv_chol_init: false 6 | learn_inducing_locations: false 7 | -------------------------------------------------------------------------------- /experiments/configs/model/sgpr.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | 4 | _target_: gpflow.models.SGPR 5 | _partial_: true 6 | noise_variance: 1.0 7 | 8 | inducing_variable: 9 | _target_: numpy.ones 10 | shape: 11 | - ${n_inducing_variable} 12 | - ${dataset.input_dim} 13 | -------------------------------------------------------------------------------- /experiments/configs/model/svgp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: gaussian 4 | 5 | _target_: gpflow.models.SVGP 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_latent_gps: ${num_latent_gps} 12 | num_data: ??? 13 | 14 | -------------------------------------------------------------------------------- /experiments/configs/model/tsvgp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: gaussian 4 | 5 | _target_: src.models.tsvgp_white.t_SVGP_white 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_data: ??? 
12 | num_latent_gps: ${num_latent_gps} 13 | -------------------------------------------------------------------------------- /experiments/configs/model/tsvgp_continual.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: gaussian 4 | 5 | _target_: src.models.tsvgp_cont.t_SVGP_cont 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_latent_gps: ${num_latent_gps} -------------------------------------------------------------------------------- /experiments/configs/model/tsvgp_continual_classification.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: softmax 4 | 5 | _target_: src.models.tsvgp_cont.t_SVGP_cont 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_latent_gps: ${num_classes} 12 | -------------------------------------------------------------------------------- /experiments/configs/offline_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp 5 | - dataset: bike 6 | - optimizer: adam 7 | 8 | wandb: 9 | username: null 10 | 11 | optimize: 12 | _partial_: true 13 | _target_: exp_utils.optimize_full_model 14 | minibatch_size: 200 15 | iterations: 2000 16 | lambda_lr: 0.8 17 | debug: True 18 | 19 | n_inducing_variable: 100 20 | 21 | load_model_path: null 22 | num_classes: ${dataset.num_classes} 23 | num_latent_gps: 1 24 | -------------------------------------------------------------------------------- /experiments/configs/offline_mnist_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: svgp 5 | - optimizer: adam 6 | - dataset: mnist 7 | - override model/likelihood: softmax 8 | 9 | optimize: 10 | _partial_: true 11 | _target_: exp_utils.optimize_full_model 12 | minibatch_size: 4000 13 | iterations: 1000 14 | debug: true 15 | 16 | wandb: 17 | username: null 18 | 19 | n_inducing_variable: 300 20 | num_classes: 10 21 | num_latent_gps: ${num_classes} 22 | -------------------------------------------------------------------------------- /experiments/configs/online_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp_continual 5 | - dataset: bike 6 | - optimizer: adam 7 | 8 | online_gp: 9 | _partial_: true 10 | _target_: src.models.tsvgp_cont.OnlineGP 11 | n_steps: 4 12 | lambda_lr: 0.8 13 | num_mem: 10 14 | memory_picker: "bls" 15 | 16 | optimize: 17 | _partial_: true 18 | _target_: exp_utils.optimize_online_model 19 | train_hyperparams: true 20 | hyperparams_step: 20 21 | train_memory: true 22 | 23 | wandb: 24 | username: null 25 | 26 | n_sets: 10 27 | sort_data: True 28 | n_inducing_variable: 100 29 | 30 | load_model_path: null 31 | num_classes: ${dataset.num_classes} 32 | num_latent_gps: 1 -------------------------------------------------------------------------------- /experiments/configs/online_mnist_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp_continual_classification 5 | - optimizer: adam 6 | - dataset: split_mnist 7 | 8 | online_gp: 9 | _partial_: true 10 | _target_: 
src.models.tsvgp_cont.OnlineGP 11 | n_steps: 10 12 | lambda_lr: 0.1 13 | num_mem: 400 14 | 15 | memory_picker: 16 | _partial_: true 17 | _target_: src.models.tsvgp_cont.memory_picker 18 | 19 | optimize: 20 | _partial_: true 21 | _target_: mnist_utils.optimize_online_model_minibatch 22 | minibatch_size: 4000 23 | train_hyper: True 24 | train_mem: True 25 | n_hyp_opt_steps: 50 26 | 27 | wandb: 28 | username: null 29 | 30 | n_inducing_variable: 300 31 | num_classes: 10 -------------------------------------------------------------------------------- /experiments/configs/optimizer/adam.yaml: -------------------------------------------------------------------------------- 1 | _target_: tensorflow.optimizers.Adam 2 | learning_rate: 0.01 -------------------------------------------------------------------------------- /experiments/configs/optimizer/scipy.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.optimizers.Scipy -------------------------------------------------------------------------------- /experiments/configs/streaming_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: sgpr 5 | - dataset: bike 6 | - optimizer: scipy 7 | 8 | optimize: 9 | _partial_: true 10 | _target_: uci_utils.optimize_streaming_model 11 | iterations: 20 12 | task: "regression" 13 | 14 | wandb: 15 | username: null 16 | 17 | n_sets: 10 18 | n_inducing_variable: 100 19 | sort_data: true 20 | 21 | load_model_path: null 22 | num_latent_gps: 1 23 | -------------------------------------------------------------------------------- /experiments/configs/streaming_mnist_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: svgp 5 | - dataset: split_mnist 6 | - optimizer: adam 7 | - override model/likelihood: softmax 8 | 9 | optimize: 10 | _partial_: true 11 | _target_: mnist_utils.optimize_streaming_model_minibatch 12 | iterations: 100 13 | minibatch_size: 4000 14 | 15 | wandb: 16 | username: null 17 | 18 | n_inducing_variable: 300 19 | num_classes: 10 20 | num_latent_gps: 10 21 | -------------------------------------------------------------------------------- /experiments/data/banana_train_x.txt: -------------------------------------------------------------------------------- 1 | -0.7808,-0.11324 2 | 1.3595,-0.1174 3 | 1.4517,0.27285 4 | -0.41644,-0.94114 5 | 0.17815,1.4909 6 | 1.1574,-0.45807 7 | 1.6198,-0.018726 8 | 0.4261,0.19381 9 | 1.1214,0.17328 10 | 0.29157,-0.15852 11 | 0.20511,-0.47057 12 | -0.10292,-1.7141 13 | 1.2472,-0.14605 14 | 0.74045,1.1954 15 | 0.9041,0.19498 16 | -0.91656,-1.0823 17 | -0.81851,-0.94307 18 | 0.99532,0.13051 19 | -1.5282,0.65571 20 | -1.5857,0.32248 21 | 0.89861,1.4022 22 | -0.5329,-0.4022 23 | -0.051002,-0.97204 24 | -1.871,-0.45135 25 | -0.70564,-0.69672 26 | 1.9597,-0.70865 27 | -1.672,-0.26623 28 | 1.0227,-0.72842 29 | -0.66961,1.2186 30 | 0.37384,0.96613 31 | 1.7978,0.060729 32 | -0.43432,2.2394 33 | 0.043579,0.012589 34 | -0.82737,-0.75656 35 | -0.32538,0.96858 36 | -1.0294,0.6807 37 | 0.64171,1.6543 38 | 0.26867,1.616 39 | 0.89562,0.35955 40 | -0.35201,-0.48996 41 | -0.80347,-0.78224 42 | 1.3857,-0.49794 43 | 1.1355,-0.11378 44 | 0.18326,-1.1636 45 | -1.2774,-1.1232 46 | -0.75754,-1.2075 47 | 1.4542,-1.7756 48 | -1.7247,-0.41044 49 | 0.64712,0.3167 50 | 0.39919,-0.5939 51 | 0.82375,-0.67253 52 | -0.87418,0.92198 53 | 0.30949,-0.68867 54 | 1.2101,0.19738 
55 | 1.1291,1.5062 56 | 1.7137,-0.22463 57 | -0.951,-1.3011 58 | -0.1777,-0.55051 59 | -0.50589,-1.0891 60 | -0.21904,0.52376 61 | 1.1794,1.4244 62 | -0.43369,1.9786 63 | -0.038111,-0.79058 64 | -1.1584,0.38121 65 | 0.27162,-1.6 66 | -0.26888,1.3707 67 | 0.29905,1.4941 68 | -0.9735,-0.046973 69 | -0.32257,-0.26362 70 | -0.51085,1.5305 71 | -1.7869,-1.4577 72 | -0.069626,-0.89604 73 | 0.74023,1.5349 74 | -1.5201,0.65952 75 | -1.1169,0.15393 76 | 1.0495,-1.0524 77 | -1.7946,-0.98046 78 | -1.2461,0.10383 79 | 1.6014,-0.18274 80 | -0.52044,1.2026 81 | 1.1794,0.58503 82 | 0.63751,1.1114 83 | -0.32934,-0.95485 84 | 0.69789,1.0197 85 | -1.0715,-0.03901 86 | -0.52868,-1.0244 87 | -1.4329,-1.1823 88 | -0.17739,-1.453 89 | -0.74762,-0.57011 90 | -0.74733,-0.71958 91 | 0.97282,0.6533 92 | 0.0016784,0.26233 93 | -1.0387,-0.035936 94 | 1.81,-0.21759 95 | -0.36939,-0.98376 96 | 0.32053,0.39347 97 | -1.8514,0.54479 98 | 1.0266,1.6072 99 | 0.31659,-1.1335 100 | -0.02466,-1.4955 101 | -0.98993,0.2311 102 | -0.59826,-1.3514 103 | 0.18375,1.6995 104 | 0.81934,0.046239 105 | -0.89708,-0.97167 106 | -1.2301,-0.3357 107 | -0.7868,-0.11321 108 | 0.83904,-0.081896 109 | 1.567,0.48344 110 | -1.3867,-0.28249 111 | -0.23001,1.9999 112 | -1.8406,-0.86529 113 | -1.8741,-1.0988 114 | -1.9768,-0.87983 115 | -0.23442,-0.65611 116 | 0.22244,0.21397 117 | 0.11206,0.2383 118 | 0.89639,-1.2731 119 | 0.90797,-0.50572 120 | -0.25871,1.3374 121 | 1.1915,0.1126 122 | 0.32484,-0.15295 123 | 2.1774,1.1275 124 | -0.55818,-1.7297 125 | 0.29073,1.3384 126 | 1.8044,1.3171 127 | -0.012678,-1.1227 128 | 2.031,0.98364 129 | 0.731,1.7405 130 | 1.1571,0.84702 131 | 1.4231,1.493 132 | -1.551,-1.595 133 | 0.92272,0.24555 134 | -0.33003,0.3011 135 | -0.51355,-1.0719 136 | 0.76926,0.77155 137 | 0.36906,0.49003 138 | 0.9754,-1.3444 139 | -1.0484,0.21912 140 | 1.7304,1.6833 141 | 0.055373,-0.13947 142 | 1.148,-1.137 143 | 1.6599,0.67197 144 | 0.32765,0.5529 145 | 0.0066516,0.011805 146 | 0.5574,1.38 147 | 1.4623,-0.11099 148 | 1.188,-0.96605 149 | -0.64159,-1.3495 150 | 0.80872,0.74066 151 | 1.4716,1.2057 152 | 0.094916,-1.5595 153 | 0.40382,0.0099855 154 | 0.21961,2.0809 155 | -1.5133,-0.62639 156 | 0.16188,-0.7914 157 | -0.24861,0.02337 158 | -1.1031,0.32164 159 | 1.1384,-1.5726 160 | 1.3988,0.4304 161 | 0.031673,-0.82063 162 | -1.2481,-0.68908 163 | 0.17651,-1.0714 164 | 1.5469,1.1025 165 | 0.14176,-0.69801 166 | -1.887,-0.41758 167 | -0.85036,-0.91999 168 | -0.03384,1.5222 169 | -0.32399,-1.0266 170 | -0.052656,-0.74586 171 | -0.2411,-0.37411 172 | 1.0967,0.13124 173 | 0.50689,1.0166 174 | 1.0942,-0.64317 175 | 0.52287,1.0525 176 | -0.94119,0.97597 177 | -0.66398,1.183 178 | 0.27097,-0.038155 179 | 0.48702,1.3334 180 | -0.83556,0.95856 181 | -1.6345,0.52418 182 | 2.0333,-0.42685 183 | 0.029212,-1.213 184 | -1.1808,-0.75829 185 | 1.5179,-1.339 186 | 1.0955,0.73997 187 | 0.10069,1.9315 188 | 0.053324,-1.0184 189 | 0.53455,1.991 190 | 1.2224,0.32471 191 | -0.43598,-0.95364 192 | 1.3058,1.6222 193 | 0.12167,0.59928 194 | -0.78637,-0.6397 195 | -1.2231,0.083578 196 | -0.8562,-0.23466 197 | 0.24465,-0.68074 198 | -0.0029396,0.51294 199 | -1.4616,0.082911 200 | -0.41512,-1.2959 201 | 0.82382,0.58666 202 | -1.0714,-0.61609 203 | 0.92519,0.89528 204 | -0.88564,-1.2681 205 | -1.5289,0.67891 206 | -0.65366,-1.02 207 | 1.0939,1.0263 208 | -1.0366,1.0428 209 | 0.62742,0.23435 210 | -0.74924,0.01009 211 | -0.4744,0.26351 212 | 0.096598,-1.0642 213 | 0.97277,0.45654 214 | -0.88896,-0.88412 215 | -0.0062612,-0.95936 216 | -1.5023,-1.2236 217 | 
-0.85198,-0.59008 218 | -0.30515,1.1774 219 | 1.0892,-1.5861 220 | 0.39321,1.6378 221 | -0.13858,-0.36282 222 | -1.8446,0.093183 223 | 1.2988,0.5551 224 | -0.7285,1.4952 225 | 0.3411,-1.4284 226 | -0.29501,-1.2243 227 | -0.85288,0.3581 228 | 0.36964,1.0337 229 | 0.56868,1.443 230 | 0.35117,-0.040233 231 | -0.95869,1.0709 232 | -2.0934,-1.2221 233 | -0.94602,1.0165 234 | 1.6347,-2.1572 235 | -0.4091,0.34864 236 | 0.5704,1.4158 237 | 0.94247,0.24778 238 | -0.8062,0.94121 239 | -1.1223,-0.29652 240 | 0.0065983,-0.99376 241 | 0.4301,-0.65434 242 | -0.86105,-0.97185 243 | -0.63048,0.94882 244 | -0.56502,-0.85269 245 | -1.179,0.73223 246 | -0.20089,0.072604 247 | 1.052,0.6512 248 | -0.18695,-1.3826 249 | 0.42746,1.7654 250 | 1.3503,-1.5074 251 | 2.2037,1.2567 252 | -0.58056,-1.3361 253 | 1.6498,0.040415 254 | -0.17517,-0.909 255 | -0.48785,-1.3062 256 | 0.67747,0.96414 257 | -0.90442,0.95289 258 | 0.90919,0.11786 259 | -0.22625,-1.0585 260 | -1.1139,0.2758 261 | 1.4613,-1.0605 262 | 0.20545,-1.2805 263 | -0.25482,0.38662 264 | -0.82775,-0.82042 265 | 0.8507,1.4647 266 | -0.065898,-1.3294 267 | 0.63547,0.55913 268 | 1.0538,-0.36193 269 | -1.8113,-1.2112 270 | -1.2076,-0.85428 271 | 0.75688,-0.20816 272 | 0.81199,0.70836 273 | 0.7254,1.3888 274 | 0.92625,0.97222 275 | -0.10018,-1.0525 276 | -1.0634,-1.0668 277 | 1.4891,1.5458 278 | 1.0838,0.088002 279 | 0.68983,1.57 280 | -0.47214,-1.1575 281 | 0.68584,-0.18991 282 | 0.84622,1.8684 283 | -0.7788,1.0888 284 | -0.18316,1.1455 285 | 0.94499,0.30946 286 | 2.2699,0.28398 287 | -1.0272,1.3082 288 | -2.062,-1.2301 289 | 0.18965,0.96933 290 | 0.80178,0.85218 291 | 0.72413,-0.57405 292 | -0.41278,0.05538 293 | 0.40488,-0.55507 294 | -1.0334,-1.0923 295 | -0.52916,0.36537 296 | 0.47075,2.029 297 | -1.369,-0.17742 298 | -0.0062115,1.5596 299 | 1.3488,-0.46149 300 | 0.25973,-1.1745 301 | -0.5107,-0.73367 302 | 0.50118,0.055263 303 | -0.54407,-0.83055 304 | -1.5529,-1.0165 305 | -0.022746,0.16322 306 | -0.73145,-0.90615 307 | 1.0997,1.5757 308 | 1.4106,0.09066 309 | 0.45008,-0.10378 310 | 1.0078,1.8211 311 | -1.6162,0.59348 312 | -0.2904,0.69996 313 | -0.71074,0.95999 314 | -0.55461,-1.0169 315 | -0.71697,0.45503 316 | 1.7476,0.016037 317 | -0.23868,-1.1679 318 | 0.037898,-1.6385 319 | 2.2033,1.1391 320 | -0.17453,-0.94554 321 | 1.432,0.4835 322 | 0.39559,-0.60611 323 | -0.82895,1.0629 324 | -0.60647,-0.99659 325 | -0.63251,0.25688 326 | -1.6331,-0.36254 327 | -0.57241,1.1601 328 | -0.12797,-1.141 329 | 0.042814,-1.043 330 | -0.58161,-0.75229 331 | 1.4268,0.49784 332 | 1.223,-0.60719 333 | -0.58003,-1.0917 334 | 1.4133,-1.4769 335 | -0.56736,-1.1443 336 | -1.7664,-0.9229 337 | 0.58205,0.23212 338 | -0.91411,-0.52243 339 | -1.5143,-0.248 340 | 1.0993,-0.31324 341 | 0.38791,1.6365 342 | -0.18675,-0.26701 343 | 0.3182,1.3952 344 | 1.1982,0.069142 345 | 0.95311,0.71885 346 | 0.70093,0.56424 347 | 0.017872,-0.69987 348 | -0.47891,0.071929 349 | 0.40915,1.7616 350 | -1.5869,0.13848 351 | -1.295,-0.23017 352 | 0.22876,1.628 353 | 0.38928,-0.8044 354 | -1.2614,-0.45689 355 | 0.6164,-1.0708 356 | -1.2015,0.4548 357 | 0.055056,1.3448 358 | 0.5224,1.46 359 | -1.2034,0.93202 360 | -0.07085,0.43863 361 | 1.525,0.35573 362 | 0.80472,1.2121 363 | 0.3572,-1.0729 364 | -1.6012,0.68223 365 | -0.073968,1.968 366 | 0.75834,-1.4517 367 | 1.3083,-1.8064 368 | 1.6175,-0.91923 369 | -0.58432,0.093728 370 | -1.1064,-0.82129 371 | 0.4088,-0.057168 372 | 0.57474,1.4167 373 | 0.80775,-0.83144 374 | 0.66015,0.83503 375 | 1.1148,0.42774 376 | 0.36694,-0.45114 377 | 0.5691,0.48111 378 | 
1.0909,1.2127 379 | -0.043416,-0.44182 380 | -1.7828,-0.49107 381 | -1.591,-0.98008 382 | -0.2963,0.53799 383 | 0.80193,1.2108 384 | -1.8184,-0.84741 385 | 0.32927,0.60596 386 | -1.5049,0.28241 387 | 0.17711,0.73114 388 | 1.0606,-0.23215 389 | 1.4436,0.09457 390 | -0.28542,1.3863 391 | 0.20759,-1.2118 392 | -0.53583,-0.81398 393 | 0.43128,0.26649 394 | -0.87146,-0.82542 395 | 1.4726,0.57425 396 | -0.050809,-1.1859 397 | -0.38296,-1.0432 398 | -1.0423,-1.0733 399 | -1.7794,-0.582 400 | -0.65306,0.4328 401 | -------------------------------------------------------------------------------- /experiments/data/banana_train_y.txt: -------------------------------------------------------------------------------- 1 | 1 2 | -1 3 | -1 4 | -1 5 | -1 6 | 1 7 | -1 8 | 1 9 | -1 10 | 1 11 | 1 12 | -1 13 | 1 14 | 1 15 | -1 16 | -1 17 | -1 18 | 1 19 | -1 20 | -1 21 | 1 22 | 1 23 | -1 24 | 1 25 | -1 26 | -1 27 | 1 28 | 1 29 | -1 30 | -1 31 | -1 32 | -1 33 | 1 34 | -1 35 | 1 36 | -1 37 | 1 38 | 1 39 | 1 40 | 1 41 | -1 42 | -1 43 | -1 44 | -1 45 | 1 46 | -1 47 | 1 48 | 1 49 | -1 50 | 1 51 | 1 52 | -1 53 | 1 54 | -1 55 | 1 56 | -1 57 | -1 58 | 1 59 | -1 60 | 1 61 | 1 62 | -1 63 | -1 64 | -1 65 | -1 66 | -1 67 | -1 68 | 1 69 | 1 70 | -1 71 | 1 72 | -1 73 | 1 74 | -1 75 | 1 76 | 1 77 | 1 78 | 1 79 | -1 80 | -1 81 | -1 82 | -1 83 | -1 84 | -1 85 | 1 86 | -1 87 | 1 88 | -1 89 | 1 90 | -1 91 | -1 92 | 1 93 | 1 94 | -1 95 | -1 96 | 1 97 | -1 98 | -1 99 | -1 100 | -1 101 | 1 102 | -1 103 | 1 104 | 1 105 | -1 106 | 1 107 | 1 108 | 1 109 | -1 110 | 1 111 | 1 112 | 1 113 | 1 114 | 1 115 | 1 116 | 1 117 | 1 118 | 1 119 | 1 120 | -1 121 | 1 122 | 1 123 | 1 124 | -1 125 | -1 126 | 1 127 | -1 128 | 1 129 | 1 130 | -1 131 | 1 132 | 1 133 | -1 134 | 1 135 | -1 136 | -1 137 | 1 138 | 1 139 | 1 140 | 1 141 | 1 142 | 1 143 | -1 144 | -1 145 | 1 146 | -1 147 | -1 148 | 1 149 | -1 150 | -1 151 | 1 152 | -1 153 | 1 154 | -1 155 | 1 156 | -1 157 | 1 158 | 1 159 | 1 160 | -1 161 | 1 162 | 1 163 | -1 164 | 1 165 | 1 166 | 1 167 | -1 168 | -1 169 | -1 170 | -1 171 | 1 172 | -1 173 | -1 174 | 1 175 | -1 176 | -1 177 | -1 178 | 1 179 | -1 180 | -1 181 | -1 182 | -1 183 | -1 184 | 1 185 | 1 186 | -1 187 | 1 188 | -1 189 | -1 190 | -1 191 | -1 192 | 1 193 | 1 194 | 1 195 | 1 196 | 1 197 | -1 198 | 1 199 | 1 200 | -1 201 | 1 202 | 1 203 | -1 204 | -1 205 | -1 206 | -1 207 | -1 208 | -1 209 | -1 210 | 1 211 | 1 212 | -1 213 | -1 214 | -1 215 | -1 216 | 1 217 | 1 218 | -1 219 | 1 220 | -1 221 | 1 222 | -1 223 | -1 224 | -1 225 | -1 226 | -1 227 | 1 228 | -1 229 | -1 230 | 1 231 | -1 232 | 1 233 | -1 234 | 1 235 | 1 236 | -1 237 | -1 238 | -1 239 | 1 240 | -1 241 | 1 242 | -1 243 | -1 244 | -1 245 | -1 246 | 1 247 | -1 248 | -1 249 | 1 250 | 1 251 | 1 252 | -1 253 | -1 254 | -1 255 | -1 256 | -1 257 | -1 258 | -1 259 | -1 260 | 1 261 | 1 262 | -1 263 | 1 264 | -1 265 | -1 266 | -1 267 | 1 268 | 1 269 | 1 270 | 1 271 | 1 272 | -1 273 | -1 274 | -1 275 | -1 276 | -1 277 | 1 278 | -1 279 | -1 280 | -1 281 | 1 282 | -1 283 | -1 284 | -1 285 | -1 286 | -1 287 | -1 288 | 1 289 | 1 290 | -1 291 | 1 292 | 1 293 | 1 294 | -1 295 | 1 296 | -1 297 | 1 298 | -1 299 | -1 300 | -1 301 | -1 302 | 1 303 | -1 304 | 1 305 | 1 306 | -1 307 | 1 308 | -1 309 | 1 310 | 1 311 | -1 312 | 1 313 | -1 314 | -1 315 | 1 316 | -1 317 | -1 318 | -1 319 | 1 320 | -1 321 | -1 322 | 1 323 | -1 324 | -1 325 | 1 326 | 1 327 | -1 328 | -1 329 | -1 330 | 1 331 | -1 332 | 1 333 | -1 334 | 1 335 | -1 336 | 1 337 | -1 338 | 1 339 | 1 340 | 1 341 | -1 342 | 1 343 | -1 344 | -1 345 | -1 
346 | -1 347 | 1 348 | 1 349 | -1 350 | -1 351 | 1 352 | -1 353 | 1 354 | 1 355 | 1 356 | -1 357 | -1 358 | -1 359 | -1 360 | 1 361 | -1 362 | -1 363 | -1 364 | -1 365 | 1 366 | -1 367 | 1 368 | -1 369 | 1 370 | 1 371 | 1 372 | 1 373 | 1 374 | -1 375 | -1 376 | 1 377 | 1 378 | -1 379 | 1 380 | 1 381 | 1 382 | 1 383 | -1 384 | 1 385 | -1 386 | 1 387 | 1 388 | -1 389 | -1 390 | -1 391 | -1 392 | -1 393 | -1 394 | 1 395 | -1 396 | -1 397 | -1 398 | -1 399 | 1 400 | 1 401 | -------------------------------------------------------------------------------- /experiments/exp_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for experiments. 3 | """ 4 | from typing import Tuple, List 5 | import time 6 | import numpy as np 7 | import tensorflow as tf 8 | from gpflow.likelihoods import Bernoulli 9 | import gpflow 10 | from gpflow.models.svgp import SVGP 11 | import hydra 12 | 13 | import sys 14 | 15 | sys.path.append("../..") 16 | 17 | from src.models.tsvgp_white import t_SVGP_white 18 | from src.models.tsvgp_cont import OnlineGP 19 | from src.models.tsvgp import base_SVGP 20 | 21 | 22 | def get_hydra_output_dir(): 23 | """Return the current output directory path generated by hydra""" 24 | hydra_cfg = hydra.core.hydra_config.HydraConfig.get() 25 | return hydra_cfg['runtime']['output_dir'] 26 | 27 | 28 | def optimize_full_model(model, train_data: [np.ndarray, np.ndarray], 29 | test_data: [np.ndarray, np.ndarray], optimizer: tf.optimizers, 30 | minibatch_size: int = 64, 31 | iterations: int = 100, debug: bool = False, 32 | lambda_lr: float = 0.5) -> [list, list]: 33 | """ 34 | Optimize t-SVGP (white) model using minibatches and return the ELBO and NLPD values. 35 | """ 36 | 37 | if not (isinstance(model, t_SVGP_white) or isinstance(model, SVGP)): 38 | raise Exception("Model not supported for optimization!") 39 | 40 | n_train = train_data[0].shape[0] 41 | if n_train < 50000: 42 | train_dataset = tf.data.Dataset.from_tensor_slices(train_data).repeat().shuffle(n_train) 43 | else: 44 | train_dataset = tf.data.Dataset.from_tensor_slices(train_data).repeat() 45 | 46 | train_iter = iter(train_dataset.batch(minibatch_size)) 47 | 48 | training_loss = model.training_loss_closure(train_iter, compile=True) 49 | 50 | @tf.function 51 | def optimization_step(): 52 | optimizer.minimize(training_loss, model.trainable_variables) 53 | if isinstance(model, t_SVGP_white): 54 | model.natgrad_step(train_data, lr=lambda_lr) 55 | 56 | elbo_vals = [] 57 | nlpd_vals = [] 58 | eval_metric = [] 59 | for step in range(iterations): 60 | optimization_step() 61 | elbo_vals.append(-training_loss().numpy()) 62 | nlpd_vals.append(get_predictive_nlpd(model, test_data)) 63 | 64 | if isinstance(model.likelihood, Bernoulli): 65 | eval_metric.append(get_accuracy(model, test_data)) 66 | elif isinstance(model.likelihood, gpflow.likelihoods.Softmax): 67 | pred_m, _ = model.predict_y(test_data[0]) 68 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 69 | acc = np.mean(pred_argmax == test_data[1]) 70 | eval_metric.append(acc) 71 | else: 72 | eval_metric.append(get_rmse(model, test_data)) 73 | 74 | if debug and step % 20 == 0: 75 | print(f"{step} Iteration; NLPD {nlpd_vals[-1]}; Evaluation metric (RMSE/Acc.) {eval_metric[-1]}") 76 | 77 | return elbo_vals, nlpd_vals, eval_metric 78 | 79 | 80 | def get_predictive_nlpd(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]) -> float: 81 | """ 82 | Calculate and return negative log predictive density (NLPD). 
83 | """ 84 | return -1 * tf.reduce_mean(model.predict_log_density(test_data)) 85 | 86 | 87 | def get_accuracy(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]) -> float: 88 | """ 89 | Calculate and returns accuracy in classification case. 90 | """ 91 | pred_mean, _ = model.predict_y(test_data[0]) 92 | pred_mean = pred_mean.numpy() 93 | pred_mean[pred_mean >= 0.5] = 1 94 | pred_mean[pred_mean < 0.5] = 0 95 | correct_prediction = np.sum(pred_mean == test_data[1]) 96 | return correct_prediction / test_data[0].shape[0] 97 | 98 | 99 | def get_rmse(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]): 100 | """ 101 | Calculates RMSE. 102 | """ 103 | y_pred, _ = model.predict_y(test_data[0]) 104 | return np.sqrt(np.mean(np.square(y_pred - test_data[1]))) 105 | 106 | 107 | def get_multiclass_accuracy(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]) -> float: 108 | """ 109 | Get multiclass accuracy 110 | """ 111 | pred_m, _ = model.predict_y(test_data[0]) 112 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 113 | acc = np.mean(pred_argmax == test_data[1]).item() 114 | return acc 115 | 116 | 117 | def convert_data_to_online(data: [np.ndarray, np.ndarray], n_sets: int, 118 | shuffle: bool = True, sort_data: bool = False) -> list: 119 | """ 120 | Get an offline data and convert it into an online dataset of n_sets. 121 | 122 | returns: a list of tuple of np.ndarray (X_i, Y_i) with X_i of shape [n_set_data, data_dim] and 123 | Y is of shape [n_set_data, output_dim]. 124 | """ 125 | Y_dtype = data[1].dtype 126 | 127 | X, Y = data 128 | XY = np.concatenate([X, Y], axis=1) 129 | 130 | if shuffle: 131 | np.random.shuffle(XY) 132 | 133 | if sort_data: 134 | np.sort(XY, axis=0) 135 | 136 | n = XY.shape[0] 137 | last_set_size = int(n % n_sets) 138 | set_size = int((n - last_set_size) / n_sets - 1) 139 | 140 | streaming_data = [] 141 | for i in range(n_sets - 1): 142 | set_data = XY[i * set_size: (i + 1) * set_size] 143 | Y_casted = set_data[:, X.shape[-1]:].astype(Y_dtype) 144 | streaming_data.append((set_data[:, :X.shape[-1]], Y_casted)) 145 | 146 | # Adding last set; this could be more than other sets as well 147 | set_data = XY[(n_sets - 1) * set_size:] 148 | 149 | Y_casted = set_data[:, X.shape[-1]:].astype(Y_dtype) 150 | streaming_data.append((set_data[:, :X.shape[-1]], Y_casted)) 151 | 152 | assert len(streaming_data) == n_sets 153 | assert streaming_data[0][0].shape[-1] == X.shape[-1] 154 | assert streaming_data[0][1].shape[-1] == Y.shape[-1] 155 | 156 | return streaming_data 157 | 158 | 159 | def optimize_online_model(online_gp: OnlineGP, train_data: List[Tuple[np.ndarray, np.ndarray]], 160 | test_data: Tuple[np.ndarray, np.ndarray], train_hyperparams: bool = False, 161 | hyperparams_step: int = 10, train_memory: bool = False, debug: bool = False) -> [list, list, 162 | list]: 163 | """ 164 | Optimize online GP model on train data, which are already in streaming set, and returns nlpd values on test set, 165 | rmse or accuracy value, and time taken. 
166 | """ 167 | n_sets = len(train_data) 168 | 169 | if debug: 170 | print(f"Initial NLPD: {get_predictive_nlpd(online_gp.model, test_data)}") 171 | 172 | nlpd_vals = [] 173 | eval_metric = [] 174 | time_vals = [] 175 | for n in range(n_sets): 176 | for var in online_gp.optimizer.variables(): 177 | var.assign(tf.zeros_like(var)) 178 | 179 | start_time = time.time() 180 | new_data = train_data[n] 181 | new_data = (new_data[0], new_data[1]) 182 | online_gp.update_with_new_batch(new_data, n_hyp_opt_steps=hyperparams_step, train_hyps=train_hyperparams, 183 | train_mem=train_memory, remove_memory=True, return_kernel_params=False) 184 | 185 | time_vals.append(time.time() - start_time) 186 | 187 | nlpd_vals.append(get_predictive_nlpd(online_gp.model, test_data)) 188 | 189 | if isinstance(online_gp.model.likelihood, Bernoulli): 190 | eval_metric.append(get_accuracy(online_gp.model, test_data)) 191 | elif isinstance(online_gp.model.likelihood, gpflow.likelihoods.Softmax): 192 | eval_metric.append(get_multiclass_accuracy(online_gp.model, test_data)) 193 | else: 194 | eval_metric.append(get_rmse(online_gp.model, test_data)) 195 | 196 | if debug: 197 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 198 | print(f"Set {n}") 199 | print(f"NLPD = {nlpd_vals[-1]}") 200 | print(f"Eval. metric (RMSE/Acc.) = {eval_metric[-1]}") 201 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 202 | 203 | return nlpd_vals, eval_metric, time_vals 204 | -------------------------------------------------------------------------------- /experiments/hotspots/README.md: -------------------------------------------------------------------------------- 1 | # Hotspots experiment 2 | 3 | This folder contains the experiments to reproduce our comparison against [Maddox et al. (2021)](https://arxiv.org/abs/2110.15172). 4 | 5 | As such, `data/civ_data.csv` is a verbatim copy of [their data set](https://github.com/wjmaddox/online_vargp/blob/main/experiments/hotspots/data/civ_data.csv) and `hotspots.py` only has minimal changes from [their original experiment script](https://github.com/wjmaddox/online_vargp/blob/main/experiments/hotspots/hotspots.py) to integrate our own method. 6 | 7 | Our only noteworthy departure from Maddox et al. (2021) is that we remove their tempering (changing `beta=0.1` to `beta=1.0`). This change benefits all methods. 8 | 9 | `our_tsvgp.py` contains a GPyTorch-compatible implementation of our proposed method (not feature-complete; it only includes those aspects required to run the Hotspots experiment). 10 | 11 | `env.yaml` describes a Conda environment with all required dependencies; it can be instantiated using 12 | ```bash 13 | conda env create --file env.yaml 14 | ``` 15 | 16 | ## Re-run experiments 17 | 18 | The experiments can be reproduced by submitting the following jobs on a SLURM cluster: 19 | ```bash 20 | sbatch submit_random.sh 21 | sbatch submit_ovc.sh 22 | sbatch submit_ours.sh 23 | ``` 24 | Each script will spawn the respective experiment for 50 different seeds using SLURM's [Job Array support](https://slurm.schedmd.com/job_array.html). 25 | 26 | After all the runs have finished, run 27 | ```bash 28 | python extract_results.py 29 | ``` 30 | to regenerate `results/hotspots_results.npz` (which should be equivalent to the version stored in this repository). 
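If you want to sanity-check the regenerated archive before plotting, it can be loaded back with NumPy. The following is a minimal sketch (not part of the repository scripts); the variable names are illustrative, and the layout it assumes is the one written by `extract_results.py`: a single pickled list holding a dict whose `results` entry contains one `(label, timings, all_res, mean, stderr)` tuple per method.

```python
import numpy as np

# np.savez stores the positional argument under the default key "arr_0";
# the value is a one-element object array wrapping a dict, so pickling is needed.
archive = np.load("results/hotspots_results.npz", allow_pickle=True)
(payload,) = archive["arr_0"]

# Each entry of payload["results"] is (label, timings, all_res, mean, stderr),
# where mean/stderr map "acc", "mse", "sens", "sampled_acc" to per-step arrays.
for label, timings, all_res, mean, stderr in payload["results"]:
    print(label, "mean hotspot accuracy per step:", mean["acc"])
```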
31 | 32 | ## Re-create figures 33 | 34 | To recreate Figure 4 and the timing results in our paper, run 35 | ```bash 36 | python visualize_results.py 37 | ``` 38 | which regenerates the following three files in the `results/` subdirectory: 39 | - `hotspots-results-acc.tex` is the TikZ/pgfplots figure for Hotspot Accuracy. 40 | - `hotspots-results-mse.tex` is the TikZ/pgfplots figure for Prevalence MSE. 41 | - `timings.dat` gives the average run times per step of each method, including standard deviation. 42 | -------------------------------------------------------------------------------- /experiments/hotspots/env.yaml: -------------------------------------------------------------------------------- 1 | name: ovcexperiment 2 | dependencies: 3 | - python=3.8 4 | - pytorch==1.12.1 5 | - pip 6 | - pip: 7 | - gpytorch==1.4 8 | - botorch==0.4.0 9 | - git+https://github.com/wjmaddox/online_vargp@7bd3da50eac32d70ca323309e3f3d80a2ae7c419 10 | - matplotlib 11 | - tikzplotlib 12 | -------------------------------------------------------------------------------- /experiments/hotspots/extract_results.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | import glob 6 | import re 7 | 8 | num_re = re.compile(r".*_([0-9]*)_.*") 9 | 10 | def get_all_runs_available(fn_globs): 11 | all_runs_available = set() 12 | for fn_glob in fn_globs: 13 | files = glob.glob(fn_glob) 14 | nums = [int(num_re.match(fn)[1]) for fn in files] 15 | print(fn_glob, len(nums)) 16 | if not all_runs_available: 17 | all_runs_available = set(nums) 18 | else: 19 | all_runs_available = all_runs_available.intersection(set(nums)) 20 | 21 | all_runs_available = sorted(all_runs_available) 22 | return all_runs_available 23 | 24 | from collections import namedtuple 25 | 26 | Run = namedtuple("Run", ["label", "results_pattern", "output_pattern"]) 27 | 28 | class Data: 29 | beta = 1.0 30 | _results_base = "output_dir/civ_ind_svgp_{i}_AMD_" 31 | runs = [ 32 | Run("Random", _results_base+"random.pt", "./hotspots_random_{i}.out"), 33 | Run("Entropy (OVC)", _results_base+"entropy.pt", "./hotspots_ovc_{i}.out"), 34 | Run("Entropy (Ours)", _results_base+"tsvgp.pt", "./hotspots_ours_{i}.out"), 35 | ] 36 | 37 | class DataTempered: 38 | beta = 0.1 39 | _results_base = "beta0.1/output_dir/civ_ind_svgp_{i}_AMD_" 40 | runs = [ 41 | Run("random", _results_base+"random.pt", "./beta0.1/hotspots_random_{i}.out"), 42 | Run("entropy (OVC)", _results_base+"entropy.pt", "./beta0.1/hotspots_ovc_{i}.out"), 43 | Run("entropy (Ours)", _results_base+"tsvgp.pt", "./beta0.1/hotspots_ours_{i}.out"), 44 | ] 45 | 46 | time_re = re.compile(r"time = ([0-9.]*)\.memory") 47 | 48 | def parse_time(fn): 49 | with open(fn) as f: 50 | matches = time_re.findall(f.read()) 51 | return np.array(list(map(float, matches))) 52 | 53 | def get_timings(fnames): 54 | return np.array([parse_time(fn) for fn in fnames]) 55 | 56 | def load_results(fnames): 57 | props = ("acc", "mse", "sens", "sampled_acc") 58 | all_res = {prop: [] for prop in props} 59 | for fn in fnames: 60 | d = torch.load(fn, map_location=torch.device('cpu')) 61 | res = {} 62 | [ 63 | res["acc"], res["mse"], res["sens"], res["sampled_acc"] 64 | ] = [ 65 | hotspot_acc_list, 66 | hotspot_mse_list, 67 | hotspot_sens_list, 68 | hotspot_sampled_acc_list 69 | ] = d["results"] 70 | for prop in props: 71 | all_res[prop].append(res[prop].numpy()) 72 | for prop in props: 73 | all_res[prop] = np.array(all_res[prop]) 74 | mean = {prop: 
all_res[prop].mean(axis=0) for prop in props} 75 | stderr = {prop: all_res[prop].std(axis=0) / np.sqrt(all_res[prop].shape[0] - 1) for prop in props} 76 | return all_res, mean, stderr 77 | 78 | #Results = namedtuple("Results", ["label", "timings", "all_res", "mean", "stderr"]) 79 | 80 | def process(dat): 81 | all_runs_available = get_all_runs_available([run.results_pattern.format(i="*") for run in dat.runs]) 82 | results = [] 83 | for run in dat.runs: 84 | ts = get_timings([run.output_pattern.format(i=i) for i in all_runs_available]) 85 | all_res, mean, stderr = load_results([run.results_pattern.format(i=i) for i in all_runs_available]) 86 | results.append((run.label, ts, all_res, mean, stderr)) 87 | return results 88 | 89 | def main(): 90 | np.savez("results/hotspots_results.npz", [dict( 91 | results=process(Data()), 92 | # tempered=process(DataTempered()), 93 | )]) 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /experiments/hotspots/hotspots.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/wjmaddox/online_vargp/blob/main/experiments/hotspots/hotspots.py (licensed under GPL) 2 | # to allow importing our own implementation from our_tsvgp.py (with its own license) and run in the same experiment framework 3 | 4 | import math 5 | import argparse 6 | import pandas as pd 7 | import time 8 | import torch 9 | 10 | import numpy as np 11 | 12 | torch.set_default_dtype(torch.float64) 13 | 14 | from botorch.sampling import SobolQMCNormalSampler 15 | from botorch.optim.fit import fit_gpytorch_torch 16 | from torch.distributions import Bernoulli 17 | from gpytorch.kernels import ScaleKernel, MaternKernel 18 | from gpytorch.mlls import VariationalELBO, PredictiveLogLikelihood 19 | from gpytorch.priors import GammaPrior 20 | 21 | from volatilitygp.models import SingleTaskVariationalGP 22 | from our_tsvgp import OurSingleTaskVariationalGP 23 | from volatilitygp.likelihoods.binomial_likelihood import BinomialLikelihood 24 | 25 | 26 | class Squeeze(torch.nn.Module): 27 | def forward(self, x): 28 | return x.squeeze(-1) 29 | 30 | 31 | def parse(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument("--seed", type=int, default=0) 34 | parser.add_argument("--output", type=str, default="results.pt") 35 | parser.add_argument("--dataset", type=str, default="civ") 36 | parser.add_argument("--n_batch", type=int, default=100) 37 | parser.add_argument("--num_init", type=int, default=100) 38 | parser.add_argument("--batch_size", type=int, default=1) 39 | parser.add_argument("--batch_limit", type=int, default=64) 40 | parser.add_argument("--inner_samples", type=int, default=16) 41 | parser.add_argument("--outer_samples", type=int, default=16) 42 | parser.add_argument("--random", action="store_true") 43 | parser.add_argument("--loss", type=str, default="elbo") 44 | parser.add_argument("--lr", type=float, default=0.01) 45 | parser.add_argument("--beta", type=float, default=0.1) 46 | parser.add_argument("--ind_models", action="store_true") 47 | parser.add_argument("--eval_on_full_set", action="store_true") 48 | 49 | parser.add_argument("--use_tsvgp", action="store_true") 50 | parser.add_argument("--tsvgp_lr", type=float, default=1.0) 51 | parser.add_argument("--tsvgp_num_online_updates", type=int, default=1) 52 | parser.add_argument("--tsvgp_jitter", type=float, default=0.0) 53 | 54 | return parser.parse_args() 55 | 56 | 57 | def 
entropy_via_threshold(f, threshold=0.1): 58 | bern_entropy = Bernoulli(logits=f).entropy() 59 | spiked_bern_entropy = bern_entropy * (f > math.log(threshold / (1 - threshold))) 60 | return spiked_bern_entropy.mean(0).sum(-1) 61 | 62 | 63 | def entropy_reduction(model, batch_set, test_set, inner_samples=32, outer_samples=16): 64 | inner_sampler = SobolQMCNormalSampler(inner_samples) 65 | outer_sampler = SobolQMCNormalSampler(outer_samples) 66 | 67 | original_entropy = entropy_via_threshold(inner_sampler(model.posterior(test_set))) 68 | 69 | fantasy_model = model.fantasize( 70 | batch_set, sampler=inner_sampler, observation_noise=True 71 | ) 72 | fant_post = fantasy_model.posterior(test_set) 73 | fant_samp = outer_sampler(fant_post) 74 | fantasy_entropy = entropy_via_threshold( 75 | fant_samp 76 | ) 77 | 78 | return (original_entropy - fantasy_entropy).clamp(min=0.0).sum(0) 79 | 80 | 81 | def main( 82 | dataset: str = "civ", 83 | seed: int = 0, 84 | num_init: int = 100, 85 | batch_size: int = 1, 86 | n_batch: int = 100, 87 | inner_samples: int = 16, 88 | outer_samples: int = 16, 89 | batch_limit: int = 64, 90 | output: str = "results.pt", 91 | random: bool = False, 92 | beta: float = 0.1, 93 | loss: str = "elbo", 94 | lr: float = 0.01, 95 | eval_on_full_set: bool = False, 96 | recycle_lengthscales: bool = True, 97 | 98 | use_tsvgp: bool = False, 99 | tsvgp_lr: float=1.0, 100 | tsvgp_num_online_updates: int=1, 101 | tsvgp_jitter: float=0.0, 102 | ): 103 | verbose = True 104 | np.random.seed(seed) 105 | torch.manual_seed(seed) 106 | 107 | data = pd.read_csv("data/" + dataset + "_data.csv") 108 | if dataset == "civ" or dataset == "hti": 109 | threshold = 0.1 110 | else: 111 | threshold = 0.02 112 | 113 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 114 | 115 | ## split data 116 | 117 | full_x = torch.tensor(data.iloc[:, :-2].values).to(device) 118 | full_ground_truth_prob = torch.tensor(data.iloc[:, -1].values).to(device) 119 | 120 | perm = torch.randperm(data.shape[0]) 121 | train_inds = perm[:num_init] 122 | test_inds = perm[num_init:] 123 | 124 | train_x = full_x[train_inds] 125 | train_ground_truth_prob = full_ground_truth_prob[train_inds] 126 | 127 | test_x = full_x[test_inds] 128 | test_ground_truth_prob = full_ground_truth_prob[test_inds] 129 | 130 | # draw samples from ground truth probability 131 | train_y = ( 132 | torch.distributions.Binomial(total_count=100, probs=train_ground_truth_prob) 133 | .sample() 134 | .unsqueeze(-1) 135 | ) 136 | test_y = ( 137 | torch.distributions.Binomial(total_count=100, probs=test_ground_truth_prob) 138 | .sample() 139 | .unsqueeze(-1) 140 | ) 141 | 142 | ## normalize data to [0, 1]^d 143 | mins = train_x.min(0)[0] 144 | maxes = train_x.max(0)[0] 145 | train_x = (train_x - mins) / (maxes - mins) 146 | test_x = (test_x - mins) / (maxes - mins) 147 | 148 | hotspot_acc_list, hotspot_mse_list, hotspot_sens_list = [], [], [] 149 | hotspot_sampled_acc_list = [] 150 | 151 | for iteration in range(n_batch): 152 | t0 = time.time() 153 | ## define model 154 | covar_module = ScaleKernel( 155 | MaternKernel( 156 | ard_num_dims=8, 157 | nu=1.5, 158 | lengthscale_prior=GammaPrior(3.0, 6.0), 159 | outputscale_prior=GammaPrior(2.0, 0.15), # sic! 
160 | ) 161 | ) 162 | if iteration > 0 and recycle_lengthscales: 163 | print("recycling lengthscales") 164 | covar_module.outputscale = old_outputscale 165 | covar_module.base_kernel.lengthscale = old_lengthscale 166 | 167 | VGPClass = OurSingleTaskVariationalGP if use_tsvgp else SingleTaskVariationalGP 168 | 169 | model = VGPClass( 170 | likelihood=BinomialLikelihood(), 171 | init_points=train_x, 172 | init_targets=train_y.squeeze(-1), 173 | num_inducing=train_x.shape[0], 174 | use_piv_chol_init=True, 175 | learn_inducing_locations=True, 176 | covar_module=covar_module, 177 | ) 178 | 179 | if use_tsvgp: 180 | model.lr = tsvgp_lr 181 | model.num_online_updates = tsvgp_num_online_updates 182 | model.tsvgp_jitter = tsvgp_jitter 183 | 184 | if loss == "elbo": 185 | mll = VariationalELBO( 186 | model.likelihood, model, num_data=train_x.shape[0], beta=beta 187 | ) 188 | elif loss == "pll": 189 | mll = PredictiveLogLikelihood( 190 | model.likelihood, model, num_data=train_x.shape[0], beta=beta 191 | ) 192 | 193 | fit_gpytorch_torch(mll, options={"lr": lr, "maxiter": 1000}) 194 | 195 | ### record hotspot probability 196 | with torch.no_grad(): 197 | model.eval() 198 | 199 | # for some ungodly reason andrade-pacheco et al evaluate on the full set, not the heldout set 200 | # thus we need to predict over all of the data (training data included) 201 | if eval_on_full_set: 202 | # we re-apply normalization 203 | x_for_pred = (full_x - mins) / (maxes - mins) 204 | gt_for_pred = full_ground_truth_prob 205 | else: 206 | x_for_pred = test_x 207 | gt_for_pred = test_ground_truth_prob 208 | 209 | pred_dist = model(x_for_pred) 210 | pred_prob = (pred_dist.mean.mul(-1).exp() + 1).reciprocal() 211 | 212 | true_is_hotspot = gt_for_pred > threshold 213 | 214 | # lets see if this is more accurate 215 | hotspot_samples = pred_dist.sample(torch.Size((512,))) 216 | hotspot_sampled_prob = (hotspot_samples.mul(-1).exp() + 1).reciprocal() 217 | hotspot_sampled_pred = (hotspot_sampled_prob > threshold).sum(0) > 256 218 | hotspot_sampled_acc = ( 219 | ((hotspot_sampled_pred > threshold) == true_is_hotspot) 220 | .float() 221 | .mean() 222 | .cpu() 223 | .item() 224 | ) 225 | 226 | hotspot_acc = ( 227 | ((pred_prob > threshold) == true_is_hotspot).float().mean().cpu().item() 228 | ) 229 | hotspot_mse = (pred_prob - gt_for_pred).pow(2).mean().cpu().item() 230 | 231 | hotspot_sens = ( 232 | (pred_prob > threshold).float() * true_is_hotspot.float() 233 | ).sum().cpu().item() / true_is_hotspot.float().sum().cpu().item() 234 | 235 | if not random: 236 | ### now select a new point 237 | entropy_list = [] 238 | for start in range(0, test_x.shape[0] + batch_limit, batch_limit): 239 | [p.detach_() for p in model.parameters()] 240 | # TODO: batch size of 10 via cyclic optimization 241 | query_points = test_x[start : (start + batch_limit)].unsqueeze(-2) 242 | if query_points.shape[0] > 0: 243 | entropy = ( 244 | entropy_reduction( 245 | model, query_points, test_x, inner_samples, outer_samples 246 | ) 247 | .sum(-1) 248 | .detach() 249 | .cpu() 250 | ) 251 | entropy_list.append(entropy) 252 | 253 | if batch_size == 1: 254 | best_point = torch.cat(entropy_list).argmax() 255 | else: 256 | raise NotImplementedError("oops, batch size of 1 is not implemented") 257 | else: 258 | # best point is randomly selected 259 | best_point = torch.randperm(test_x.shape[0])[:batch_size] 260 | if batch_size == 1: 261 | best_point = best_point.item() 262 | entropy_list = None 263 | 264 | train_x = torch.cat((train_x, test_x[best_point].unsqueeze(0))) 
265 | train_y = torch.cat((train_y, test_y[best_point].unsqueeze(0))) 266 | train_ground_truth_prob = torch.cat( 267 | (train_ground_truth_prob, test_ground_truth_prob[best_point].unsqueeze(0)) 268 | ) 269 | 270 | test_x = torch.cat((test_x[:best_point], test_x[(best_point + 1) :])) 271 | test_y = torch.cat((test_y[:best_point], test_y[(best_point + 1) :])) 272 | test_ground_truth_prob = torch.cat( 273 | ( 274 | test_ground_truth_prob[:best_point], 275 | test_ground_truth_prob[(best_point + 1) :], 276 | ) 277 | ) 278 | 279 | t1 = time.time() 280 | 281 | if verbose: 282 | print( 283 | f"\nBatch {iteration:>2}: current_value (acc, sacc, mse, sens) = " 284 | f"({hotspot_acc:>4.2f}, {hotspot_sampled_acc:>4.2f}, {hotspot_mse:>4.2f} {hotspot_sens:>4.2f}, " 285 | f"time = {t1-t0:>4.2f}.", 286 | end="", 287 | ) 288 | else: 289 | print(".") 290 | 291 | hotspot_acc_list.append(hotspot_acc) 292 | hotspot_sampled_acc_list.append(hotspot_sampled_acc) 293 | hotspot_mse_list.append(hotspot_mse) 294 | hotspot_sens_list.append(hotspot_sens) 295 | 296 | old_lengthscale = model.covar_module.base_kernel.lengthscale.detach() 297 | old_outputscale = model.covar_module.outputscale.detach() 298 | del model, entropy_list 299 | 300 | torch.cuda.empty_cache() 301 | memory_alloc = torch.cuda.memory_allocated(device) / (1024 ** 3) 302 | print("memory allocated: ", memory_alloc) 303 | 304 | output_dict = { 305 | "results": [ 306 | torch.tensor(hotspot_acc_list), 307 | torch.tensor(hotspot_mse_list), 308 | torch.tensor(hotspot_sens_list), 309 | torch.tensor(hotspot_sampled_acc_list), 310 | ], 311 | "data": {"x": train_x, "theta": train_ground_truth_prob, "y": train_y}, 312 | #"stats": {"time": torch.tensor(time) 313 | } 314 | return output_dict 315 | 316 | 317 | if __name__ == "__main__": 318 | args = parse() 319 | args.recycle_lengthscales = not args.ind_models 320 | del args.ind_models 321 | output_dict = main(**vars(args)) 322 | output_dict["pars"] = vars(args) 323 | 324 | torch.save(output_dict, args.output) 325 | -------------------------------------------------------------------------------- /experiments/hotspots/results/hotspots_results.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/experiments/hotspots/results/hotspots_results.npz -------------------------------------------------------------------------------- /experiments/hotspots/results/timings.dat: -------------------------------------------------------------------------------- 1 | Random : 7.23 +/- 0.03 (std.err) +/- 2.30 (std.dev) 2 | Entropy (OVC) : 84.57 +/- 0.15 (std.err) +/- 10.77 (std.dev) 3 | Entropy (Ours) : 62.09 +/- 0.10 (std.err) +/- 6.75 (std.dev) 4 | 5 | -------------------------------------------------------------------------------- /experiments/hotspots/submit_ours.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=3:00:00 3 | #SBATCH --mem=20000M 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --constraint=volta 6 | #SBATCH --output=./hotspots_ours_%a.out 7 | #SBATCH --array=1-50 8 | 9 | module load miniconda 10 | source activate ovcexperiment 11 | 12 | mkdir -p output_dir 13 | 14 | python hotspots.py --seed=$SLURM_ARRAY_TASK_ID --n_batch=100 --batch_limit=8 --num_init=100 \ 15 | --beta=1.0 --loss=elbo --dataset=civ --inner_samples=16 --outer_samples=16 \ 16 | --use_tsvgp --output=output_dir/civ_ind_svgp_${SLURM_ARRAY_TASK_ID}_AMD_tsvgp.pt 17 | 
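# Note: the results file written via --output above and the SLURM --output log pattern (hotspots_ours_%a.out) are the results_pattern and output_pattern that extract_results.py (Data.runs, "Entropy (Ours)") expects; keep them in sync if either path is renamed.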
-------------------------------------------------------------------------------- /experiments/hotspots/submit_ovc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=4:00:00 3 | #SBATCH --mem=20000M 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --constraint=volta 6 | #SBATCH --output=./hotspots_ovc_%a.out 7 | #SBATCH --array=1-50 8 | 9 | module load miniconda 10 | source activate ovcexperiment 11 | 12 | mkdir -p output_dir 13 | 14 | python hotspots.py --seed=$SLURM_ARRAY_TASK_ID --n_batch=100 --batch_limit=8 --num_init=100 \ 15 | --beta=1.0 --loss=elbo --dataset=civ --inner_samples=16 --outer_samples=16 \ 16 | --output=output_dir/civ_ind_svgp_${SLURM_ARRAY_TASK_ID}_AMD_entropy.pt 17 | -------------------------------------------------------------------------------- /experiments/hotspots/submit_random.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=2:00:00 3 | #SBATCH --mem=5000M 4 | #SBATCH --output=./hotspots_random_%a.out 5 | #SBATCH --array=1-50 6 | 7 | module load miniconda 8 | source activate ovcexperiment 9 | 10 | mkdir -p output_dir 11 | 12 | python hotspots.py --seed=$SLURM_ARRAY_TASK_ID --n_batch=100 --num_init=100 \ 13 | --beta=1.0 --loss=elbo --dataset=civ --random \ 14 | --output=output_dir/civ_ind_svgp_${SLURM_ARRAY_TASK_ID}_AMD_random.pt 15 | -------------------------------------------------------------------------------- /experiments/hotspots/visualize_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | [[res]] = np.load("results/hotspots_results.npz", allow_pickle=True).values() 5 | 6 | betas = {"results": 1.0, "tempered": 0.1} 7 | keys = [ 8 | "results", 9 | # "tempered", 10 | ] 11 | 12 | import sys 13 | 14 | def print_timings(res, io=sys.stdout): 15 | label, ts, _, _, _ = res 16 | mean = ts.mean() 17 | stddev = ts.std() 18 | stderr = ts.std() / np.sqrt(ts.size - 1) 19 | print(f"{label:29s}: {mean:6.2f} +/- {stderr:.2f} (std.err) +/- {stddev:.2f} (std.dev)", file=io) 20 | 21 | with open("results/timings.dat", "w") as f: 22 | for key in keys: 23 | # print(f"beta={betas[key]}:", file=f) 24 | for r in res[key]: 25 | print_timings(r, f) 26 | print(file=f) 27 | 28 | 29 | def plot_mean(res, prop): 30 | label, ts, all_res, mean, stderr = res 31 | plt.plot(range(100,200), mean[prop], label=label) 32 | scale = 1 33 | plt.fill_between(range(100, 200), mean[prop] - scale*stderr[prop], mean[prop] + scale*stderr[prop], alpha=0.3) 34 | 35 | 36 | prop_titles = { 37 | # "sampled_acc": "accuracy (sampled)", 38 | "acc": "Hotspot Accuracy", 39 | "mse": "Prevalence MSE", 40 | # "sens": "[sens]" 41 | } 42 | 43 | 44 | def tikzplotlib_fix_ncols(obj): 45 | """ 46 | workaround for matplotlib 3.6 renamed legend's _ncol to _ncols, which breaks tikzplotlib 47 | """ 48 | if hasattr(obj, "_ncols"): 49 | obj._ncol = obj._ncols 50 | for child in obj.get_children(): 51 | tikzplotlib_fix_ncols(child) 52 | 53 | 54 | import tikzplotlib 55 | 56 | for key in keys: 57 | for prop in ["acc", "mse"]: 58 | fig = plt.figure() 59 | # plt.title(f"$\\beta={betas[key]}$") 60 | plt.xlabel("Steps") 61 | plt.ylabel(f"{prop_titles[prop]}") 62 | 63 | if prop == "mse": 64 | # plt.ylim(0.013, 0.0261) # limits used in Maddox et al. 
(2021) 65 |             plt.ylim(0.0115, 0.0265) # our method is too much better to fit 66 |         elif prop == "acc" or prop == "sampled_acc": 67 |             plt.ylim(0.81, 0.89) 68 |             plt.yticks([0.82,0.84,0.86,0.88]) 69 | 70 |         for r in res[key][:3]: 71 |             plot_mean(r, prop) 72 |         plt.grid() 73 |         plt.legend() 74 |         tikzplotlib_fix_ncols(fig) 75 |         tikzplotlib.save(f"results/hotspots-{key}-{prop}.tex", figure=fig, 76 |                          axis_width=r"\figurewidth", 77 |                          axis_height=r"\figureheight") 78 | -------------------------------------------------------------------------------- /experiments/magnetometer/README.md: -------------------------------------------------------------------------------- 1 | # Magnetic Anomalies Experiment 2 | 3 | This experiment maps the local anomalies in the ambient magnetic field. The experiment is based on [Solin et al. (2018)](https://ieeexplore.ieee.org/document/8373720). 4 | 5 | ## Offline Model 6 | For training an offline model: 7 | ```shell 8 | python offline_model.py model/kernel=sum_constant_matern52 9 | ``` 10 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_offline_experiment.yaml`. 11 | ## Fast Conditioning of the proposed model 12 | 13 | For fast conditioning of the model (Figure 5(b)), run the following command: 14 | ```shell 15 | python online_fc_plots.py -o={offline_model_path} 16 | ``` 17 | 18 | ## Proposed model 19 | 20 | Experiment where multiple observation paths are observed: 21 | ``` 22 | python online_model.py model/kernel=sum_constant_matern52 23 | ``` 24 | 25 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_online_experiment.yaml`. 26 | 27 | ## NLPD comparison experiment 28 | 29 | **Proposed Model** 30 | 31 | ```shell 32 | python online_model.py model/kernel=sum_constant_matern52 streaming=True 33 | ``` 34 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_online_experiment.yaml`. 35 | 36 | **Bui et al. (2017):** 37 | 38 | ``` 39 | python streaming_gp_model.py model/kernel=sum_constant_matern52 40 | ``` 41 | 42 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_streaming_experiment.yaml`. 43 | -------------------------------------------------------------------------------- /experiments/magnetometer/__pycache__/magnetometer_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/experiments/magnetometer/__pycache__/magnetometer_utils.cpython-38.pyc -------------------------------------------------------------------------------- /experiments/magnetometer/magnetometer_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | A_room2video = np.array([70.7791, -388.6396, 618.0954, 7 |                          -66.1362, 26.6245, 665.3620, 8 |                          0, 0, 1.0000]).reshape((3, 3)) 9 | 10 | C_room2video = np.array([0.1597, -0.0318, 1.0000]).reshape((1, 3)) 11 | 12 | A_grid2room = np.array([0.0118, 2.7777, 1.3689, 13 |                         -2.2243, -0.0967, -1.0929, 14 |                         0, 0, 1.0000]).reshape((3, 3)) 15 | 16 | C_grid2room = np.array([0.0160, -0.1450, 1.0000]).reshape((1, 3)) 17 | 18 | 19 | def load_data(main_dir: str, train_id: list = None, test_id: list = None) -> [list, list]: 20 |     """ 21 |     Load magnetometer data.
22 | 23 | Main source of data is: https://github.com/AaltoML/magnetic-data 24 | 25 | Note: The function involves some constants that are specific to the data. 26 | """ 27 | data_train = [] 28 | data_test = None 29 | 30 | if train_id is None: 31 | train_id = [1, 2, 4, 5] 32 | 33 | if test_id is None: 34 | test_id = [1] 35 | 36 | for i in train_id: 37 | loc_path = os.path.join(main_dir, str(i) + "-loc.csv") 38 | mag_path = os.path.join(main_dir, str(i) + "-mag.csv") 39 | 40 | loc_data = pd.read_csv(loc_path).to_numpy() 41 | mag_data = pd.read_csv(mag_path).to_numpy() 42 | 43 | # take norm of mag data 44 | mag_data_norm = np.sqrt(np.sum(np.square(mag_data), axis=-1))[..., None] 45 | data_combined = np.concatenate([loc_data, mag_data_norm], axis=1) 46 | 47 | data_train.append([data_combined[:, :-1], data_combined[:, -1:]]) 48 | 49 | for i in test_id: 50 | loc_path = os.path.join(main_dir, str(i) + "-loc.csv") 51 | mag_path = os.path.join(main_dir, str(i) + "-mag.csv") 52 | 53 | loc_data = pd.read_csv(loc_path).to_numpy() 54 | mag_data = pd.read_csv(mag_path).to_numpy() 55 | 56 | # take norm of mag data 57 | mag_data_norm = np.sqrt(np.sum(np.square(mag_data), axis=-1))[..., None] 58 | 59 | data_combined = np.concatenate([loc_data, mag_data_norm], axis=1) 60 | 61 | if data_test is None: 62 | data_test = [np.array(data_combined[:, :-1]), np.array(data_combined[:, -1:])] 63 | else: 64 | data_test = [np.concatenate([data_test[0], np.array(data_combined[:, :-1])], axis=0), 65 | np.concatenate([data_test[1], np.array(data_combined[:, -1:])], axis=0)] 66 | 67 | return data_train, data_test 68 | 69 | 70 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | """ 72 | Below function are for plotting purposes and comes from original Matlab scripts. 73 | 74 | They are for transformation between room, video, grid. 
75 | """ 76 | 77 | 78 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 79 | 80 | 81 | def get_transformed_grid(): 82 | z1 = np.concatenate([np.linspace(-1, 1, 32), np.nan * np.ones((1,))]) 83 | z2 = z1.copy() 84 | 85 | g1, g2 = np.meshgrid(z1, z2) 86 | Z = np.concatenate([g1.reshape((-1, 1)), g2.reshape((-1, 1))], axis=1) 87 | 88 | var1 = A_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 89 | var2 = C_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 90 | 91 | Z = np.divide(var1, var2).T 92 | Z = Z[:, :2] 93 | 94 | var1 = A_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 95 | var2 = C_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 96 | Y = np.divide(var1, var2).T 97 | 98 | g1 = np.reshape(Y[:, 0], g1.shape) 99 | g2 = np.reshape(Y[:, 1], g2.shape) 100 | 101 | return g1, g2 102 | 103 | 104 | def transform_grid2video(x, y): 105 | Z = np.concatenate([x.reshape((-1, 1)), y.reshape((-1, 1))], axis=1) 106 | var1 = A_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 107 | var2 = C_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 108 | 109 | Z = np.divide(var1, var2).T 110 | Z = Z[:, :2] 111 | 112 | var1 = A_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 113 | var2 = C_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 114 | Y = np.divide(var1, var2).T 115 | return Y[:, 0], Y[:, 1] 116 | 117 | 118 | def transform_room2video(x, y): 119 | Z = np.concatenate([x.reshape((-1, 1)), y.reshape((-1, 1))], axis=1) 120 | var1 = A_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 121 | var2 = C_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 122 | Y = np.divide(var1, var2).T 123 | 124 | return Y[:, 0], Y[:, 1] 125 | 126 | 127 | def transform_grid2room(x, y): 128 | Z = np.concatenate([x.reshape((-1, 1)), y.reshape((-1, 1))], axis=1) 129 | var1 = A_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 130 | var2 = C_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 131 | Y = np.divide(var1, var2).T 132 | return Y[:, 0], Y[:, 1] 133 | 134 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 135 | -------------------------------------------------------------------------------- /experiments/magnetometer/offline_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import numpy as np 7 | import hydra 8 | from omegaconf import DictConfig 9 | from hydra.utils import call, instantiate 10 | 11 | import sys 12 | sys.path.append("..") 13 | sys.path.append("../../") 14 | 15 | from exp_utils import get_hydra_output_dir 16 | 17 | # A logger for this file 18 | log = logging.getLogger(__name__) 19 | 20 | 21 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="magnetometer_offline_experiment") 22 | def run_experiment(cfg: DictConfig): 23 | """ 24 | Initialize and run the experiment. 
25 | """ 26 | all_train_data, test_data = instantiate(cfg.dataset.dataloader)() 27 | log.info("Data loaded successfully!!!") 28 | 29 | output_dir = get_hydra_output_dir() 30 | 31 | # Merge all train_data and test_data into one 32 | train_data = None 33 | for data in all_train_data: 34 | if train_data is None: 35 | train_data = (data[0], data[1]) 36 | else: 37 | train_data = (np.concatenate([train_data[0], data[0]]), np.concatenate([train_data[1], data[1]])) 38 | 39 | # Set up inducing variables 40 | n_inducing_variable = int(np.sqrt(cfg.n_inducing_variable).item()) 41 | xx, yy = np.linspace(-2, 5, n_inducing_variable), np.linspace(-4, 2, n_inducing_variable) 42 | z1, z2 = np.meshgrid(xx, yy) 43 | zz = np.vstack((z1.flatten(), z2.flatten())).T 44 | inducing_variable = zz.tolist() 45 | cfg.model.inducing_variable = inducing_variable 46 | cfg.model.num_data = train_data[0].shape[0] 47 | 48 | model = instantiate(cfg.model) 49 | model.kernel.kernels[0].variance.assign(500) 50 | 51 | elbo_vals, nlpd_vals, rmse_vals = call(cfg.optimize)(model=model, train_data=train_data, test_data=test_data, 52 | optimizer=instantiate(cfg.optimizer), debug=True) 53 | 54 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 55 | log.info(f"Test RMSE: {rmse_vals[-1]}") 56 | 57 | log.info("Optimization successfully done!!!") 58 | 59 | parameters = gpflow.utilities.parameter_dict(model) 60 | with open(os.path.join(output_dir, "model_offline_magnetometer.pkl"), "wb") as f: 61 | pickle.dump(parameters, f) 62 | 63 | 64 | if __name__ == '__main__': 65 | run_experiment() 66 | -------------------------------------------------------------------------------- /experiments/magnetometer/online_fc_plots.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import argparse 3 | import gpflow 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | import matplotlib.pyplot as plt 8 | import sys 9 | sys.path.append("../..") 10 | sys.path.append("..") 11 | 12 | from magnetometer_utils import load_data, transform_room2video, get_transformed_grid, transform_grid2room 13 | from exp_utils import convert_data_to_online 14 | from src.models.tsvgp_cont import t_SVGP_cont, OnlineGP 15 | from src.models.utils import piv_chol, memory_picker 16 | 17 | if __name__ == '__main__': 18 | parser = argparse.ArgumentParser(description="Plot streaming plots for the magntometer experiment.") 19 | parser.add_argument("-o", type=str, default=None, required=True) 20 | args = parser.parse_args() 21 | 22 | model_path = args.o 23 | n_inducing_variable = 100 24 | 25 | with open(model_path, "rb") as f: 26 | dict_params = pickle.load(f) 27 | 28 | kernel = gpflow.kernels.Sum([gpflow.kernels.Constant(), gpflow.kernels.Matern52()]) 29 | likelihood = gpflow.likelihoods.Gaussian() 30 | inducing_variable = -2 + np.zeros((n_inducing_variable, 2)) + np.random.rand(n_inducing_variable * 2).reshape((-1, 2)) 31 | model = t_SVGP_cont(kernel, likelihood, inducing_variable) 32 | 33 | model.kernel.kernels[0].variance = dict_params['.kernel.kernels[0].variance'] 34 | model.kernel.kernels[1].lengthscales = dict_params['.kernel.kernels[1].lengthscales'] 35 | model.kernel.kernels[1].variance = dict_params['.kernel.kernels[1].variance'] 36 | model.likelihood.variance = dict_params['.likelihood.variance'] 37 | 38 | print("Model loaded successfully!!!") 39 | 40 | train_data, _ = load_data("../data/invensense", train_id=[3]) 41 | online_data = convert_data_to_online(train_data[0], n_sets=20, shuffle=False) 42 | 43 | # Fixing hyper parameters 
44 | gpflow.utilities.set_trainable(model.kernel.kernels[0].variance, False) 45 | gpflow.utilities.set_trainable(model.kernel.kernels[1].lengthscales, False) 46 | gpflow.utilities.set_trainable(model.kernel.kernels[1].variance, False) 47 | gpflow.utilities.set_trainable(model.likelihood.variance, False) 48 | 49 | memory = (online_data[0][0][:1], online_data[0][1][:1]) 50 | online_gp = OnlineGP(model, opt_hypers=tf.optimizers.Adam(), n_steps=2, lambda_lr=0.9, memory=memory, 51 | Z_picker=piv_chol, memory_picker=memory_picker, num_mem=10) 52 | 53 | path_x = None 54 | path_y = None 55 | 56 | # init Z 57 | first_batch = online_data[0][0] 58 | mean_first_batch = np.mean(first_batch, axis=0) 59 | var_first_batch = np.var(first_batch, axis=0) 60 | cov_first_batch = np.diag(var_first_batch.reshape(-1)) 61 | mean_first_batch = mean_first_batch.reshape(-1) 62 | 63 | inducing_variable = np.random.multivariate_normal(mean_first_batch, cov_first_batch, n_inducing_variable) 64 | model.inducing_variable.Z.assign(inducing_variable) 65 | 66 | for i, batch_data in enumerate(online_data): 67 | online_gp.update_with_new_batch(batch_data, train_hyps=False, train_mem=True, n_hyp_opt_steps=5) 68 | 69 | if path_x is None: 70 | path_x = batch_data[0] 71 | path_y = batch_data[1] 72 | else: 73 | path_x = np.concatenate([path_x, batch_data[0]], axis=0) 74 | path_y = np.concatenate([path_y, batch_data[1]], axis=0) 75 | 76 | # only plot every 5th batch 77 | if (i != 0) and (i+1) % 5 != 0: 78 | continue 79 | 80 | Z_new = model.inducing_variable.Z.numpy() 81 | 82 | # Prediction over grid 83 | xtest, ytest = np.mgrid[-1.3:1.3:100j, -1.3:1.3:100j] 84 | xtest_transformed, ytest_transformed = transform_grid2room(xtest, ytest) 85 | zz = np.concatenate([xtest_transformed[..., None], ytest_transformed[..., None]], axis=1) 86 | 87 | pred_m_grid, pred_S_grid = online_gp.model.predict_f(zz) 88 | pred_m_grid = pred_m_grid.numpy().reshape((100, -1)) 89 | 90 | pred_S_grid = pred_S_grid.numpy() 91 | alpha_map = np.exp(-np.sqrt(pred_S_grid)).reshape((100, 100)) 92 | alpha_map = alpha_map - np.min(alpha_map) 93 | alpha_map = alpha_map/np.max(alpha_map) 94 | # alpha_map = 1 - alpha_map 95 | 96 | # Test points 97 | transformed_x1test, transformed_x2test = transform_room2video(xtest_transformed, ytest_transformed) 98 | transformed_x1test = np.reshape(transformed_x1test, xtest.shape) 99 | transformed_x2test = np.reshape(transformed_x2test, ytest.shape) 100 | 101 | # Path 102 | path_transformed_x0, path_transformed_x1 = transform_room2video(path_x[:, 0], path_x[:, 1]) 103 | path_transformed_x = np.concatenate([path_transformed_x0[..., None], path_transformed_x1[..., None]], axis=1) 104 | 105 | # Grid 106 | g1, g2 = get_transformed_grid() 107 | 108 | # Inducing variables 109 | transformed_Z_0, transformed_Z_1 = transform_room2video(Z_new[:, 0], Z_new[:, 1]) 110 | transformed_Z_0 = np.reshape(transformed_Z_0, Z_new[:, 0].shape) 111 | transformed_Z_1 = np.reshape(transformed_Z_1, Z_new[:, 1].shape) 112 | 113 | # Plotting 114 | # plt.clf() 115 | _, axs = plt.subplots(1, 1) 116 | plt.plot(path_transformed_x[:, 0], path_transformed_x[:, 1]) 117 | pcol = plt.pcolormesh(transformed_x1test, transformed_x2test, pred_m_grid, alpha=alpha_map.reshape(-1), 118 | vmin=10, vmax=90, shading='gouraud', cmap="jet") 119 | pcol.set_edgecolor('face') 120 | 121 | plt.scatter(transformed_Z_0, transformed_Z_1, color="black") 122 | 123 | plt.plot(g1, g2, color="gray", alpha=0.2) 124 | plt.plot(g1.T, g2.T, color="gray", alpha=0.2) 125 | 126 | plt.xlim([0, 1920]) 127 | 
plt.ylim([0, 1080]) 128 | axs.set_aspect("equal") 129 | plt.axis('off') 130 | plt.gca().invert_yaxis() 131 | # plt.savefig('robot' + str(i + 1) + '.png', bbox_inches='tight', pad_inches=0, dpi=200) 132 | plt.show() 133 | -------------------------------------------------------------------------------- /experiments/magnetometer/online_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import numpy as np 7 | import hydra 8 | from omegaconf import DictConfig 9 | from hydra.utils import call, instantiate 10 | 11 | import sys 12 | sys.path.append("..") 13 | sys.path.append("../../") 14 | 15 | from exp_utils import get_hydra_output_dir, convert_data_to_online 16 | from src.models.tsvgp_cont import piv_chol, fixed_Z, random_picker 17 | 18 | # A logger for this file 19 | log = logging.getLogger(__name__) 20 | 21 | 22 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="magnetometer_online_experiment") 23 | def run_experiment(cfg: DictConfig): 24 | """ 25 | Initialize and run the experiment. 26 | """ 27 | if cfg.streaming: 28 | all_train_data, test_data = instantiate(cfg.dataset.dataloader)(train_id=[3], test_id=[1, 2, 4, 5]) 29 | train_data = convert_data_to_online(all_train_data[0], n_sets=20) 30 | else: 31 | train_data, test_data = instantiate(cfg.dataset.dataloader)(train_id=[1, 2, 4, 5], test_id=[1]) 32 | log.info("Data loaded successfully!!!") 33 | 34 | output_dir = get_hydra_output_dir() 35 | 36 | n_inducing_variable = int(np.sqrt(cfg.n_inducing_variable).item()) 37 | xx, yy = np.linspace(-2, 5, n_inducing_variable), np.linspace(-4, 2, n_inducing_variable) 38 | z1, z2 = np.meshgrid(xx, yy) 39 | zz = np.vstack((z1.flatten(), z2.flatten())).T 40 | inducing_variable = zz.tolist() 41 | cfg.model.inducing_variable = inducing_variable 42 | 43 | model = instantiate(cfg.model) 44 | model.kernel.kernels[0].variance.assign(500) 45 | gpflow.set_trainable(model.inducing_variable.Z, True) 46 | 47 | memory = (train_data[0][0][:1], train_data[0][1][:1]) 48 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 49 | Z_picker=fixed_Z, memory_picker=random_picker) 50 | 51 | for i, set_data in enumerate(train_data): 52 | if cfg.streaming: 53 | test_data = test_data 54 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(online_gp=online_gp, train_data=[set_data], 55 | test_data=test_data, debug=False) 56 | else: 57 | test_data = set_data 58 | set_data = convert_data_to_online(set_data, n_sets=20) 59 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(online_gp=online_gp, train_data=set_data, 60 | test_data=test_data, debug=True) 61 | 62 | log.info(f"------------------------------------------") 63 | log.info(f"Set {i}") 64 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 65 | log.info(f"Test RMSE: {rmse_vals[-1]}") 66 | log.info(f"------------------------------------------") 67 | 68 | parameters = gpflow.utilities.parameter_dict(model) 69 | with open(os.path.join(output_dir, "model" + str(i) + "_online_magnetometer.pkl"), "wb") as f: 70 | pickle.dump(parameters, f) 71 | 72 | 73 | if __name__ == '__main__': 74 | run_experiment() 75 | -------------------------------------------------------------------------------- /experiments/magnetometer/online_model_predictions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import gpflow 4 | import numpy as np 5 | import 
matplotlib.pyplot as plt 6 | import sys 7 | import argparse 8 | 9 | sys.path.append("../..") 10 | sys.path.append("..") 11 | 12 | from magnetometer_utils import load_data, transform_room2video, get_transformed_grid, transform_grid2room 13 | from exp_utils import convert_data_to_online 14 | from src.models.tsvgp_cont import t_SVGP_cont, OnlineGP 15 | from src.models.utils import memory_picker, piv_chol 16 | 17 | 18 | if __name__ == '__main__': 19 | parser = argparse.ArgumentParser(description="Plot online plots for the magntometer experiment.") 20 | parser.add_argument("-o", type=str, default=None, required=True) 21 | args = parser.parse_args() 22 | model_dir = args.o 23 | streaming = False 24 | 25 | if not os.path.exists(model_dir): 26 | raise Exception("Model directory is invalid!!!") 27 | 28 | model_names = [] 29 | for f in os.listdir(model_dir): 30 | if "online_magnetometer.pkl" in f: 31 | model_names.append(f) 32 | 33 | # sort by model id 34 | model_names.sort() 35 | 36 | n_inducing_variable = 100 37 | if streaming: 38 | train_data, _ = load_data("../data/invensense", train_id=[3]) 39 | train_data = convert_data_to_online(train_data[0], n_sets=20, shuffle=False) 40 | else: 41 | train_data, _ = load_data("../data/invensense", train_id=[1, 2, 4, 5]) 42 | 43 | for i, model_name in enumerate(model_names): 44 | model_path = os.path.join(model_dir, model_name) 45 | with open(model_path, "rb") as f: 46 | dict_params = pickle.load(f) 47 | 48 | # Loading model 49 | kernel = gpflow.kernels.Sum([gpflow.kernels.Constant(), gpflow.kernels.Matern52()]) 50 | likelihood = gpflow.likelihoods.Gaussian() 51 | inducing_variable = -2 + np.zeros((n_inducing_variable, 2)) + np.random.rand(n_inducing_variable * 2).reshape( 52 | (-1, 2)) 53 | model = t_SVGP_cont(kernel, likelihood, inducing_variable) 54 | model.kernel.kernels[0].variance = dict_params['.kernel.kernels[0].variance'] 55 | model.kernel.kernels[1].lengthscales = dict_params['.kernel.kernels[1].lengthscales'] 56 | model.kernel.kernels[1].variance = dict_params['.kernel.kernels[1].variance'] 57 | model.likelihood.variance = dict_params['.likelihood.variance'] 58 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 59 | Z = model.inducing_variable.Z.numpy().copy() 60 | model.lambda_1.assign(dict_params['.sites.lambda_1']) 61 | model.lambda_2.assign(dict_params['.sites._lambda_2']) 62 | print("Model loaded successfully!!!") 63 | 64 | # Prediction over grid 65 | xtest, ytest = np.mgrid[-1.:1.:100j, -1.:1.:100j] 66 | xtest_transformed, ytest_transformed = transform_grid2room(xtest, ytest) 67 | zz = np.concatenate([xtest_transformed[..., None], ytest_transformed[..., None]], axis=1) 68 | 69 | pred_m_grid, pred_S_grid = model.predict_f(zz) 70 | pred_m_grid = pred_m_grid.numpy().reshape((100, -1)) 71 | 72 | pred_S_grid = pred_S_grid.numpy() 73 | if not streaming: 74 | alpha_map = np.sqrt(pred_S_grid).reshape((100, 100)) 75 | else: 76 | alpha_map = np.exp(-1 * np.sqrt(pred_S_grid).reshape((100, 100))) 77 | alpha_map = alpha_map - np.min(alpha_map) 78 | alpha_map = alpha_map / np.max(alpha_map) 79 | alpha_map = 1 - alpha_map 80 | 81 | # Test points 82 | transformed_x1test, transformed_x2test = transform_room2video(xtest_transformed, ytest_transformed) 83 | transformed_x1test = np.reshape(transformed_x1test, xtest.shape) 84 | transformed_x2test = np.reshape(transformed_x2test, ytest.shape) 85 | 86 | # Path 87 | path_transformed_x0, path_transformed_x1 = transform_room2video(train_data[i][0][:, 0], 88 | train_data[i][0][:, 1]) 89 | 
path_transformed_x = np.concatenate([path_transformed_x0[..., None], path_transformed_x1[..., None]], axis=1) 90 | 91 | # Grid 92 | g1, g2 = get_transformed_grid() 93 | 94 | # Inducing variables 95 | idx, _ = np.where((Z < -3.0) | (Z > 5.0)) 96 | Z = np.delete(Z, idx, axis=0) 97 | transformed_Z_0, transformed_Z_1 = transform_room2video(Z[:, 0], Z[:, 1]) 98 | transformed_Z_0 = np.reshape(transformed_Z_0, Z[:, 0].shape) 99 | transformed_Z_1 = np.reshape(transformed_Z_1, Z[:, 1].shape) 100 | 101 | # Plotting 102 | plt.clf() 103 | _, axs = plt.subplots(1, 1) 104 | plt.plot(path_transformed_x[:, 0], path_transformed_x[:, 1]) 105 | pcol = plt.pcolormesh(transformed_x1test, transformed_x2test, pred_m_grid, alpha=alpha_map.reshape(-1), 106 | vmin=10, vmax=90, shading='gouraud', cmap="jet") 107 | pcol.set_edgecolor('face') 108 | 109 | plt.scatter(transformed_Z_0, transformed_Z_1, color="black") 110 | 111 | plt.plot(g1, g2, color="gray", alpha=0.2) 112 | plt.plot(g1.T, g2.T, color="gray", alpha=0.2) 113 | 114 | plt.xlim([0, 1920]) 115 | plt.ylim([0, 1080]) 116 | axs.set_aspect("equal") 117 | plt.axis('off') 118 | plt.gca().invert_yaxis() 119 | plt.show() 120 | -------------------------------------------------------------------------------- /experiments/magnetometer/streaming_gp_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import numpy as np 7 | import hydra 8 | from omegaconf import DictConfig 9 | from hydra.utils import call, instantiate 10 | 11 | import sys 12 | 13 | sys.path.append("..") 14 | 15 | from exp_utils import get_hydra_output_dir, convert_data_to_online 16 | 17 | # A logger for this file 18 | log = logging.getLogger(__name__) 19 | 20 | 21 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="magnetometer_streaming_experiment") 22 | def run_experiment(cfg: DictConfig): 23 | """ 24 | Initialize and run the experiment. 
25 | """ 26 | all_train_data, test_data = instantiate(cfg.dataset.dataloader)(train_id=[3], test_id=[1, 2, 4, 5]) 27 | log.info("Data loaded successfully!!!") 28 | 29 | output_dir = get_hydra_output_dir() 30 | 31 | # Set up inducing variables 32 | n_inducing_variable = int(np.sqrt(cfg.n_inducing_variable).item()) 33 | 34 | xx, yy = np.linspace(-2, 5, n_inducing_variable), np.linspace(-4, 2, n_inducing_variable) 35 | z1, z2 = np.meshgrid(xx, yy) 36 | zz = np.vstack((z1.flatten(), z2.flatten())).T 37 | inducing_variable = zz.tolist() 38 | cfg.model.inducing_variable = inducing_variable 39 | 40 | online_data = convert_data_to_online(all_train_data[0], n_sets=20) 41 | model = instantiate(cfg.model)(data=online_data[0]) 42 | model.kernel.kernels[0].variance.assign(500) 43 | 44 | optimizer = instantiate(cfg.optimizer) 45 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(optimizer=optimizer, model=model, train_data=online_data, 46 | test_data=test_data) 47 | 48 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 49 | log.info(f"Test RMSE: {rmse_vals[-1]}") 50 | log.info("Optimization successfully done!!!") 51 | 52 | parameters = gpflow.utilities.parameter_dict(model) 53 | with open(os.path.join(output_dir, "model_streaming_magnetometer.pkl"), "wb") as f: 54 | pickle.dump(parameters, f) 55 | 56 | 57 | if __name__ == '__main__': 58 | run_experiment() 59 | -------------------------------------------------------------------------------- /experiments/split_mnist/README.md: -------------------------------------------------------------------------------- 1 | # Split MNIST Experiment 2 | 3 | The split MNIST experiment where we run four models: the offline model where access to the whole dataset is possible, 4 | the proposed model with memory, the proposed model without memory, and the model proposed by Bui *et al.* (2017). 5 | 6 | ## Offline model 7 | 8 | Sparse variational Gaussian process (SVGP) model with access to the whole MNIST data set. 9 | 10 | To run the model: 11 | ``` 12 | python offline_model.py seed=5 13 | ``` 14 | 15 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/offline_mnist_experiment.yaml`. 16 | 17 | ## Proposed model 18 | 19 | To run the model with memory: 20 | ``` 21 | python online_model.py seed=5 22 | ``` 23 | To run the model without memory: 24 | ``` 25 | python online_model.py seed=5 online_gp.num_mem=0 26 | ``` 27 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/online_mnist_experiment.yaml`. 28 | 29 | ## Bui *et al.* (2017) 30 | To run the model: 31 | ``` 32 | python streaming_gp_model.py seed=5 33 | ``` 34 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/streaming_mnist_experiment.yaml`. 35 | 36 | ## Figures and plots 37 | 38 | The figures in the paper can be generated by running the notebooks in `notebooks/`. All the notebooks assume the output 39 | to be present in the following structure: 40 | 41 | ``` 42 | - split_mnist_runs 43 | |- offline 44 | | |- {output run folders} 45 | |- online_memory 46 | | |- {output run folders} 47 | |- online_no_memory 48 | | |- {output run folders} 49 | |- Streaming 50 | | |- {output run folders} 51 | ``` 52 | -------------------------------------------------------------------------------- /experiments/split_mnist/mnist_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | The file contains utility functions for split-mnist expeirment. 
3 | """ 4 | from typing import Tuple 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import gpflow 9 | import wandb 10 | from omegaconf import OmegaConf 11 | 12 | import sys 13 | 14 | sys.path.append("../../..") 15 | 16 | from src.models.tsvgp_cont import OnlineGP 17 | from src.streaming_sparse_gp.osvgpc import OSVGPC 18 | 19 | 20 | def setup_wandb(cfg): 21 | """ 22 | Set up wandb. 23 | """ 24 | wandb_cfg = OmegaConf.to_container( 25 | cfg, resolve=True, throw_on_missing=True 26 | ) 27 | 28 | wandb.init(project="MNIST", entity=cfg.wandb.username, config=wandb_cfg) 29 | 30 | 31 | def load_mnist(seed: int = None, train_split_percentage: float = 0.80) -> (list, list): 32 | """ 33 | Load MNIST data set. 34 | 35 | seed: if seed needs to be fixed, by default it is None. 36 | train_split_percentage: float value between (0, 1), governing the split of data into train and test set. 37 | """ 38 | if seed is not None: 39 | np.random.seed(seed) 40 | tf.random.set_seed(seed) 41 | 42 | mnist_train, mnist_test = tf.keras.datasets.mnist.load_data() 43 | 44 | x, y = mnist_train 45 | x = tf.reshape(x, [x.shape[0], -1]).numpy() 46 | x = x.astype(np.float64) / 255 47 | y = np.reshape(y, (-1, 1)) 48 | y = np.int64(y) 49 | 50 | xt, yt = mnist_test 51 | xt = tf.reshape(xt, [xt.shape[0], -1]).numpy() 52 | xt = xt.astype(np.float64) / 255 53 | yt = np.reshape(yt, (-1, 1)) 54 | yt = np.int64(yt) 55 | 56 | # merge train and test into one 57 | X = np.concatenate([x, xt], axis=0) 58 | Y = np.concatenate([y, yt], axis=0) 59 | 60 | all_data = np.concatenate([X, Y], axis=1) 61 | 62 | n_train = int(all_data.shape[0] * train_split_percentage) 63 | 64 | np.random.shuffle(all_data) 65 | train_data = all_data[:n_train] 66 | test_data = all_data[n_train:] 67 | 68 | train_tasks = (train_data[:, :-1], train_data[:, -1:].astype(np.int64)) 69 | test_tasks = (test_data[:, :-1], test_data[:, -1:].astype(np.int64)) 70 | 71 | return train_tasks, test_tasks 72 | 73 | 74 | def load_split_mnist(seed: int = None, train_split_percentage: float = 0.80) -> (list, list): 75 | """ 76 | Load split-mnist data set. 77 | 78 | seed: if seed needs to be fixed, by default it is None. 79 | train_split_percentage: float value between (0, 1), governing the split of data into train and test set. 
80 | """ 81 | if seed is not None: 82 | np.random.seed(seed) 83 | tf.random.set_seed(seed) 84 | 85 | mnist_train, mnist_test = tf.keras.datasets.mnist.load_data() 86 | 87 | x, y = mnist_train 88 | x = tf.reshape(x, [x.shape[0], -1]).numpy() 89 | x = x.astype(np.float64) / 255 90 | y = np.reshape(y, (-1, 1)) 91 | y = np.int64(y) 92 | 93 | xt, yt = mnist_test 94 | xt = tf.reshape(xt, [xt.shape[0], -1]).numpy() 95 | xt = xt.astype(np.float64) / 255 96 | yt = np.reshape(yt, (-1, 1)) 97 | yt = np.int64(yt) 98 | 99 | # merge train and test into one 100 | X = np.concatenate([x, xt], axis=0) 101 | Y = np.concatenate([y, yt], axis=0) 102 | 103 | train_tasks = [] 104 | test_tasks = [] 105 | 106 | tasks = [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)] 107 | 108 | # Create specific tasks 109 | for t in tasks: 110 | idx, _ = np.where((Y == t[0]) | (Y == t[1])) 111 | np.random.shuffle(idx) 112 | 113 | x_task = X[idx] 114 | y_task = Y[idx] 115 | 116 | n_task = int(x_task.shape[0] * train_split_percentage) 117 | 118 | train_tasks.append((x_task[:n_task], y_task[:n_task])) 119 | test_tasks.append((x_task[n_task:], y_task[n_task:])) 120 | 121 | return train_tasks, test_tasks 122 | 123 | 124 | def get_mini_batches(data: [np.ndarray, np.ndarray], minibatch_size: int = 1000) -> list: 125 | """ 126 | Make mini-batches of data. 127 | """ 128 | num_batches = int(data[0].shape[0] / minibatch_size) 129 | batched_data = [] 130 | for n in range(num_batches): 131 | tmp_data = (data[0][n * minibatch_size:(n + 1) * minibatch_size], 132 | data[1][n * minibatch_size:(n + 1) * minibatch_size]) 133 | batched_data.append(tmp_data) 134 | # Last batch data 135 | if data[0].shape[0] % minibatch_size != 0: 136 | tmp_data = (data[0][num_batches * minibatch_size:], 137 | data[1][num_batches * minibatch_size:]) 138 | batched_data.append(tmp_data) 139 | 140 | return batched_data 141 | 142 | 143 | def optimize_online_model_minibatch(model: OnlineGP, train_data: [np.ndarray, np.ndarray], 144 | test_data: [np.ndarray, np.ndarray], 145 | minibatch_size: int = 100, train_hyper: bool = True, train_mem: bool = True, 146 | n_hyp_opt_steps: int = 20) -> (list, list): 147 | """ 148 | Optimize the Online GP model 149 | 150 | model: the OnlineGP model. 151 | train_data: A tuple of training data. 152 | test_data: A tuple of test data. 153 | minibatch_size: An integer value corresponding to the minibatch size. Defaults to 100 154 | train_hyper: A boolean variable for training the hyperparameters or not. Defaults to True. 155 | train_mem: A boolean variable for training the memory or not. Defaults to True. 156 | n_hyp_opt_steps: An integer value corresponding to the number of hyperparameter optimization steps. Defaults to 20. 157 | 158 | returns: a list of NLPD and accuracy values. 
159 | """ 160 | batched_data = get_mini_batches(train_data, minibatch_size) 161 | 162 | nlpd_vals = [] 163 | acc_vals = [] 164 | for batch_data in batched_data: 165 | for var in model.optimizer.variables(): 166 | var.assign(tf.zeros_like(var)) 167 | model.update_with_new_batch(new_data=batch_data, train_hyps=train_hyper, n_hyp_opt_steps=n_hyp_opt_steps, 168 | train_mem=train_mem, remove_memory=False) 169 | 170 | nlpd = -1 * tf.reduce_mean(model.model.predict_log_density(test_data)) 171 | nlpd_vals.append(nlpd) 172 | 173 | pred_m, _ = model.model.predict_y(test_data[0]) 174 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 175 | acc = np.mean(pred_argmax == test_data[1]) 176 | acc_vals.append(acc) 177 | 178 | return nlpd_vals, acc_vals 179 | 180 | 181 | def optimize_streaming_model_minibatch(optimizer, model, train_data: Tuple[np.ndarray, np.ndarray], 182 | test_data: Tuple[np.ndarray, np.ndarray], iterations: int = 100, 183 | minibatch_size: int = 100, mu=None, Su=None, Kaa=None, Zopt=None, 184 | first_init=True): 185 | """ 186 | Optimize the streaming model of Bui et al. 2017. 187 | 188 | The code is based on the official implementation: https://github.com/thangbui/streaming_sparse_gp 189 | """ 190 | def optimization_step_adam(): 191 | optimizer.minimize(model.training_loss, model.trainable_variables) 192 | 193 | def optimization_step_scipy(): 194 | optimizer.minimize(model.training_loss, model.trainable_variables, options={'maxiter': iterations}) 195 | 196 | def optimization_step(): 197 | if isinstance(optimizer, gpflow.optimizers.Scipy): 198 | optimization_step_scipy() 199 | else: 200 | for _ in range(iterations): 201 | optimization_step_adam() 202 | 203 | def init_Z(cur_Z, new_X, use_old_Z=True): 204 | if use_old_Z: 205 | Z = np.copy(cur_Z) 206 | else: 207 | M = cur_Z.shape[0] 208 | M_old = int(0.7 * M) 209 | M_new = M - M_old 210 | old_Z = cur_Z[np.random.permutation(M)[0:M_old], :] 211 | new_Z = new_X[np.random.permutation(new_X.shape[0])[0:M_new], :] 212 | Z = np.vstack((old_Z, new_Z)) 213 | return Z 214 | 215 | use_old_z = False 216 | nlpd_vals = [] 217 | acc_vals = [] 218 | 219 | batched_data = get_mini_batches(train_data, minibatch_size) 220 | for i, new_data in enumerate(batched_data): 221 | X, y = (new_data[0], new_data[1]) 222 | 223 | if first_init: 224 | if isinstance(optimizer, gpflow.optimizers.Scipy): 225 | gpflow.optimizers.Scipy().minimize( 226 | model.training_loss_closure((X, y)), model.trainable_variables, options={'maxiter': iterations}) 227 | else: 228 | for _ in range(iterations): 229 | optimizer.minimize(model.training_loss_closure((X, y)), model.trainable_variables) 230 | first_init = False 231 | else: 232 | Zinit = init_Z(Zopt, X, use_old_z) 233 | model = OSVGPC((X, y), gpflow.kernels.Matern52(), gpflow.likelihoods.Softmax(num_classes=10), mu, Su, Kaa, 234 | Zopt, Zinit, num_latent_gps=10) 235 | optimization_step() 236 | 237 | Zopt = model.inducing_variable.Z.numpy() 238 | mu, Su = model.predict_f(Zopt, full_cov=True) 239 | if len(Su.shape) == 3: 240 | Su = Su[0, :, :] + 1e-4 * np.eye(mu.shape[0]) 241 | Kaa = model.kernel(model.inducing_variable.Z) 242 | 243 | # NLPD calculation 244 | f_mean, f_var = model.predict_f(test_data[0]) 245 | if len(f_var.shape) == 1: 246 | f_var = f_var[..., None] 247 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, test_data[1]) 248 | nlpd = -1 * tf.reduce_mean(nlpd) 249 | nlpd_vals.append(nlpd) 250 | 251 | # acc 252 | pred_m, _ = model.predict_y(test_data[0]) 253 | pred_argmax = tf.reshape(tf.argmax(pred_m, 
axis=1), (-1, 1)) 254 | acc = np.mean(pred_argmax == test_data[1]) 255 | acc_vals.append(acc) 256 | 257 | return nlpd_vals, acc_vals, mu, Su, Kaa, Zopt, model 258 | -------------------------------------------------------------------------------- /experiments/split_mnist/offline_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for split mnist experiment offline SVGP model i.e. the model has access to the whole data set. 3 | """ 4 | import logging 5 | import os 6 | import pickle 7 | 8 | import gpflow.models 9 | import numpy as np 10 | from omegaconf import DictConfig 11 | from hydra.utils import instantiate, call 12 | import hydra 13 | 14 | import sys 15 | 16 | sys.path.append("..") 17 | 18 | from exp_utils import get_hydra_output_dir 19 | from mnist_utils import setup_wandb 20 | 21 | # A logger for this file 22 | log = logging.getLogger(__name__) 23 | 24 | 25 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="offline_mnist_experiment") 26 | def run_experiment(cfg: DictConfig): 27 | """ 28 | Initialize and run the experiment. 29 | """ 30 | output_dir = get_hydra_output_dir() 31 | 32 | train_data, test_data = call(cfg.dataset.dataloader) 33 | # Set up inducing variables 34 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 35 | cfg.model.inducing_variable = inducing_variable 36 | cfg.model.num_data = train_data[0].shape[0] 37 | 38 | if cfg.wandb.username is not None: 39 | setup_wandb(cfg) 40 | 41 | offline_model = instantiate(cfg.model) 42 | log.info("Model initialized; Optimization started!!!") 43 | 44 | log.info(f"---------------------------------------------") 45 | log.info(f"Starting mnist experiment with seed={cfg.seed}") 46 | log.info(f"---------------------------------------------") 47 | 48 | _, nlpd, acc = call(cfg.optimize)(model=offline_model, train_data=train_data, 49 | test_data=test_data, optimizer=instantiate(cfg.optimizer)) 50 | 51 | logging.info(f"NLPD after the task is {nlpd[-1]}") 52 | logging.info(f"Accuracy after the task is {acc[-1]}\n\n") 53 | 54 | log.info(f"---------------------------------------------") 55 | 56 | np.savez(os.path.join(output_dir, "training_statistics.npz"), nlpd=nlpd, 57 | acc=acc) 58 | parameters = gpflow.utilities.parameter_dict(offline_model) 59 | with open(os.path.join(output_dir, "model_offline.pkl"), "wb") as f: 60 | pickle.dump(parameters, f) 61 | 62 | 63 | if __name__ == '__main__': 64 | run_experiment() 65 | -------------------------------------------------------------------------------- /experiments/split_mnist/online_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for split mnist experiment online model. 3 | """ 4 | 5 | import pickle 6 | import os 7 | import logging 8 | import wandb 9 | 10 | import gpflow 11 | import tensorflow as tf 12 | import hydra 13 | import numpy as np 14 | from omegaconf import DictConfig 15 | from hydra.utils import instantiate, call 16 | 17 | import sys 18 | 19 | sys.path.append("..") 20 | sys.path.append("../..") 21 | 22 | from exp_utils import get_hydra_output_dir 23 | from src.models.tsvgp_cont import piv_chol 24 | from mnist_utils import setup_wandb 25 | 26 | # A logger for this file 27 | log = logging.getLogger(__name__) 28 | 29 | 30 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="online_mnist_experiment") 31 | def run_experiment(cfg: DictConfig): 32 | """ 33 | Initialize and run the experiment. 
34 | """ 35 | output_dir = get_hydra_output_dir() 36 | 37 | if cfg.wandb.username is not None: 38 | setup_wandb(cfg) 39 | 40 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 41 | 42 | # Set up inducing variables 43 | inducing_variable = all_train_data[0][0][:cfg.n_inducing_variable].copy().tolist() 44 | cfg.model.inducing_variable = inducing_variable 45 | 46 | model = instantiate(cfg.model) 47 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 48 | 49 | memory = (all_train_data[0][0][:1], all_train_data[0][1][:1]) 50 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 51 | Z_picker=piv_chol, memory_picker=call(cfg.memory_picker)) 52 | 53 | log.info(f"---------------------------------------------") 54 | log.info(f"Starting split mnist experiment with seed={cfg.seed}") 55 | log.info(f"---------------------------------------------") 56 | 57 | nlpd_vals = [] 58 | acc_vals = [] 59 | task_break_pnts = [] 60 | task_id = 0 61 | 62 | previous_tasks = None 63 | for train_data, test_data in zip(all_train_data, all_test_data): 64 | 65 | log.info(f"---------------------------------------------") 66 | log.info(f"Task {task_id}") 67 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 68 | log.info("Splitting data into sets...") 69 | 70 | if previous_tasks is None: 71 | previous_tasks = test_data 72 | else: 73 | previous_tasks = (np.concatenate([previous_tasks[0], test_data[0].copy()], axis=0), 74 | np.concatenate([previous_tasks[1], test_data[1].copy()], axis=0)) 75 | 76 | # Calculating Accuracy and NLPD before the model is trained 77 | nlpd_init = -1 * tf.reduce_mean(online_gp.model.predict_log_density(previous_tasks)).numpy().item() 78 | 79 | pred_m, _ = online_gp.model.predict_y(previous_tasks[0]) 80 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 81 | acc_init = np.mean(pred_argmax == previous_tasks[1]) 82 | 83 | nlpd, acc = call(cfg.optimize)(model=online_gp, train_data=train_data, test_data=previous_tasks) 84 | 85 | if cfg.wandb.username is not None: 86 | wandb.log({"Accuracy": acc[-1]}) 87 | 88 | # Add init acc and nlpd 89 | acc = [acc_init] + acc 90 | nlpd = [nlpd_init] + nlpd 91 | 92 | nlpd_vals += nlpd 93 | acc_vals += acc 94 | 95 | task_break_pnts.append(len(nlpd_vals)) 96 | 97 | logging.info(f"NLPD after the task is {nlpd[-1]}") 98 | logging.info(f"Accuracy after the task is {acc[-1]}\n\n") 99 | 100 | logging.info("NLPD on all tasks:\n") 101 | for i in range(task_id, -1, -1): 102 | nlpd = -1 * tf.reduce_mean(online_gp.model.predict_log_density(all_test_data[i])) 103 | 104 | pred_m, _ = online_gp.model.predict_y(all_test_data[i][0]) 105 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 106 | acc = np.mean(pred_argmax == all_test_data[i][1]) 107 | 108 | logging.info(f"NLPD on task {i} is {nlpd}") 109 | logging.info(f"Accuracy on task {i} is {acc}\n\n") 110 | 111 | # Save model memory and inducing variables 112 | Z = online_gp.model.inducing_variable.Z.numpy().copy() 113 | mem = online_gp.memory[0].copy() 114 | np.savez(os.path.join(output_dir, f"memory_and_Z_{task_id}.npz"), mem=mem, Z=Z) 115 | 116 | log.info(f"---------------------------------------------") 117 | task_id += 1 118 | 119 | np.savez(os.path.join(output_dir, "training_statistics.npz"), nlpd=nlpd_vals, 120 | acc=acc_vals, task_break_pnts=task_break_pnts) 121 | parameters = gpflow.utilities.parameter_dict(online_gp.model) 122 | with open(os.path.join(output_dir, 
"model_online.pkl"), "wb") as f: 123 | pickle.dump(parameters, f) 124 | 125 | 126 | if __name__ == '__main__': 127 | run_experiment() 128 | -------------------------------------------------------------------------------- /experiments/split_mnist/streaming_gp_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import tensorflow as tf 7 | import hydra 8 | import numpy as np 9 | from omegaconf import DictConfig 10 | from hydra.utils import instantiate, call 11 | 12 | import sys 13 | 14 | sys.path.append("..") 15 | sys.path.append("../..") 16 | 17 | from exp_utils import get_hydra_output_dir 18 | 19 | # A logger for this file 20 | log = logging.getLogger(__name__) 21 | 22 | 23 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="streaming_mnist_experiment") 24 | def run_experiment(cfg: DictConfig): 25 | """ 26 | Initialize and run the experiment. 27 | """ 28 | output_dir = get_hydra_output_dir() 29 | 30 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 31 | 32 | log.info(f"---------------------------------------------") 33 | log.info(f"Starting mnist experiment with seed={cfg.seed}") 34 | log.info(f"---------------------------------------------") 35 | 36 | # Set up inducing variables 37 | inducing_variable = all_train_data[0][0][:cfg.n_inducing_variable].copy().tolist() 38 | cfg.model.inducing_variable = inducing_variable 39 | 40 | cfg.model.num_data = all_train_data[0][0].shape[0] 41 | cfg.model.num_latent_gps = cfg.num_classes 42 | model = instantiate(cfg.model) 43 | 44 | log.info("Model initialized; Optimization started!!!") 45 | 46 | optimizer = instantiate(cfg.optimizer) 47 | 48 | nlpd_vals = [] 49 | acc_vals = [] 50 | task_break_pnts = [] 51 | task_id = 0 52 | first_init = True 53 | 54 | previous_tasks = None 55 | mu, Su, Kaa, Zopt = None, None, None, None 56 | 57 | for train_data, test_data in zip(all_train_data, all_test_data): 58 | log.info(f"---------------------------------------------") 59 | log.info(f"Task {task_id}") 60 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 61 | log.info("Splitting data into sets...") 62 | 63 | if previous_tasks is None: 64 | previous_tasks = test_data 65 | else: 66 | previous_tasks = (np.concatenate([previous_tasks[0], test_data[0].copy()], axis=0), 67 | np.concatenate([previous_tasks[1], test_data[1].copy()], axis=0)) 68 | 69 | # Calculating Accuracy and NLPD before the model is trained 70 | f_mean, f_var = model.predict_f(previous_tasks[0]) 71 | if len(f_var.shape) == 1: 72 | f_var = f_var[..., None] 73 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, previous_tasks[1]) 74 | nlpd_init = -1 * tf.reduce_mean(nlpd).numpy().item() 75 | 76 | pred_m, _ = model.predict_y(previous_tasks[0]) 77 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 78 | acc_init = np.mean(pred_argmax == previous_tasks[1]) 79 | 80 | nlpd, acc, mu, Su, Kaa, Zopt, model = call(cfg.optimize)(optimizer=optimizer, model=model, 81 | train_data=train_data, 82 | test_data=previous_tasks, mu=mu, Su=Su, 83 | Kaa=Kaa, Zopt=Zopt, 84 | first_init=first_init) 85 | first_init = False 86 | logging.info(f"NLPD after the task is {nlpd[-1]}") 87 | logging.info(f"Accuracy after the task is {acc[-1]}\n\n") 88 | 89 | # Add init acc and nlpd 90 | acc = [acc_init] + acc 91 | nlpd = [nlpd_init] + nlpd 92 | 93 | nlpd_vals += nlpd 94 | acc_vals += acc 95 | 96 | 
task_break_pnts.append(len(nlpd_vals)) 97 | 98 | logging.info("NLPD on previous tasks:\n") 99 | for i in range(task_id, -1, -1): 100 | f_mean, f_var = model.predict_f(all_test_data[i][0]) 101 | if len(f_var.shape) == 1: 102 | f_var = f_var[..., None] 103 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, all_test_data[i][1]) 104 | nlpd = -1 * tf.reduce_mean(nlpd) 105 | 106 | pred_m, _ = model.predict_y(all_test_data[i][0]) 107 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 108 | acc = np.mean(pred_argmax == all_test_data[i][1]) 109 | 110 | logging.info(f"NLPD on task {i} is {nlpd}") 111 | logging.info(f"Accuracy on task {i} is {acc}\n\n") 112 | 113 | log.info(f"---------------------------------------------") 114 | task_id += 1 115 | 116 | np.savez(os.path.join(output_dir, "training_statistics.npz"), nlpd=nlpd_vals, 117 | acc=acc_vals, task_break_pnts=task_break_pnts) 118 | parameters = gpflow.utilities.parameter_dict(model) 119 | with open(os.path.join(output_dir, "model_streaming.pkl"), "wb") as f: 120 | pickle.dump(parameters, f) 121 | 122 | 123 | if __name__ == '__main__': 124 | run_experiment() 125 | -------------------------------------------------------------------------------- /experiments/uci/README.md: -------------------------------------------------------------------------------- 1 | # UCI Experiment 2 | 3 | In the UCI experiment we compare three models: the offline model that has access to the whole dataset, our proposed online model, and the online model proposed by Bui *et al.* (2017). 4 | 5 | For the paper, for each data set we perform 10-fold cross-validation. In all cases we use a Matérn-5/2 kernel. 6 | 7 | ## Offline model 8 | 9 | The sparse variational GP model (t-SVGP) with access to the whole data set. 10 | 11 | To run the model on a **regression** dataset: 12 | ``` 13 | python offline_model.py dataset=bike model.likelihood.variance=0.1 dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 14 | ``` 15 | To run the model on a **classification** dataset: 16 | ``` 17 | python offline_model.py dataset=adult optimize.lambda_lr=0.5 model/likelihood=bernoulli dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 18 | ``` 19 | 20 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/offline_experiment.yaml`. 21 | 22 | ## Proposed model 23 | 24 | To run the model on a **regression** dataset: 25 | ``` 26 | python online_model.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 model.likelihood.variance=0.1 optimize.hyperparams_step=100 online_gp.n_steps=2 27 | ``` 28 | To run the model on a **classification** dataset: 29 | ``` 30 | python online_model.py dataset=adult model/likelihood=bernoulli online_gp.lambda_lr=0.5 online_gp.n_steps=4 dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 optimize.hyperparams_step=100 31 | ``` 32 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/online_experiment.yaml`. 33 | 34 | The memory picker can be changed from BLS (by default) to random by using the command-line argument `online_gp.memory_picker=random`. 
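
For reference, the snippet below is a minimal sketch (not the verbatim experiment code) of how that flag is resolved inside `online_model.py`: the config string simply selects between the `random_picker` and default (BLS-style) `memory_picker` helpers from `src/models/utils.py` before the online GP wrapper is instantiated. The `resolve_memory_picker` helper and the commented usage lines are illustrative only.

```
# Minimal sketch of how `online_gp.memory_picker` selects the memory-picking
# function; assumes the repository root is on sys.path so that `src` imports.
from src.models.utils import fixed_Z, memory_picker, random_picker


def resolve_memory_picker(name):
    """Map the `online_gp.memory_picker` config string to a picker function."""
    # 'random' selects random_picker; any other value falls back to the
    # default BLS-style memory_picker helper.
    return random_picker if name == "random" else memory_picker


# Inside the Hydra-driven run (cfg is the experiment DictConfig), roughly:
#   picker = resolve_memory_picker(cfg.online_gp.memory_picker)
#   online_gp = instantiate(cfg.online_gp)(model=model, memory=memory,
#                                          opt_hypers=instantiate(cfg.optimizer),
#                                          Z_picker=fixed_Z, memory_picker=picker)
```

Keeping the selection in a small helper like this avoids re-binding the imported `memory_picker` name inside the experiment function.
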
35 | 36 | ## Bui *et al.* (2017) 37 | 38 | To run the Bui *et al.* (2017) model on a **regression** dataset: 39 | ``` 40 | python streaming_sgpr.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 optimize.iterations=100 model.noise_variance=0.1 optimizer=scipy 41 | ``` 42 | To run the Bui *et al.* (2017) model on a **classification** dataset: 43 | ``` 44 | python streaming_sgpr.py dataset=adult dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 optimizer=adam optimize.iterations=100 optimize.task="classification" model=svgp model/likelihood=bernoulli n_sets=50 45 | ``` 46 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/streaming_experiment.yaml`. 47 | 48 | ## Fast-Conditioning 49 | Fast-conditioning, _i.e._, only the variational parameters are optimized; the kernel hyperparameters and inducing inputs are loaded from a previously trained model and kept fixed. 50 | ### Proposed model 51 | To run the fast-conditioning of the proposed model: 52 | ``` 53 | python online_fc_model.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 model.likelihood.variance=0.1 online_gp.num_mem=0 online_gp.n_steps=2 optimize.train_memory=False load_model_path={trained_model_path} optimize.train_hyperparams=False 54 | ``` 55 | 56 | ### Bui *et al.* (2017) 57 | To run the fast-conditioning of the Bui _et al._ (2017) model: 58 | 59 | ``` 60 | python streaming_sgpr_fc.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 optimizer=adam load_model_path={trained_model_path} optimize.iterations=0 optimize.task="regression" model.noise_variance=0.1 61 | ``` 62 | -------------------------------------------------------------------------------- /experiments/uci/offline_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for UCI regression tasks offline model. 3 | """ 4 | import logging 5 | import os 6 | import pickle 7 | 8 | import gpflow.models 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | from omegaconf import DictConfig 12 | from hydra.utils import instantiate, call 13 | import hydra 14 | import wandb 15 | 16 | import sys 17 | sys.path.append("..") 18 | 19 | from exp_utils import get_hydra_output_dir 20 | from uci_utils import load_model_parameters, setup_wandb 21 | 22 | # A logger for this file 23 | log = logging.getLogger(__name__) 24 | 25 | 26 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="offline_experiment") 27 | def run_experiment(cfg: DictConfig): 28 | """ 29 | Initialize and run the experiment.
30 | """ 31 | output_dir = get_hydra_output_dir() 32 | 33 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 34 | 35 | log.info(f"---------------------------------------------") 36 | log.info(f"Dataset : {cfg.dataset}") 37 | log.info(f"---------------------------------------------") 38 | 39 | if len(all_train_data) > 1: 40 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 41 | 42 | k_fold_nlpds = [] 43 | k_fold_eval = [] 44 | k_fold_id = 0 45 | for train_data, test_data in zip(all_train_data, all_test_data): 46 | cfg.model.num_data = train_data[0].shape[0] 47 | log.info(f"---------------------------------------------") 48 | log.info(f"Starting with set {k_fold_id}") 49 | log.info(f"---------------------------------------------") 50 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 51 | 52 | if cfg.wandb.username is not None: 53 | setup_wandb(cfg) 54 | 55 | # Set up inducing variables 56 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 57 | cfg.model.inducing_variable = inducing_variable 58 | 59 | model = instantiate(cfg.model) 60 | log.info("Model initialized; Optimization started!!!") 61 | if cfg.load_model_path is not None: 62 | with open(cfg.load_model_path, "rb") as f: 63 | dict_params = pickle.load(f) 64 | load_model_parameters(model, dict_params) 65 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 66 | 67 | elbo_vals, nlpd_vals, eval_vals = call(cfg.optimize)(model=model, train_data=train_data, test_data=test_data, 68 | optimizer=instantiate(cfg.optimizer)) 69 | if len(nlpd_vals) > 0: 70 | log.info(f"Final ELBO: {elbo_vals[-1]}") 71 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 72 | log.info(f"Test RMSE/Acc: {eval_vals[-1]}") 73 | 74 | log.info("Optimization successfully done!!!") 75 | 76 | if cfg.wandb.username is not None: 77 | plt.clf() 78 | plt.plot(elbo_vals) 79 | plt.title("ELBO") 80 | wandb.log({"optim_elbo_vals": plt}) 81 | 82 | plt.clf() 83 | plt.plot(nlpd_vals) 84 | plt.title("NLPD") 85 | wandb.log({"optim_nlpd_vals": plt}) 86 | 87 | plt.clf() 88 | plt.plot(eval_vals) 89 | plt.title("Eval.") 90 | wandb.log({"optim_eval_vals": plt}) 91 | 92 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), elbo=elbo_vals, 93 | nlpd=nlpd_vals, eval=eval_vals) 94 | parameters = gpflow.utilities.parameter_dict(model) 95 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 96 | pickle.dump(parameters, f) 97 | 98 | k_fold_id += 1 99 | k_fold_nlpds.append(nlpd_vals[-1]) 100 | k_fold_eval.append(eval_vals[-1]) 101 | log.info(f"---------------------------------------------") 102 | 103 | if len(k_fold_nlpds) > 1: 104 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 105 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 106 | 107 | log.info(f"Mean eval over k-folds = {np.mean(k_fold_eval)}") 108 | log.info(f"Std eval over k-folds = {np.std(k_fold_eval)}") 109 | 110 | 111 | if __name__ == '__main__': 112 | run_experiment() 113 | -------------------------------------------------------------------------------- /experiments/uci/online_fc_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import time 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | from omegaconf import DictConfig, OmegaConf 9 | from hydra.utils import instantiate, call 10 | import hydra 11 | 
import wandb 12 | import gpflow 13 | 14 | import sys 15 | sys.path.append("../../") 16 | sys.path.append("..") 17 | from src.models.utils import memory_picker, fixed_Z 18 | 19 | from uci_utils import setup_wandb 20 | from exp_utils import convert_data_to_online, get_hydra_output_dir 21 | 22 | # A logger for this file 23 | log = logging.getLogger(__name__) 24 | 25 | 26 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="online_experiment") 27 | def run_experiment(cfg: DictConfig): 28 | """ 29 | Initialize and run the experiment. 30 | """ 31 | output_dir = get_hydra_output_dir() 32 | 33 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 34 | 35 | if len(all_train_data) > 1: 36 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 37 | 38 | k_fold_nlpds = [] 39 | k_fold_eval = [] 40 | k_fold_time = [] 41 | k_fold_id = 0 42 | 43 | for train_data, test_data in zip(all_train_data, all_test_data): 44 | 45 | log.info(f"---------------------------------------------") 46 | log.info(f"Starting with set {k_fold_id}") 47 | log.info(f"---------------------------------------------") 48 | 49 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 50 | log.info("Splitting data into sets...") 51 | 52 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 53 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 54 | 55 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 56 | 57 | if cfg.wandb.username is not None: 58 | setup_wandb(cfg) 59 | 60 | # Set up inducing variables 61 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 62 | cfg.model.inducing_variable = inducing_variable 63 | 64 | model = instantiate(cfg.model) 65 | if cfg.load_model_path is None: 66 | raise Exception("FC model should have a model path from where hyperparams are loaded!") 67 | 68 | model_path = os.path.join(cfg.load_model_path, "model_" + str(k_fold_id) + ".pkl") 69 | 70 | with open(model_path, "rb") as f: 71 | dict_params = pickle.load(f) 72 | 73 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 74 | model.kernel.lengthscales = dict_params['.kernel.lengthscales'] 75 | model.kernel.variance = dict_params['.kernel.variance'] 76 | 77 | # not present in classification 78 | if '.likelihood.variance' in dict_params: 79 | model.likelihood.variance = dict_params['.likelihood.variance'] 80 | 81 | # make then non-trainable 82 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 83 | gpflow.utilities.set_trainable(model.kernel.lengthscales, False) 84 | gpflow.utilities.set_trainable(model.kernel.variance, False) 85 | 86 | if isinstance(model.likelihood, gpflow.likelihoods.Gaussian): 87 | gpflow.utilities.set_trainable(model.likelihood.variance, False) 88 | 89 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 90 | 91 | memory = None 92 | 93 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 94 | Z_picker=fixed_Z, memory_picker=memory_picker) 95 | start_time = time.time() 96 | nlpd_vals, eval_vals, time_vals = call(cfg.optimize)(online_gp=online_gp, train_data=online_data, test_data=test_data, 97 | debug=True) 98 | end_time = time.time() 99 | 100 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 101 | log.info(f"Test RMSE/Acc: {eval_vals[-1]}") 102 | log.info(f"Time (s): {end_time - start_time}") 103 | 104 | log.info("Optimization 
successfully done!!!") 105 | 106 | if cfg.wandb.username is not None: 107 | plt.clf() 108 | plt.plot(nlpd_vals) 109 | plt.title("NLPD") 110 | wandb.log({"optim_nlpd_vals": plt}) 111 | 112 | plt.clf() 113 | plt.plot(eval_vals) 114 | plt.title("RMSE/Acc") 115 | wandb.log({"optim_eval_vals": plt}) 116 | 117 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 118 | eval=eval_vals, time_vals=time_vals) 119 | parameters = gpflow.utilities.parameter_dict(model) 120 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 121 | pickle.dump(parameters, f) 122 | 123 | k_fold_id += 1 124 | k_fold_nlpds.append(nlpd_vals[-1]) 125 | k_fold_eval.append(eval_vals[-1]) 126 | k_fold_time.append(end_time - start_time) 127 | log.info(f"---------------------------------------------") 128 | 129 | if len(k_fold_nlpds) > 1: 130 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 131 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 132 | 133 | log.info(f"Mean RMSE/Acc over k-folds = {np.mean(k_fold_eval)}") 134 | log.info(f"Std RMSE/Acc over k-folds = {np.std(k_fold_eval)}") 135 | 136 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 137 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 138 | 139 | 140 | if __name__ == '__main__': 141 | run_experiment() 142 | -------------------------------------------------------------------------------- /experiments/uci/online_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for the proposed model on UCI regression tasks. 3 | """ 4 | import logging 5 | import os 6 | import pickle 7 | import time 8 | 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | from omegaconf import DictConfig, OmegaConf 12 | from hydra.utils import instantiate, call 13 | import hydra 14 | import wandb 15 | import gpflow 16 | 17 | import sys 18 | 19 | sys.path.append("../../") 20 | sys.path.append("..") 21 | 22 | from src.models.utils import fixed_Z, memory_picker, random_picker 23 | from exp_utils import get_hydra_output_dir, convert_data_to_online 24 | from uci_utils import setup_wandb 25 | 26 | # A logger for this file 27 | log = logging.getLogger(__name__) 28 | 29 | 30 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="online_experiment") 31 | def run_experiment(cfg: DictConfig): 32 | """ 33 | Initialize and run the experiment. 
34 | """ 35 | output_dir = get_hydra_output_dir() 36 | 37 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 38 | 39 | if len(all_train_data) > 1: 40 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 41 | 42 | k_fold_nlpds = [] 43 | k_fold_eval = [] 44 | k_fold_time = [] 45 | k_fold_id = 0 46 | 47 | for train_data, test_data in zip(all_train_data, all_test_data): 48 | 49 | log.info(f"---------------------------------------------") 50 | log.info(f"Starting with set {k_fold_id}") 51 | log.info(f"---------------------------------------------") 52 | 53 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 54 | log.info("Splitting data into sets...") 55 | 56 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 57 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 58 | 59 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 60 | 61 | if cfg.wandb.username is not None: 62 | setup_wandb(cfg) 63 | 64 | # Set up inducing variables 65 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 66 | cfg.model.inducing_variable = inducing_variable 67 | 68 | model = instantiate(cfg.model) 69 | if cfg.load_model_path is not None: 70 | with open(cfg.load_model_path, "rb") as f: 71 | dict_params = pickle.load(f) 72 | 73 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 74 | model.kernel.lengthscales = dict_params['.kernel.lengthscales'] 75 | model.kernel.variance = dict_params['.kernel.variance'] 76 | model.likelihood.variance = dict_params['.likelihood.variance'] 77 | 78 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 79 | 80 | memory = (online_data[0][0][:1], online_data[0][1][:1]) 81 | if cfg.online_gp.memory_picker == "random": 82 | memory_picker = random_picker 83 | else: 84 | memory_picker = memory_picker 85 | 86 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 87 | Z_picker=fixed_Z, memory_picker=memory_picker) 88 | start_time = time.time() 89 | nlpd_vals, eval_vals, _ = call(cfg.optimize)(online_gp=online_gp, train_data=online_data, test_data=test_data, 90 | debug=True) 91 | end_time = time.time() 92 | 93 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 94 | log.info(f"Test RMSE/Acc: {eval_vals[-1]}") 95 | log.info(f"Time (s): {end_time - start_time}") 96 | 97 | log.info("Optimization successfully done!!!") 98 | 99 | if cfg.wandb.username is not None: 100 | plt.clf() 101 | plt.plot(nlpd_vals) 102 | plt.title("NLPD") 103 | wandb.log({"optim_nlpd_vals": plt}) 104 | 105 | plt.clf() 106 | plt.plot(eval_vals) 107 | plt.title("RMSE/Acc") 108 | wandb.log({"optim_eval_vals": plt}) 109 | 110 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 111 | eval=eval_vals) 112 | parameters = gpflow.utilities.parameter_dict(model) 113 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 114 | pickle.dump(parameters, f) 115 | 116 | k_fold_id += 1 117 | k_fold_nlpds.append(nlpd_vals[-1]) 118 | k_fold_eval.append(eval_vals[-1]) 119 | k_fold_time.append(end_time - start_time) 120 | log.info(f"---------------------------------------------") 121 | 122 | if len(k_fold_nlpds) > 1: 123 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 124 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 125 | 126 | log.info(f"Mean RMSE/Acc over 
k-folds = {np.mean(k_fold_eval)}") 127 | log.info(f"Std RMSE/Acc over k-folds = {np.std(k_fold_eval)}") 128 | 129 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 130 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 131 | 132 | 133 | if __name__ == '__main__': 134 | run_experiment() 135 | -------------------------------------------------------------------------------- /experiments/uci/streaming_sgpr.py: -------------------------------------------------------------------------------- 1 | """ 2 | @inproceedings{BuiNguTur17, 3 | title = {Streaming sparse {G}aussian process approximations}, 4 | author = {Bui, Thang D. and Nguyen, Cuong V. and Turner, Richard E.}, 5 | booktitle = {Advances in Neural Information Processing Systems 30}, 6 | year = {2017} 7 | } 8 | """ 9 | import os 10 | import logging 11 | import pickle 12 | import time 13 | 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | from omegaconf import DictConfig 17 | from hydra.utils import instantiate, call 18 | import hydra 19 | import wandb 20 | import gpflow 21 | 22 | import sys 23 | 24 | sys.path.append("..") 25 | 26 | from uci_utils import load_model_parameters, setup_wandb 27 | from exp_utils import get_hydra_output_dir, convert_data_to_online 28 | 29 | # A logger for this file 30 | log = logging.getLogger(__name__) 31 | 32 | 33 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="streaming_experiment") 34 | def run_experiment(cfg: DictConfig): 35 | """ 36 | Initialize and run the experiment. 37 | """ 38 | output_dir = get_hydra_output_dir() 39 | 40 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 41 | 42 | if len(all_train_data) > 1: 43 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 44 | 45 | k_fold_nlpds = [] 46 | k_fold_rmse = [] 47 | k_fold_time = [] 48 | k_fold_id = 0 49 | 50 | log.info(f"---------------------------------------------") 51 | log.info(f"Dataset : {cfg.dataset}") 52 | log.info(f"---------------------------------------------") 53 | for train_data, test_data in zip(all_train_data, all_test_data): 54 | log.info(f"---------------------------------------------") 55 | log.info(f"Starting with set {k_fold_id}") 56 | log.info(f"---------------------------------------------") 57 | 58 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 59 | log.info("Splitting data into sets...") 60 | 61 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 62 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 63 | 64 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 65 | 66 | if cfg.wandb.username is not None: 67 | setup_wandb(cfg) 68 | 69 | # Set up inducing variables 70 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 71 | cfg.model.inducing_variable = inducing_variable 72 | 73 | if "SGPR" in cfg.model._target_: 74 | model = instantiate(cfg.model)(data=online_data[0]) 75 | else: 76 | cfg.model.num_data = train_data[0].shape[0] 77 | model = instantiate(cfg.model) 78 | 79 | if cfg.load_model_path is not None: 80 | with open(cfg.load_model_path, "rb") as f: 81 | dict_params = pickle.load(f) 82 | 83 | load_model_parameters(model, dict_params) 84 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 85 | 86 | log.info("Model initialized; Optimization started!!!") 87 | 88 | optimizer = instantiate(cfg.optimizer) 89 | 90 | start_time = 
time.time() 91 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(optimizer=optimizer, model=model, train_data=online_data, 92 | test_data=test_data) 93 | end_time = time.time() 94 | 95 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 96 | log.info(f"Test RMSE: {rmse_vals[-1]}") 97 | log.info(f"Time (s): {end_time - start_time}") 98 | log.info("Optimization successfully done!!!") 99 | 100 | if cfg.wandb.username is not None: 101 | plt.clf() 102 | plt.plot(nlpd_vals) 103 | plt.title("NLPD") 104 | wandb.log({"optim_nlpd_vals": plt}) 105 | 106 | plt.clf() 107 | plt.plot(rmse_vals) 108 | plt.title("RMSE") 109 | wandb.log({"optim_rmse_vals": plt}) 110 | 111 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 112 | rmse=rmse_vals) 113 | parameters = gpflow.utilities.parameter_dict(model) 114 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 115 | pickle.dump(parameters, f) 116 | 117 | k_fold_id += 1 118 | k_fold_nlpds.append(nlpd_vals[-1]) 119 | k_fold_rmse.append(rmse_vals[-1]) 120 | k_fold_time.append(end_time - start_time) 121 | log.info(f"---------------------------------------------") 122 | 123 | if len(k_fold_nlpds) > 1: 124 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 125 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 126 | 127 | log.info(f"Mean RMSE over k-folds = {np.mean(k_fold_rmse)}") 128 | log.info(f"Std RMSE over k-folds = {np.std(k_fold_rmse)}") 129 | 130 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 131 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 132 | 133 | 134 | if __name__ == '__main__': 135 | """ 136 | """ 137 | run_experiment() 138 | -------------------------------------------------------------------------------- /experiments/uci/streaming_sgpr_fc.py: -------------------------------------------------------------------------------- 1 | """ 2 | @inproceedings{BuiNguTur17, 3 | title = {Streaming sparse {G}aussian process approximations}, 4 | author = {Bui, Thang D. and Nguyen, Cuong V. and Turner, Richard E.}, 5 | booktitle = {Advances in Neural Information Processing Systems 30}, 6 | year = {2017} 7 | } 8 | 9 | @article{BuiYanTur16, 10 | title={A Unifying Framework for Sparse {G}aussian Process Approximation using {P}ower {E}xpectation {P}ropagation}, 11 | author={Thang D. Bui and Josiah Yan and Richard E. Turner}, 12 | journal={arXiv preprint arXiv:1605.07066}, 13 | year={2016} 14 | } 15 | """ 16 | import os 17 | import logging 18 | import pickle 19 | import time 20 | 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | from omegaconf import DictConfig, OmegaConf 24 | from hydra.utils import instantiate, call 25 | import hydra 26 | import wandb 27 | import gpflow 28 | 29 | import sys 30 | sys.path.append("..") 31 | 32 | from exp_utils import convert_data_to_online, get_hydra_output_dir 33 | 34 | 35 | def _setup_wandb(cfg): 36 | """ 37 | Set up wandb if username is passed. 38 | """ 39 | wandb_cfg = OmegaConf.to_container( 40 | cfg, resolve=True, throw_on_missing=True 41 | ) 42 | 43 | wandb.init(project="UCI", entity=cfg.wandb.username, config=wandb_cfg) 44 | 45 | log.info("wandb initialized!!!") 46 | 47 | 48 | # A logger for this file 49 | log = logging.getLogger(__name__) 50 | 51 | 52 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="streaming_experiment") 53 | def run_experiment(cfg: DictConfig): 54 | """ 55 | Initialize and run the experiment. 
56 | """ 57 | output_dir = get_hydra_output_dir() 58 | 59 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 60 | 61 | if len(all_train_data) > 1: 62 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 63 | 64 | k_fold_nlpds = [] 65 | k_fold_rmse = [] 66 | k_fold_time = [] 67 | k_fold_id = 0 68 | 69 | log.info(f"---------------------------------------------") 70 | log.info(f"Dataset : {cfg.dataset}") 71 | log.info(f"---------------------------------------------") 72 | for train_data, test_data in zip(all_train_data, all_test_data): 73 | 74 | log.info(f"---------------------------------------------") 75 | log.info(f"Starting with set {k_fold_id}") 76 | log.info(f"---------------------------------------------") 77 | 78 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 79 | log.info("Splitting data into sets...") 80 | 81 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 82 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 83 | 84 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 85 | 86 | if cfg.wandb.username is not None: 87 | _setup_wandb(cfg) 88 | 89 | # Set up inducing variables 90 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 91 | cfg.model.inducing_variable = inducing_variable 92 | 93 | if "SGPR" in cfg.model._target_: 94 | model = instantiate(cfg.model)(data=online_data[0]) 95 | else: 96 | cfg.model.num_data = train_data[0].shape[0] 97 | model = instantiate(cfg.model) 98 | 99 | # Loading model hyperparam values 100 | if cfg.load_model_path is None: 101 | raise Exception("FC model should have a model path from where hyperparams are loaded!") 102 | 103 | model_path = os.path.join(cfg.load_model_path, "model_" + str(k_fold_id) + ".pkl") 104 | 105 | with open(model_path, "rb") as f: 106 | dict_params = pickle.load(f) 107 | 108 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 109 | model.kernel.lengthscales = dict_params['.kernel.lengthscales'] 110 | model.kernel.variance = dict_params['.kernel.variance'] 111 | 112 | # not present in classification 113 | if '.likelihood.variance' in dict_params: 114 | model.likelihood.variance = dict_params['.likelihood.variance'] 115 | 116 | # make then non-trainable 117 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 118 | gpflow.utilities.set_trainable(model.kernel.lengthscales, False) 119 | gpflow.utilities.set_trainable(model.kernel.variance, False) 120 | 121 | if isinstance(model.likelihood, gpflow.likelihoods.Gaussian): 122 | gpflow.utilities.set_trainable(model.likelihood.variance, False) 123 | 124 | log.info("Model initialized; Optimization started!!!") 125 | 126 | optimizer = instantiate(cfg.optimizer) 127 | 128 | start_time = time.time() 129 | nlpd_vals, rmse_vals, time_vals = call(cfg.optimize)(optimizer=optimizer, model=model, train_data=online_data, 130 | test_data=test_data, use_old_z=True, fast_conditioning=True) 131 | end_time = time.time() 132 | 133 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 134 | log.info(f"Test RMSE: {rmse_vals[-1]}") 135 | log.info(f"Time (s): {end_time - start_time}") 136 | log.info("Optimization successfully done!!!") 137 | 138 | if cfg.wandb.username is not None: 139 | plt.clf() 140 | plt.plot(nlpd_vals) 141 | plt.title("NLPD") 142 | wandb.log({"optim_nlpd_vals": plt}) 143 | 144 | plt.clf() 145 | plt.plot(rmse_vals) 146 | plt.title("RMSE") 147 | 
wandb.log({"optim_rmse_vals": plt}) 148 | 149 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 150 | rmse=rmse_vals, time_vals=time_vals) 151 | 152 | parameters = gpflow.utilities.parameter_dict(model) 153 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 154 | pickle.dump(parameters, f) 155 | 156 | k_fold_id += 1 157 | k_fold_nlpds.append(nlpd_vals[-1]) 158 | k_fold_rmse.append(rmse_vals[-1]) 159 | k_fold_time.append(end_time-start_time) 160 | log.info(f"---------------------------------------------") 161 | 162 | if len(k_fold_nlpds) > 1: 163 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 164 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 165 | 166 | log.info(f"Mean RMSE over k-folds = {np.mean(k_fold_rmse)}") 167 | log.info(f"Std RMSE over k-folds = {np.std(k_fold_rmse)}") 168 | 169 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 170 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 171 | 172 | 173 | if __name__ == '__main__': 174 | run_experiment() 175 | -------------------------------------------------------------------------------- /experiments/uci/uci_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility function for UCI datasets. 3 | """ 4 | import time 5 | from typing import Tuple 6 | import os 7 | 8 | import gpflow.models 9 | import tensorflow as tf 10 | import pandas as pd 11 | import numpy as np 12 | from gpflow.likelihoods import Bernoulli 13 | from sklearn.preprocessing import StandardScaler 14 | from gpflow.models.svgp import SVGP 15 | from sklearn.model_selection import KFold 16 | import wandb 17 | from omegaconf import OmegaConf 18 | 19 | import sys 20 | 21 | sys.path.append("../..") 22 | sys.path.append("..") 23 | 24 | from src.streaming_sparse_gp.osgpr import OSGPR_VFE 25 | from src.streaming_sparse_gp.osvgpc import OSVGPC 26 | 27 | 28 | def setup_wandb(cfg): 29 | """ 30 | Set up wandb. 31 | """ 32 | wandb_cfg = OmegaConf.to_container( 33 | cfg, resolve=True, throw_on_missing=True 34 | ) 35 | 36 | wandb.init(project="UCI", entity=cfg.wandb.username, config=wandb_cfg) 37 | 38 | 39 | def load_data(data_path: str, train_split_percentage: float = 0.8, normalize: bool = False, 40 | seed: int = None, n_k_folds: int = None, random_state: int = None, 41 | dataset_type: str = "regression") -> (Tuple[np.ndarray, np.ndarray], 42 | Tuple[np.ndarray, np.ndarray]): 43 | """ 44 | Load UCI dataset on the basis of data name. 45 | 46 | If k_folds is passed then a list of several folds are returned. 47 | 48 | returns a list of set of (X, Y) as Tuple as train_data and test_data. 
49 | """ 50 | if seed is not None: 51 | np.random.seed(seed) 52 | 53 | if not os.path.exists(data_path): 54 | raise Exception("Data path does not exist ") 55 | 56 | df = pd.read_csv(data_path) 57 | X = df.to_numpy()[:, :-1] 58 | Y = df.to_numpy()[:, -1].reshape((-1, 1)) 59 | 60 | if normalize: 61 | x_scaler = StandardScaler().fit(X) 62 | X = x_scaler.transform(X) 63 | 64 | if dataset_type == "regression": 65 | y_scaler = StandardScaler().fit(Y) 66 | Y = y_scaler.transform(Y) 67 | 68 | if n_k_folds is None: 69 | data_dim = X.shape[-1] 70 | n = Y.shape[0] 71 | 72 | # combine X and Y and shuffle 73 | XY = np.concatenate([X, Y], axis=1) 74 | np.random.shuffle(XY) 75 | 76 | n_train = int(np.floor(n * train_split_percentage)) 77 | 78 | x_train = XY[:n_train, :data_dim] 79 | y_train = XY[:n_train, data_dim:] 80 | 81 | x_test = XY[n_train:, :data_dim] 82 | y_test = XY[n_train:, data_dim:] 83 | 84 | if dataset_type == "classification": 85 | y_train = y_train.astype(np.int64) 86 | y_test = y_test.astype(np.int64) 87 | 88 | train_data = [(x_train, y_train)] 89 | test_data = [(x_test, y_test)] 90 | else: 91 | train_data, test_data = get_cross_validation_sets((X, Y), k_folds=n_k_folds, random_state=random_state) 92 | 93 | return train_data, test_data 94 | 95 | 96 | def load_model_parameters(model, params: dict): 97 | """Loads the parameters from dictionary to the model""" 98 | gpflow.utilities.multiple_assign(model, params) 99 | 100 | 101 | def get_cross_validation_sets(data: Tuple[np.ndarray, np.ndarray], k_folds=5, random_state: int = None): 102 | """ 103 | Split the dataset for K-Fold validation. 104 | """ 105 | 106 | kf = KFold(n_splits=k_folds, random_state=random_state, shuffle=True) 107 | 108 | train_k_folds_set = [] 109 | test_k_folds_set = [] 110 | for train_idx, test_idx in kf.split(data[0]): 111 | train_k_folds_set.append((data[0][train_idx], data[1][train_idx])) 112 | test_k_folds_set.append((data[0][test_idx], data[1][test_idx])) 113 | 114 | return train_k_folds_set, test_k_folds_set 115 | 116 | 117 | def optimize_streaming_model(optimizer, model, train_data: Tuple[np.ndarray, np.ndarray], 118 | test_data: Tuple[np.ndarray, np.ndarray], task: str, iterations: int = 100, 119 | use_old_z=False, fast_conditioning=False): 120 | """ 121 | Optimize the streaming model of Bui et al. 2017. 
122 | 123 | The code is based on the official implementation: https://github.com/thangbui/streaming_sparse_gp 124 | """ 125 | 126 | @tf.function 127 | def optimization_step_adam(): 128 | for _ in range(iterations): 129 | optimizer.minimize(model.training_loss, model.trainable_variables) 130 | 131 | # @tf.function 132 | def optimization_step_adam_classification(loss, variables): 133 | for _ in range(iterations): 134 | optimizer.minimize(loss, variables) 135 | 136 | def optimization_step_scipy(): 137 | optimizer.minimize(model.training_loss, model.trainable_variables, options={'maxiter': iterations}) 138 | 139 | def optimization_step(): 140 | if isinstance(optimizer, gpflow.optimizers.Scipy): 141 | optimization_step_scipy() 142 | else: 143 | optimization_step_adam() 144 | 145 | def get_model_prediction(): 146 | Zopt = model.inducing_variable.Z.numpy() 147 | mu, Su = model.predict_f(Zopt, full_cov=True) 148 | if len(Su.shape) == 3: 149 | Su = Su[0, :, :] 150 | 151 | return mu, Su, Zopt 152 | 153 | def init_Z(cur_Z, new_X, use_old_Z=True): 154 | if use_old_Z: 155 | Z = np.copy(cur_Z) 156 | else: 157 | M = cur_Z.shape[0] 158 | M_old = int(0.7 * M) 159 | M_new = M - M_old 160 | old_Z = cur_Z[np.random.permutation(M)[0:M_old], :] 161 | new_Z = new_X[np.random.permutation(new_X.shape[0])[0:M_new], :] 162 | Z = np.vstack((old_Z, new_Z)) 163 | return Z 164 | 165 | n_sets = len(train_data) 166 | 167 | # NLPD calculation 168 | f_mean, f_var = model.predict_f(test_data[0]) 169 | if len(f_var.shape) == 1: 170 | f_var = f_var[..., None] 171 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, test_data[1]) 172 | nlpd = -1 * tf.reduce_mean(nlpd) 173 | print(f"Initial NLPD: {nlpd}") 174 | 175 | nlpd_vals = [] 176 | evaluation_vals = [] 177 | time_vals = [] 178 | for n in range(n_sets): 179 | new_data = train_data[n] 180 | X, y = (new_data[0], new_data[1]) 181 | 182 | start_time = time.time() 183 | if task == "regression": 184 | if n == 0: 185 | optimization_step() 186 | 187 | mu, Su, Zopt = get_model_prediction() 188 | else: 189 | Kaa1 = model.kernel(model.inducing_variable.Z) 190 | 191 | Zinit = init_Z(Zopt, X, use_old_z) 192 | var = model.likelihood.variance 193 | if isinstance(model.kernel, gpflow.kernels.Matern52): 194 | kernel = gpflow.kernels.Matern52(variance=model.kernel.variance, 195 | lengthscales=model.kernel.lengthscales) 196 | else: # For running Magnetometer. 
197 | kernel = gpflow.kernels.Sum([gpflow.kernels.Constant(model.kernel.kernels[0].variance), 198 | gpflow.kernels.Matern52( 199 | lengthscales=model.kernel.kernels[1].lengthscales, 200 | variance=model.kernel.kernels[1].variance)]) 201 | 202 | model = OSGPR_VFE((X, y), kernel, mu, Su, Kaa1, Zopt, Zinit) 203 | model.likelihood.variance.assign(var) 204 | 205 | optimization_step() 206 | 207 | mu, Su, Zopt = get_model_prediction() 208 | else: 209 | if n == 0: 210 | if isinstance(optimizer, gpflow.optimizers.Scipy): 211 | gpflow.optimizers.Scipy().minimize( 212 | model.training_loss_closure((X, y)), model.trainable_variables, options={'maxiter': iterations}) 213 | else: 214 | for _ in range(iterations): 215 | optimizer.minimize(model.training_loss_closure((X, y)), model.trainable_variables) 216 | else: 217 | Zinit = init_Z(Zopt, X, use_old_z) 218 | if fast_conditioning: 219 | kernel = model.kernel 220 | else: 221 | kernel = gpflow.kernels.Matern52() 222 | model = OSVGPC((X, y), kernel, gpflow.likelihoods.Bernoulli(), mu, Su, Kaa, Zopt, 223 | Zinit) 224 | optimization_step_adam_classification(model.training_loss, model.trainable_variables) 225 | 226 | Zopt = model.inducing_variable.Z.numpy() 227 | mu, Su = model.predict_f(Zopt, full_cov=True) 228 | if len(Su.shape) == 3: 229 | Su = Su[0, :, :] + 1e-4 * np.eye(mu.shape[0]) 230 | Kaa = model.kernel(model.inducing_variable.Z) 231 | 232 | time_vals.append(time.time() - start_time) 233 | 234 | # NLPD calculation 235 | f_mean, f_var = model.predict_f(test_data[0]) 236 | if len(f_var.shape) == 1: 237 | f_var = f_var[..., None] 238 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, test_data[1]) 239 | nlpd = -1 * tf.reduce_mean(nlpd) 240 | nlpd_vals.append(nlpd) 241 | 242 | # RMSE calculation 243 | if task == "regression": 244 | y_pred, _ = model.likelihood.predict_mean_and_var(f_mean, f_var) 245 | rmse = np.sqrt(np.mean(np.square(y_pred - test_data[1]))) 246 | evaluation_vals.append(rmse) 247 | else: 248 | pred_mean, _ = model.likelihood.predict_mean_and_var(f_mean, f_var) 249 | pred_mean = pred_mean.numpy() 250 | pred_mean[pred_mean >= 0.5] = 1 251 | pred_mean[pred_mean < 0.5] = 0 252 | correct_prediction = np.sum(pred_mean == test_data[1]) 253 | acc = correct_prediction / test_data[0].shape[0] 254 | evaluation_vals.append(acc) 255 | 256 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 257 | print(f"Set {n}") 258 | print(f"NLPD = {nlpd_vals[-1]}") 259 | print(f"Eval. metric (RMSE/Acc.) 
= {evaluation_vals[-1]}") 260 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 261 | 262 | return nlpd_vals, evaluation_vals, time_vals 263 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==2.6.2 2 | pandas==1.3.5 3 | numpy==1.19.2 4 | scikit-learn==1.1.1 5 | hydra-core==1.2.0 6 | wandb==0.13.3 7 | gpflow==2.5.2 8 | matplotlib==3.5.0 9 | plotly==5.10.0 10 | tensorflow-probability==0.12.0 11 | tensorflow-estimator==2.6.0 12 | jupyter-core==4.10.0 13 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__init__.py -------------------------------------------------------------------------------- /src/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/__pycache__/sites.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__pycache__/sites.cpython-38.pyc -------------------------------------------------------------------------------- /src/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/tsvgp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/tsvgp.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/tsvgp_cont.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/tsvgp_cont.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/tsvgp_white.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/tsvgp_white.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/tsvgp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-SVGP model 3 | """ 4 | 5 | # Copyright Anonymous Authors 6 | # Only for double-blind review. Not to be shared. 7 | 8 | # This code has been extended from the SVGP implementation in GPflow and is 9 | # to be released under a compatible license. 10 | 11 | import abc 12 | 13 | import gpflow 14 | import numpy as np 15 | import tensorflow as tf 16 | from gpflow import kullback_leiblers 17 | from gpflow.conditionals import conditional 18 | from gpflow.config import default_float, default_jitter 19 | from gpflow.covariances import Kuf, Kuu 20 | from gpflow.models.model import GPModel, InputData, MeanAndVariance, RegressionData 21 | from gpflow.models.training_mixins import ExternalDataTrainingLossMixin 22 | from gpflow.models.util import inducingpoint_wrapper 23 | 24 | from src.sites import DenseSites 25 | from src.util import ( 26 | conditional_from_precision_sites, 27 | gradient_transformation_mean_var_to_expectation, 28 | posterior_from_dense_site, 29 | ) 30 | 31 | 32 | class base_SVGP(GPModel, ExternalDataTrainingLossMixin, abc.ABC): 33 | """ 34 | Modified gpflow.svgp.SVGP class to accommodate 35 | for different paramaterization of q(u) 36 | """ 37 | 38 | def __init__( 39 | self, 40 | kernel, 41 | likelihood, 42 | inducing_variable, 43 | *, 44 | mean_function=None, 45 | num_latent_gps: int = 1, 46 | num_data=None, 47 | ): 48 | """ 49 | - kernel, likelihood, inducing_variables, mean_function are appropriate 50 | GPflow objects 51 | - num_latent_gps is the number of latent processes to use, defaults to 1 52 | - num_data is the total number of observations, defaults to X.shape[0] 53 | (relevant when feeding in external minibatches) 54 | """ 55 | # init the super class, accept args 56 | super().__init__(kernel, likelihood, mean_function, num_latent_gps) 57 | self.num_data = num_data 58 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 59 | 60 | def get_mean_chol_cov_inducing_posterior(self): 61 | """Returns the mean and cholesky factor of the covariance matrix of q(u)""" 62 | raise NotImplementedError 63 | 64 | def prior_kl(self) -> tf.Tensor: 65 | """Returns the KL divergence KL[q(u)|p(u)]""" 66 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 67 | return kullback_leiblers.prior_kl( 68 | self.inducing_variable, self.kernel, q_mu, q_sqrt, whiten=False 69 | ) 70 | 71 | def maximum_log_likelihood_objective(self, data: RegressionData) -> tf.Tensor: 72 | """ 73 | The variational lower bound 74 | :param data: input data 75 | """ 76 | return self.elbo(data) 77 | 78 | def elbo(self, data: RegressionData) -> tf.Tensor: 79 | """ 80 | This gives a variational bound (the evidence lower bound or ELBO) on 81 | the log marginal likelihood of the model. 
82 | :param data: input data 83 | """ 84 | X, Y = data 85 | kl = self.prior_kl() 86 | f_mean, f_var = self.predict_f(X, full_cov=False, full_output_cov=False) 87 | var_exp = self.likelihood.variational_expectations(f_mean, f_var, Y) 88 | if self.num_data is not None: 89 | num_data = tf.cast(self.num_data, kl.dtype) 90 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 91 | scale = num_data / minibatch_size 92 | else: 93 | scale = tf.cast(1.0, kl.dtype) 94 | return tf.reduce_sum(var_exp) * scale - kl 95 | 96 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> MeanAndVariance: 97 | """ 98 | Posterior prediction at new input Xnew 99 | :param Xnew: N x D Tensor 100 | """ 101 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 102 | mu, var = conditional( 103 | Xnew, 104 | self.inducing_variable, 105 | self.kernel, 106 | q_mu, 107 | q_sqrt=q_sqrt, 108 | full_cov=full_cov, 109 | white=False, 110 | full_output_cov=full_output_cov, 111 | ) 112 | tf.debugging.assert_positive(var) 113 | return mu + self.mean_function(Xnew), var 114 | 115 | 116 | class t_SVGP(base_SVGP): 117 | """ 118 | Class for the t-SVGP model 119 | """ 120 | 121 | def __init__( 122 | self, 123 | kernel, 124 | likelihood, 125 | inducing_variable, 126 | *, 127 | mean_function=None, 128 | num_latent_gps: int = 1, 129 | lambda_1=None, 130 | lambda_2_sqrt=None, 131 | num_data=None, 132 | force=False, 133 | ): 134 | """ 135 | - kernel, likelihood, inducing_variables, mean_function are appropriate 136 | GPflow objects 137 | - num_latent_gps is the number of latent processes to use, defaults to 1 138 | - q_diag is a boolean. If True, the covariance is approximated by a 139 | diagonal matrix. 140 | - whiten is a boolean. If True, we use the whitened representation of 141 | the inducing points. 142 | - num_data is the total number of observations, defaults to X.shape[0] 143 | (relevant when feeding in external minibatches) 144 | """ 145 | # init the super class, accept args 146 | GPModel.__init__(self, kernel, likelihood, mean_function, num_latent_gps) 147 | 148 | self.num_data = num_data 149 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 150 | 151 | # init variational parameters 152 | self.num_inducing = self.inducing_variable.num_inducing 153 | 154 | self._init_variational_parameters(self.num_inducing, lambda_1, lambda_2_sqrt) 155 | self.whiten = False 156 | self.force = force 157 | 158 | def _init_variational_parameters(self, num_inducing, lambda_1, lambda_2_sqrt, **kwargs): 159 | """ 160 | Constructs the site parameters λ₁, Λ₂. 161 | for site t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u) 162 | 163 | Parameters 164 | ---------- 165 | :param num_inducing: int 166 | Number of inducing variables, typically referred to as M. 167 | :param lambda_1: np.array or None 168 | First order natural parameter of the variational site. 169 | :param lambda_2_sqrt: np.array or None 170 | Second order natural parameter of the variational site. 
171 | """ 172 | 173 | lambda_1 = np.zeros((num_inducing, self.num_latent_gps)) if lambda_1 is None else lambda_1 174 | if lambda_2_sqrt is None: 175 | lambda_2_sqrt = [ 176 | -tf.eye(num_inducing, dtype=default_float()) * 1e-10 177 | for _ in range(self.num_latent_gps) 178 | ] 179 | lambda_2_sqrt = np.array(lambda_2_sqrt) 180 | else: 181 | assert lambda_2_sqrt.ndim == 3 182 | self.num_latent_gps = lambda_2_sqrt.shape[0] 183 | 184 | self.sites = DenseSites(lambda_1, lambda_2_sqrt) 185 | 186 | @property 187 | def lambda_1(self): 188 | """first natural parameter""" 189 | return self.sites.lambda_1 190 | 191 | @property 192 | def lambda_2_sqrt(self): 193 | """Cholesky factor of the second natural parameter""" 194 | return self.sites.lambda_2_sqrt 195 | 196 | @property 197 | def lambda_2(self): 198 | """second natural parameter""" 199 | return tf.matmul(self.lambda_2_sqrt, self.lambda_2_sqrt, transpose_b=True) 200 | 201 | def get_mean_chol_cov_inducing_posterior(self): 202 | """ 203 | Computes the mean and cholesky factor of the posterior 204 | on the inducing variables q(u) = 𝓝(u; m, S) 205 | S = (K⁻¹ + Λ₂)⁻¹ = (K⁻¹ + L₂L₂ᵀ)⁻¹ = K - KL₂W⁻¹L₂ᵀK , W = (I + L₂ᵀKL₂)⁻¹ 206 | m = S λ₁ 207 | """ 208 | K_uu = Kuu( 209 | self.inducing_variable, self.kernel, jitter=default_jitter() 210 | ) # [P, M, M] or [M, M] 211 | return posterior_from_dense_site(K_uu, self.lambda_1, self.lambda_2_sqrt) 212 | 213 | # todo : make broadcastable 214 | def new_predict_f( 215 | self, Xnew: InputData, full_cov=False, full_output_cov=False 216 | ) -> MeanAndVariance: 217 | """ 218 | Posterior prediction at new input Xnew 219 | :param Xnew: N x D Tensor 220 | """ 221 | K_uu = Kuu( 222 | self.inducing_variable, self.kernel, jitter=default_jitter() 223 | ) # [P, M, M] or [M, M] 224 | K_uf = Kuf(self.inducing_variable, self.kernel, Xnew) # [P, M, M] or [M, M] 225 | K_ff = self.kernel.K_diag(Xnew)[..., None] 226 | 227 | mu, var = conditional_from_precision_sites( 228 | K_uu, K_ff, K_uf, self.lambda_1, L=self.lambda_2_sqrt 229 | ) 230 | tf.debugging.assert_positive(var) # We really should make the tests pass with this here 231 | return mu + self.mean_function(Xnew), var 232 | 233 | def natgrad_step(self, data, lr=0.1, jitter=1e-9): 234 | """Takes natural gradient step in Variational parameters in the local parameters 235 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 236 | Input: 237 | :param: X : N x D 238 | :param: Y: N x 1 239 | :param: lr: Scalar 240 | 241 | Output: 242 | Updates the params 243 | """ 244 | X, Y = data 245 | mean, var = self.predict_f(X) 246 | 247 | # todo : hack to get heterokedastic demo to run 248 | if isinstance( 249 | self.inducing_variable, gpflow.inducing_variables.SharedIndependentInducingVariables 250 | ): 251 | meanZ, _ = self.predict_f(self.inducing_variable.inducing_variables[0].Z) 252 | else: 253 | meanZ, _ = self.predict_f(self.inducing_variable.Z) 254 | 255 | with tf.GradientTape() as g: 256 | g.watch([mean, var]) 257 | ve = self.likelihood.variational_expectations(mean, var, Y) 258 | grads = g.gradient(ve, [mean, var]) 259 | 260 | # cropping grads to stay negative 261 | eps = 1e-8 262 | grads[1] = tf.minimum(grads[1], -eps * tf.ones_like(grads[1])) 263 | 264 | Id = tf.eye(self.num_inducing, dtype=tf.float64) 265 | 266 | # Compute the projection matrix A from prior information 267 | K_uu = Kuu(self.inducing_variable, self.kernel) 268 | K_uf = Kuf(self.inducing_variable, self.kernel, X) # [P, M, M] or [M, M] 269 | chol_Kuu = tf.linalg.cholesky(K_uu + Id * jitter) 270 | A = 
tf.transpose(tf.linalg.cholesky_solve(chol_Kuu, K_uf)) 271 | 272 | # ▽μ₁[Var_exp] = aₙαₙ , 273 | # ▽μ2[Var_exp] = λₙaₙaₙᵀ 274 | 275 | if tf.rank(A) == 2: 276 | A = tf.tile(A[..., None], [1, 1, self.num_latent_gps]) 277 | grads = [ 278 | tf.einsum("nml,nl->ml", A, grads[0]), 279 | tf.einsum("nml,nol,nl->lmo", A, A, grads[1]), 280 | ] 281 | 282 | # chain rule at f 283 | grad_mu = gradient_transformation_mean_var_to_expectation(meanZ, grads) 284 | 285 | if self.num_data is not None: 286 | num_data = tf.cast(self.num_data, dtype=tf.float64) 287 | minibatch_size = tf.cast(tf.shape(X)[0], dtype=tf.float64) 288 | scale = num_data / minibatch_size 289 | else: 290 | scale = tf.cast(1.0, dtype=tf.float64) 291 | 292 | lambda_2 = -0.5 * self.lambda_2 293 | lambda_1 = self.lambda_1 294 | # compute update in natural form 295 | lambda_1 = (1 - lr) * lambda_1 + lr * scale * grad_mu[0] 296 | lambda_2 = (1 - lr) * lambda_2 + lr * scale * grad_mu[1] 297 | 298 | # transform and perform update 299 | lambda_2_sqrt = -tf.linalg.cholesky(-2.0 * lambda_2 + Id * jitter) 300 | # To match SVGP you need to eliminate this jitter for minibatching 301 | self.lambda_1.assign(lambda_1) 302 | self.lambda_2_sqrt.assign(lambda_2_sqrt) 303 | self.get_mean_chol_cov_inducing_posterior() 304 | -------------------------------------------------------------------------------- /src/models/tsvgp_sites.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-SVGP models with individual sites per data point. 3 | """ 4 | from typing import Optional 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from gpflow import default_jitter, kullback_leiblers 9 | from gpflow.conditionals import conditional 10 | from gpflow.covariances import Kuf, Kuu 11 | from gpflow.models import GPModel 12 | from gpflow.models.training_mixins import InputData, RegressionData 13 | from gpflow.models.util import inducingpoint_wrapper 14 | #from gpflow.types import MeanAndVariance 15 | 16 | from src.sites import DiagSites 17 | from src.util import posterior_from_dense_site_white, project_diag_sites 18 | 19 | 20 | class t_SVGP_sites(GPModel): 21 | """ 22 | Class for the t-SVGP model with sites 23 | """ 24 | 25 | def __init__( 26 | self, 27 | data: RegressionData, 28 | kernel, 29 | likelihood, 30 | inducing_variable, 31 | *, 32 | mean_function=None, 33 | num_latent_gps: int = 1, 34 | lambda_1=None, 35 | lambda_2=None, 36 | num_latent: Optional[int] = 1 37 | ): 38 | """ 39 | - kernel, likelihood, inducing_variables, mean_function are appropriate 40 | GPflow objects 41 | - num_latent_gps is the number of latent processes to use, defaults to 1 42 | - q_diag is a boolean. If True, the covariance is approximated by a 43 | diagonal matrix. 44 | - whiten is a boolean. If True, we use the whitened representation of 45 | the inducing points. 
46 | - num_data is the total number of observations, defaults to X.shape[0] 47 | (relevant when feeding in external minibatches) 48 | """ 49 | GPModel.__init__(self, kernel, likelihood, mean_function, num_latent_gps) 50 | x_data, y_data = data 51 | num_data = x_data.shape[0] 52 | self.num_data = num_data 53 | self.num_latent = num_latent or y_data.shape[1] 54 | self.data = data 55 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 56 | 57 | self.num_inducing = self.inducing_variable.num_inducing 58 | self._init_variational_parameters(self.num_data, lambda_1, lambda_2) 59 | self.whiten = False 60 | 61 | def _init_variational_parameters(self, num_inducing, lambda_1, lambda_2): 62 | """ 63 | Constructs the site parameters λ₁, Λ₂. 64 | for site t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u) 65 | 66 | Parameters 67 | ---------- 68 | :param num_inducing: int 69 | Number of inducing variables, typically referred to as M. 70 | :param lambda_1: np.array or None 71 | First order natural parameter of the variational site. 72 | :param lambda_2: np.array or None 73 | Second order natural parameter of the variational site. 74 | """ 75 | 76 | lambda_1 = np.zeros((num_inducing, self.num_latent_gps)) if lambda_1 is None else lambda_1 77 | if lambda_2 is None: 78 | lambda_2 = ( 79 | np.ones((num_inducing, self.num_latent_gps)) * 1e-6 80 | if lambda_2 is None 81 | else lambda_2 82 | ) 83 | else: 84 | assert lambda_2.ndim == 2 85 | self.num_latent_gps = lambda_2.shape[-1] 86 | 87 | self.sites = DiagSites(lambda_1, lambda_2) 88 | 89 | @property 90 | def lambda_1(self): 91 | """first natural parameter""" 92 | return self.sites.lambda_1 93 | 94 | @property 95 | def lambda_2(self): 96 | """second natural parameter""" 97 | return self.sites.lambda_2 98 | 99 | def get_mean_chol_cov_inducing_posterior(self): 100 | """ 101 | Computes the mean and cholesky factor of the posterior 102 | on the inducing variables q(u) = 𝓝(u; m, S) 103 | S = (K⁻¹ + Λ₂)⁻¹ = (K⁻¹ + L₂L₂ᵀ)⁻¹ = K - KL₂W⁻¹L₂ᵀK , W = (I + L₂ᵀKL₂)⁻¹ 104 | m = S λ₁ 105 | """ 106 | X, _ = self.data 107 | K_uu = Kuu( 108 | self.inducing_variable, self.kernel, jitter=default_jitter() 109 | ) # [P, M, M] or [M, M] 110 | K_uf = Kuf(self.inducing_variable, self.kernel, X) # [P, M, M] or [M, M] 111 | lambda_1, lambda_2 = project_diag_sites(K_uf, self.lambda_1, self.lambda_2, cholesky=False) 112 | return posterior_from_dense_site_white(K_uu, lambda_1, lambda_2) 113 | 114 | def natgrad_step(self, lr=0.1): 115 | """Takes natural gradient step in Variational parameters in the local parameters 116 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 117 | Input: 118 | :param: X : N x D 119 | :param: Y: N x 1 120 | :param: lr: Scalar 121 | 122 | Output: 123 | Updates the params 124 | """ 125 | X, Y = self.data 126 | mean, var = self.predict_f(X) 127 | 128 | with tf.GradientTape() as g: 129 | g.watch([mean, var]) 130 | ve = self.likelihood.variational_expectations(mean, var, Y) 131 | grads = g.gradient(ve, [mean, var]) 132 | 133 | grads = grads[0] - 2.0 * grads[1] * mean, grads[1] 134 | 135 | # compute update in natural form 136 | lambda_2 = -0.5 * self.lambda_2 137 | lambda_1 = self.lambda_1 138 | 139 | lambda_1 = (1 - lr) * lambda_1 + lr * grads[0] 140 | lambda_2 = (1 - lr) * lambda_2 + lr * grads[1] 141 | 142 | eps = 1e-8 143 | # crop hack, can't instantiate negative sites nats2 but optim might take you there 144 | lambda_2 = tf.minimum(lambda_2, -eps * tf.ones_like(lambda_2)) 145 | 146 | # To match SVGP you need to eliminate this jitter for minibatching 147 | self.lambda_1.assign(lambda_1) 148 
| self.lambda_2.assign(-2.0 * lambda_2) 149 | 150 | def prior_kl(self) -> tf.Tensor: 151 | """Returns the KL divergence KL[q(u)|p(u)]""" 152 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 153 | return kullback_leiblers.prior_kl( 154 | self.inducing_variable, self.kernel, q_mu, q_sqrt, whiten=self.whiten 155 | ) 156 | 157 | def maximum_log_likelihood_objective(self) -> tf.Tensor: 158 | """The variational lower bound""" 159 | return self.elbo() 160 | 161 | def elbo(self) -> tf.Tensor: 162 | """ 163 | This gives a variational bound (the evidence lower bound or ELBO) on 164 | the log marginal likelihood of the model. 165 | """ 166 | X, Y = self.data 167 | kl = self.prior_kl() 168 | f_mean, f_var = self.predict_f(X, full_cov=False, full_output_cov=False) 169 | var_exp = self.likelihood.variational_expectations(f_mean, f_var, Y) 170 | if self.num_data is not None: 171 | num_data = tf.cast(self.num_data, kl.dtype) 172 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 173 | scale = num_data / minibatch_size 174 | else: 175 | scale = tf.cast(1.0, kl.dtype) 176 | 177 | return tf.reduce_sum(var_exp) * scale - kl 178 | 179 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> None: 180 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 181 | mu, var = conditional( 182 | Xnew, 183 | self.inducing_variable, 184 | self.kernel, 185 | q_mu, 186 | q_sqrt=q_sqrt, 187 | full_cov=full_cov, 188 | white=self.whiten, 189 | full_output_cov=full_output_cov, 190 | ) 191 | return mu + self.mean_function(Xnew), var 192 | -------------------------------------------------------------------------------- /src/models/tsvgp_white.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-SVGP model with whitened parameterization 3 | """ 4 | import numpy as np 5 | import tensorflow as tf 6 | from gpflow import default_float, default_jitter 7 | from gpflow.covariances import Kuf, Kuu 8 | from gpflow.models import GPModel 9 | from gpflow.models.model import RegressionData 10 | from gpflow.models.training_mixins import InputData 11 | from gpflow.models.util import inducingpoint_wrapper 12 | #from gpflow.types import MeanAndVariance 13 | 14 | import sys 15 | sys.path.append("../..") 16 | 17 | from src.sites import DenseSites 18 | from src.util import ( 19 | conditional_from_precision_sites_white, 20 | conditional_from_precision_sites_white_full, 21 | gradient_transformation_mean_var_to_expectation, 22 | kl_from_precision_sites_white, 23 | posterior_from_dense_site_white, 24 | ) 25 | 26 | from src.models.tsvgp import base_SVGP 27 | 28 | 29 | class t_SVGP_white(base_SVGP): 30 | """ 31 | Class for the t-SVGP model with whitened paramterization 32 | """ 33 | 34 | def __init__( 35 | self, 36 | kernel, 37 | likelihood, 38 | inducing_variable, 39 | *, 40 | mean_function=None, 41 | num_latent_gps: int = 1, 42 | lambda_1=None, 43 | lambda_2=None, 44 | num_data=None, 45 | ): 46 | """ 47 | - kernel, likelihood, inducing_variables, mean_function are appropriate 48 | GPflow objects 49 | - num_latent_gps is the number of latent processes to use, defaults to 1 50 | - q_diag is a boolean. If True, the covariance is approximated by a 51 | diagonal matrix. 52 | - whiten is a boolean. If True, we use the whitened representation of 53 | the inducing points. 
54 | - num_data is the total number of observations, defaults to X.shape[0] 55 | (relevant when feeding in external minibatches) 56 | """ 57 | # init the super class, accept args 58 | GPModel.__init__(self, kernel, likelihood, mean_function, num_latent_gps) 59 | 60 | self.num_data = num_data 61 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 62 | 63 | # init variational parameters 64 | self.num_inducing = self.inducing_variable.num_inducing 65 | 66 | self._init_variational_parameters(self.num_inducing, lambda_1, lambda_2) 67 | 68 | def _init_variational_parameters(self, num_inducing, lambda_1, lambda_2): 69 | """ 70 | Constructs the site parameters λ₁, Λ₂. 71 | for site t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u) 72 | 73 | Parameters 74 | ---------- 75 | :param num_inducing: int 76 | Number of inducing variables, typically referred to as M. 77 | :param lambda_1: np.array or None 78 | First order natural parameter of the variational site. 79 | :param lambda_2_sqrt: np.array or None 80 | Second order natural parameter of the variational site. 81 | """ 82 | 83 | lambda_1 = np.zeros((num_inducing, self.num_latent_gps)) if lambda_1 is None else lambda_1 84 | 85 | if lambda_2 is None: 86 | lambda_2 = [ 87 | tf.eye(num_inducing, dtype=default_float()) * 1e-10 88 | for _ in range(self.num_latent_gps) 89 | ] 90 | lambda_2 = np.array(lambda_2) 91 | else: 92 | assert lambda_2.ndim == 3 93 | self.num_latent_gps = lambda_2.shape[0] 94 | 95 | self.sites = DenseSites(lambda_1=lambda_1, lambda_2=lambda_2) 96 | 97 | @property 98 | def lambda_1(self): 99 | return self.sites.lambda_1 100 | 101 | @property 102 | def lambda_2(self): 103 | return self.sites.lambda_2 104 | 105 | def get_mean_chol_cov_inducing_posterior(self): 106 | """ 107 | Computes the mean and cholesky factor of the posterior 108 | on the inducing variables q(u) = 𝓝(u; m, S) 109 | S = (K⁻¹ + Λ₂)⁻¹ = (K⁻¹ + L₂L₂ᵀ)⁻¹ = K - KL₂W⁻¹L₂ᵀK , W = (I + L₂ᵀKL₂)⁻¹ 110 | m = S λ₁ 111 | """ 112 | # THIS FUNCTION IS WRONG 113 | K_uu = Kuu( 114 | self.inducing_variable, self.kernel, jitter=default_jitter() 115 | ) # [P, M, M] or [M, M] 116 | return posterior_from_dense_site_white(K_uu, self.lambda_1, self.lambda_2) 117 | 118 | 119 | @property 120 | def cache_statistics(self): 121 | return self.cache_statistics_from_data(self.data) 122 | 123 | def prior_kl(self) -> tf.Tensor: 124 | K_uu = Kuu( 125 | self.inducing_variable, self.kernel, jitter=default_jitter() 126 | ) # [P, M, M] or [M, M] 127 | return kl_from_precision_sites_white(K_uu, self.lambda_1, L2=self.lambda_2) 128 | 129 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> None: 130 | K_uu = Kuu( 131 | self.inducing_variable, self.kernel, jitter=default_jitter() 132 | ) # [P, M, M] or [M, M] 133 | K_uf = Kuf(self.inducing_variable, self.kernel, Xnew) # [P, M, M] or [M, M] 134 | #print(K_uf.shape) 135 | 136 | if full_output_cov == False: 137 | K_ff = self.kernel.K_diag(Xnew)[..., None] 138 | 139 | mu, var = conditional_from_precision_sites_white( 140 | K_uu, K_ff, K_uf, self.lambda_1, L2=self.lambda_2) 141 | else: 142 | K_ff = self.kernel.K(Xnew)[None, ...] 
143 | mu, var = conditional_from_precision_sites_white_full( 144 | K_uu, K_ff, K_uf, self.lambda_1, L2=self.lambda_2) 145 | 146 | #tf.debugging.assert_positive(var) # We really should make the tests pass with this here 147 | return mu + self.mean_function(Xnew), var 148 | 149 | def predict_f_extra_data(self, Xnew: InputData, extra_data=RegressionData, 150 | jitter=default_jitter(), full_output_cov=False) -> None: 151 | """ 152 | Compute the mean and variance of the latent function at some new points 153 | Xnew. 154 | """ 155 | 156 | grad_mu = self.grad_varexp_natural_params(extra_data) 157 | 158 | lambda_1 = self.lambda_1 159 | lambda_2 = self.lambda_2 160 | 161 | K_uu = Kuu(self.inducing_variable, self.kernel, jitter=jitter) 162 | 163 | lambda_1c = lambda_1 + grad_mu[0] 164 | lambda_2c = lambda_2 + -2*grad_mu[1] 165 | 166 | # predicting at new inputs 167 | K_uf = Kuf(self.inducing_variable, self.kernel, Xnew) 168 | 169 | if full_output_cov == False: 170 | K_ff = self.kernel.K_diag(Xnew)[..., None] 171 | 172 | mu, var = conditional_from_precision_sites_white( 173 | K_uu, K_ff, K_uf, lambda_1c, L2=lambda_2c) 174 | else: 175 | K_ff = self.kernel.K(Xnew)[None, ...] 176 | mu, var = conditional_from_precision_sites_white_full( 177 | K_uu, K_ff, K_uf, lambda_1c, L2=lambda_2c) 178 | 179 | return mu + self.mean_function(Xnew), var 180 | 181 | 182 | def elbo(self, data: RegressionData) -> tf.Tensor: 183 | """ 184 | This gives a variational bound (the evidence lower bound or ELBO) on 185 | the log marginal likelihood of the model. 186 | """ 187 | X, Y = data 188 | kl = self.prior_kl() 189 | f_mean, f_var = self.predict_f(X, full_cov=False, full_output_cov=False) 190 | var_exp = self.likelihood.variational_expectations(f_mean, f_var, Y) 191 | if self.num_data is not None: 192 | num_data = tf.cast(self.num_data, kl.dtype) 193 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 194 | scale = num_data / minibatch_size 195 | else: 196 | scale = tf.cast(1.0, kl.dtype) 197 | return tf.reduce_sum(var_exp) * scale - kl 198 | 199 | def maximum_log_likelihood_objective(self, data) -> tf.Tensor: 200 | """The variational lower bound""" 201 | return self.elbo(data) 202 | 203 | 204 | def grad_varexp_natural_params(self, data, jitter=1e-9, nat_params=None): 205 | X, Y = data 206 | # print(X.shape) 207 | mean, var = self.predict_f(X) 208 | 209 | with tf.GradientTape(persistent=True) as g: 210 | g.watch(mean) 211 | g.watch(var) 212 | ve = self.likelihood.variational_expectations(mean, var, Y) 213 | d_exp_dm = g.gradient(ve, mean) 214 | d_exp_dv = g.gradient(ve, var) 215 | del g 216 | 217 | eps = 1e-8 218 | d_exp_dv = tf.minimum(d_exp_dv, -eps * tf.ones_like(d_exp_dv)) 219 | 220 | 221 | grad_nat_1 = (d_exp_dm - 2.0 * (d_exp_dv * mean)) 222 | grad_nat_2 = d_exp_dv 223 | 224 | K_uf = Kuf(self.inducing_variable, self.kernel, X) 225 | 226 | grad_sparse_1 = K_uf @ grad_nat_1 227 | 228 | grad_sparse_2 = K_uf @ tf.linalg.diag(tf.transpose(grad_nat_2)) @ tf.transpose(K_uf) 229 | 230 | return (grad_sparse_1, grad_sparse_2) 231 | 232 | 233 | def natgrad_step(self, dataset, lr=1.0, jitter=1e-9): 234 | """Takes natural gradient step in Variational parameters in the local parameters 235 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 236 | 237 | Input: 238 | :param: X : N x D 239 | :param: Y: N x 1 240 | :param: lr: Scalar 241 | 242 | Output: 243 | Updates the params 244 | """ 245 | 246 | X, Y = dataset 247 | 248 | # chain rule at f 249 | grad_mu = self.grad_varexp_natural_params((X, Y)) 250 | # K_uu = Kuu(self.inducing_variable, self.kernel) 
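# grad_mu computed above already contains the variational-expectation gradients
# mapped onto the inducing set: grad_mu[0] = K_uf (∇_m VE - 2 ∇_v VE ⊙ m) and
# grad_mu[1] = K_uf diag(∇_v VE) K_fu (see grad_varexp_natural_params), so the
# damped update below acts directly on the site natural parameters λ₁ and Λ₂.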
251 | 252 | if self.num_data is not None: 253 | num_data = tf.cast(self.num_data, dtype=tf.float64) 254 | minibatch_size = tf.cast(tf.shape(X)[0], dtype=tf.float64) 255 | scale = num_data / minibatch_size 256 | else: 257 | scale = tf.cast(1.0, dtype=tf.float64) 258 | 259 | lambda_1 = self.lambda_1 260 | lambda_2 = self.lambda_2 261 | 262 | # compute update in natural form 263 | # Old version: projection matrix A in grad_varexp_natural_params includes Kuu^{-1} 264 | # lambda_1 = (1.0 - lr) * lambda_1 + lr * scale * K_uu @ grad_mu[0] 265 | # lambda_2 = (1.0 - lr) * lambda_2 + lr * scale * K_uu @ grad_mu[1] @ K_uu 266 | # New version: removed Kuu as well as Kuu inverse in grad_varexp_natural_params 267 | lambda_1 = (1.0 - lr) * lambda_1 + lr * scale * grad_mu[0] 268 | lambda_2 = (1.0 - lr) * lambda_2 + lr * scale * (-2) * grad_mu[1] 269 | 270 | self.lambda_1.assign(lambda_1) 271 | self.lambda_2.assign(lambda_2) -------------------------------------------------------------------------------- /src/models/tvgp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-VGP model class 3 | """ 4 | from typing import Optional 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from gpflow.config import default_float, default_jitter 9 | from gpflow.kernels import Kernel 10 | from gpflow.likelihoods import Likelihood 11 | from gpflow.mean_functions import MeanFunction 12 | from gpflow.models.model import GPModel, InputData, MeanAndVariance, RegressionData 13 | from gpflow.models.training_mixins import InternalDataTrainingLossMixin 14 | 15 | from src.sites import DiagSites 16 | 17 | 18 | class t_VGP(GPModel, InternalDataTrainingLossMixin): 19 | r""" 20 | This method approximates the Gaussian process posterior using a multivariate Gaussian. 21 | 22 | The idea is that the posterior over the function-value vector F is 23 | approximated by a Gaussian, and the KL divergence is minimised between 24 | the approximation and the posterior. 25 | 26 | The key reference is: 27 | Khan, M., & Lin, W. (2017). Conjugate-Computation Variational Inference: 28 | Converting Variational Inference in Non-Conjugate Models to Inferences in Conjugate Models. 29 | In Artificial Intelligence and Statistics (pp. 878-887). 
30 | 31 | """ 32 | 33 | def __init__( 34 | self, 35 | data: RegressionData, 36 | kernel: Kernel, 37 | likelihood: Likelihood, 38 | mean_function: Optional[MeanFunction] = None, 39 | num_latent: Optional[int] = 1, 40 | ): 41 | """ 42 | X is a data matrix, size [N, D] 43 | Y is a data matrix, size [N, R] 44 | kernel, likelihood, mean_function are appropriate GPflow objects 45 | 46 | """ 47 | super().__init__(kernel, likelihood, mean_function, num_latent) 48 | 49 | x_data, y_data = data 50 | num_data = x_data.shape[0] 51 | self.num_data = num_data 52 | self.num_latent = num_latent or y_data.shape[1] 53 | self.data = data 54 | 55 | lambda_1 = np.zeros((num_data, self.num_latent)) 56 | lambda_2 = 1e-6 * np.ones((num_data, self.num_latent)) 57 | self.sites = DiagSites(lambda_1, lambda_2) 58 | 59 | @property 60 | def lambda_1(self): 61 | """first natural parameter""" 62 | return self.sites.lambda_1 63 | 64 | @property 65 | def lambda_2(self): 66 | """second natural parameter""" 67 | return self.sites.lambda_2 68 | 69 | def maximum_log_likelihood_objective(self, *args, **kwargs) -> tf.Tensor: 70 | return self.elbo() 71 | 72 | def elbo(self) -> tf.Tensor: 73 | """ 74 | This gives a variational bound (the evidence lower bound or ELBO) on 75 | the log marginal likelihood of the model. 76 | """ 77 | x_data, y_data = self.data 78 | pseudo_y = self.lambda_1 / self.lambda_2 79 | sW = tf.sqrt(tf.abs(self.lambda_2)) 80 | 81 | # Computes conversion λ₁, λ₂ → m, V by using q(f) ≃ t(f)p(f) 82 | K = self.kernel(x_data) + tf.eye(self.num_data, dtype=default_float()) * default_jitter() 83 | # L = chol(I + √λ₂ᵀ K √λ₂ᵀ) 84 | L = tf.linalg.cholesky( 85 | tf.eye(self.num_data, dtype=tf.float64) + (sW @ tf.transpose(sW)) * K 86 | ) 87 | # T = L⁻¹ λ₂ K 88 | T = tf.linalg.solve(L, tf.tile(sW, (1, self.num_data)) * K) 89 | # Σ = (K⁻¹ + λ₂)⁻¹ = K - K √λ₂ (I + √λ₂ᵀ K √λ₂ᵀ)⁻¹ √λ₂ᵀ K = K - K √λ₂L⁻ᵀL⁻¹√λ₂ᵀ K 90 | post_v = tf.reshape( 91 | tf.linalg.diag_part(K) - tf.reduce_sum(T * T, axis=0), (self.num_data, 1) 92 | ) 93 | # Σ = (K⁻¹ + λ₂)⁻¹ = (K⁻¹(I + λ₂K))⁻¹ = K (I + λ₂K)⁻¹ = K L⁻ᵀL⁻¹ 94 | # μ = Σ λ₁ = K L⁻ᵀL⁻¹ λ₂ (λ₂⁻¹λ₁) = K α 95 | alpha = sW * tf.linalg.solve(tf.transpose(L), tf.linalg.solve(L, sW * pseudo_y)) 96 | post_m = K @ alpha 97 | # Store alpha for prediction 98 | self.q_alpha = alpha 99 | 100 | # Get variational expectations. 
101 | # ELBO = E_q log(p(y,f)/q(t)) = E_q log(p(y|f)p(f))/Z⁻¹ p(f)t(f)) 102 | # = log(Z) - E_q log t(f) + E_q log p(y|f) 103 | # log_Z = \int p(f)t(f)df 104 | E_q_log_lik = tf.reduce_sum( 105 | self.likelihood.variational_expectations(post_m, post_v, y_data) 106 | ) 107 | E_q_log_t = -tf.reduce_sum(0.5 * (self.lambda_2) * ((pseudo_y - post_m) ** 2 + post_v)) 108 | log_Z = -tf.transpose(pseudo_y) @ alpha / 2.0 - tf.reduce_sum( 109 | tf.math.log(tf.linalg.diag_part(L)) 110 | ) 111 | elbo = log_Z - E_q_log_t + E_q_log_lik 112 | return elbo 113 | 114 | def update_variational_parameters(self, beta=0.05) -> tf.Tensor: 115 | """Takes natural gradient step in Variational parameters in the local parameters 116 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 117 | Input: 118 | :param: X : N x D 119 | :param: Y: N x 1 120 | :param: lr: Scalar 121 | 122 | Output: 123 | Updates the params 124 | """ 125 | 126 | x_data, y_data = self.data 127 | pseudo_y = self.lambda_1 / self.lambda_2 128 | sW = tf.sqrt(tf.abs(self.lambda_2)) 129 | 130 | # Computes conversion λ₁, λ₂ → m, V by using q(f) ≃ t(f)p(f) 131 | K = self.kernel(x_data) + tf.eye(self.num_data, dtype=default_float()) * default_jitter() 132 | L = tf.linalg.cholesky( 133 | tf.eye(self.num_data, dtype=tf.float64) + (sW @ tf.transpose(sW)) * K 134 | ) 135 | T = tf.linalg.solve(L, tf.tile(sW, (1, self.num_data)) * K) 136 | post_v = tf.reshape( 137 | tf.linalg.diag_part(K) - tf.reduce_sum(T * T, axis=0), (self.num_data, 1) 138 | ) 139 | alpha = sW * tf.linalg.solve(tf.transpose(L), tf.linalg.solve(L, sW * pseudo_y)) 140 | post_m = K @ alpha 141 | 142 | # Keep alphas updated 143 | self.q_alpha = alpha 144 | 145 | # Get variational expectations derivatives. 146 | with tf.GradientTape(persistent=True) as g: 147 | g.watch(post_m) 148 | g.watch(post_v) 149 | var_exp = self.likelihood.variational_expectations(post_m, post_v, y_data) 150 | 151 | d_exp_dm = g.gradient(var_exp, post_m) 152 | d_exp_dv = g.gradient(var_exp, post_v) 153 | del g 154 | 155 | # Take the tVGP step and transform to be ▽μ[Var_exp] 156 | lambda_1 = (1.0 - beta) * self.lambda_1 + beta * (d_exp_dm - 2.0 * (d_exp_dv * post_m)) 157 | lambda_2 = (1.0 - beta) * self.lambda_2 + beta * (-2.0 * d_exp_dv) 158 | 159 | self.lambda_1.assign(lambda_1) 160 | self.lambda_2.assign(lambda_2) 161 | 162 | def predict_f( 163 | self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False 164 | ) -> MeanAndVariance: 165 | r""" 166 | The posterior variance of F is given by 167 | q(f) = N(f | K alpha + mean, [K⁻¹ + diag(lambda²)]⁻¹) 168 | Here we project this to F*, the values of the GP at Xnew which is given 169 | by 170 | q(F*) = N ( F* | K_{*F} alpha + mean, K_{**} - K_{*f}[K_{ff} + 171 | diag(lambda⁻²)]⁻¹ K_{f*} ) 172 | 173 | """ 174 | assert full_output_cov is False 175 | x_data, _y_data = self.data 176 | 177 | # Evaluate the kernel 178 | Kx = self.kernel(x_data, Xnew) 179 | K = self.kernel(x_data) 180 | 181 | # Predictive mean 182 | f_mean = tf.linalg.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew) 183 | 184 | # Predictive var 185 | A = K + tf.linalg.diag(tf.transpose(1.0 / self.lambda_2)) 186 | L = tf.linalg.cholesky(A) 187 | Kx_tiled = tf.tile(Kx[None, ...], [self.num_latent, 1, 1]) 188 | LiKx = tf.linalg.solve(L, Kx_tiled) 189 | if full_cov: 190 | f_var = self.kernel(Xnew) - tf.linalg.matmul(LiKx, LiKx, transpose_a=True) 191 | else: 192 | f_var = self.kernel(Xnew, full_cov=False) - tf.reduce_sum(tf.square(LiKx), 1) 193 | return f_mean, tf.transpose(f_var) 194 | 
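# ---------------------------------------------------------------------------
# Minimal usage sketch for t_VGP (illustration only; the toy data, kernel and
# optimizer settings below are placeholders, not taken from the experiment
# scripts). Training alternates the CVI natural-gradient update of the sites
# (update_variational_parameters) with a gradient step on the hyperparameters.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import gpflow
    import numpy as np
    import tensorflow as tf

    rng = np.random.default_rng(0)
    X = rng.uniform(0.0, 1.0, size=(50, 1))
    Y = np.sin(12.0 * X) + 0.2 * rng.standard_normal(size=(50, 1))

    model = t_VGP((X, Y), kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())
    adam = tf.optimizers.Adam(0.01)
    for _ in range(100):
        model.update_variational_parameters(beta=0.1)  # natural-gradient step on the sites λ₁, λ₂
        adam.minimize(model.training_loss, model.trainable_variables)  # hyperparameter step on the ELBO
    f_mean, f_var = model.predict_f(X[:5])
    print(float(model.elbo()), f_mean.shape, f_var.shape)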
-------------------------------------------------------------------------------- /src/models/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from src.util import mean_cov_to_natural_param 4 | 5 | def random_shuffle(x_batch, model, m): 6 | Z_old = model.inducing_variable.Z.numpy() 7 | Z_batch = np.concatenate([Z_old, x_batch], axis=0) 8 | np.random.shuffle(Z_batch) 9 | 10 | Z_new = Z_batch[:m] 11 | new_q_mu, new_f_cov = model.predict_f(Z_new, full_output_cov=True) 12 | K_zz = model.kernel(model.inducing_variable.Z) 13 | new_l1, new_l2 = mean_cov_to_natural_param(new_q_mu, new_f_cov, K_zz) 14 | 15 | return new_l1, new_l2, Z_new 16 | 17 | 18 | # Sampling functions 19 | def piv_chol(new_batch, old_batch, model, m_z, lamb=None, use_lamb=False): 20 | combined_batch = np.concatenate([old_batch, new_batch], axis=0) 21 | K_zb = model.kernel(combined_batch) 22 | 23 | if use_lamb is True: 24 | K_zb = np.diag(np.sqrt(lamb)) @ K_zb @ np.diag(np.sqrt(lamb)) 25 | 26 | get_diag = lambda: np.diag(K_zb).copy() 27 | get_row = lambda i: K_zb[i, :] 28 | _, pi = pivoted_chol(get_diag, get_row, M=m_z) 29 | Z_new = combined_batch[pi] 30 | return Z_new, pi 31 | 32 | 33 | def fixed_Z(new_batch, old_batch, model, m_z, update=True): 34 | Z_new = old_batch 35 | return Z_new, None 36 | 37 | 38 | def update_lambda_Z_move(model, z_new, z_old): 39 | old_l1 = model.lambda_1 40 | old_l2 = model.lambda_2 41 | 42 | K_zf = model.kernel(z_old, z_new) 43 | A_p = tf.linalg.solve(model.kernel(z_old), K_zf) 44 | 45 | new_l1 = tf.transpose(A_p) @ old_l1 46 | new_l2 = tf.transpose(A_p) @ old_l2 @ A_p 47 | return new_l1, new_l2 # TODO: write test here for broadcasting 48 | 49 | 50 | def pivoted_chol(get_diag, get_row, M, err_tol=1e-6): 51 | """ 52 | A simple Python function which computes the pivoted Cholesky decomposition/approximation of a positive 53 | semi-definite operator. 54 | 55 | Args: 56 | - get_diag: A function which takes no arguments and returns the diagonal of the matrix when called. 57 | - get_row: A function which takes 1 integer argument and returns the desired row (zero indexed). 58 | - M: The maximum rank of the approximate decomposition; an integer. 59 | 60 | Returns: 61 | - R, an upper triangular matrix of column dimension equal to the target matrix. 62 | - pi, the index of the pivots.
63 | """ 64 | 65 | d = np.copy(get_diag()) 66 | N = len(d) 67 | 68 | pi = list(range(N)) 69 | 70 | R = np.zeros([M, N]) 71 | 72 | 73 | m = 0 74 | while (m < M): # and (err > err_tol): 75 | 76 | i = m + np.argmax([d[pi[j]] for j in range(m, N)]) 77 | tmp = pi[m] 78 | pi[m] = pi[i] 79 | pi[i] = tmp 80 | 81 | R[m, pi[m]] = np.sqrt(d[pi[m]]) 82 | Apim = get_row(pi[m]) 83 | for i in range(m + 1, N): 84 | if m > 0: 85 | ip = np.inner(R[:m, pi[m]], R[:m, pi[i]]) 86 | else: 87 | ip = 0 88 | R[m, pi[i]] = (Apim[pi[i]] - ip) / R[m, pi[m]] 89 | d[pi[i]] -= pow(R[m, pi[i]], 2) 90 | 91 | m += 1 92 | 93 | R = R[:m, :] 94 | return R, pi[:m] 95 | 96 | 97 | def compute_lev(model, x_data, y_data): 98 | mean, f_varM = model.predict_f(x_data, full_cov=False, full_output_cov=False) 99 | with tf.GradientTape(persistent=True) as g: 100 | g.watch(mean) 101 | g.watch(f_varM) 102 | var_expI = model.likelihood.variational_expectations(mean, f_varM, y_data) 103 | d_exp_dv = g.gradient(var_expI, f_varM) 104 | del g 105 | 106 | lamb = tf.squeeze(-2*d_exp_dv) 107 | lev = tf.abs(tf.reduce_sum(f_varM * lamb, axis=1)) 108 | 109 | if lamb.ndim > 1: 110 | lamb = tf.reduce_sum(lamb, axis=1) 111 | 112 | return lev.numpy(), lamb.numpy() 113 | 114 | 115 | def memory_picker(old_batch, model, mem_size): 116 | 117 | x_old, y_old = old_batch 118 | lev, lamb = compute_lev(model, x_old, y_old) 119 | 120 | # Weighted sampling 121 | ind = np.random.choice(np.arange(y_old.shape[0]), mem_size, p=lev/np.sum(lev)) 122 | return None, ind 123 | 124 | 125 | def random_picker(data, model, mem_size): 126 | """ 127 | Picks random memory 128 | 129 | Note: model parameter is there for uniform function definition. 130 | """ 131 | x_old, y_old = data 132 | ind = np.random.choice(np.arange(y_old.shape[0]), mem_size) 133 | return None, ind 134 | 135 | -------------------------------------------------------------------------------- /src/sites.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to declare Gaussian Exponential Family sites objects. 3 | """ 4 | 5 | import abc 6 | from typing import Optional 7 | 8 | import tensorflow as tf 9 | from gpflow.base import Module, Parameter 10 | from gpflow.config import default_float 11 | from gpflow.utilities import positive, triangular 12 | 13 | 14 | class Sites(Module, metaclass=abc.ABCMeta): 15 | """ 16 | The base sites class 17 | """ 18 | 19 | def __init__(self, name: Optional[str] = None): 20 | """ 21 | :param name: optional kernel name. 22 | """ 23 | super().__init__(name=name) 24 | 25 | 26 | class DiagSites(Sites): 27 | """ 28 | Sites with diagonal lambda_2 29 | """ 30 | 31 | def __init__(self, lambda_1, lambda_2, name: Optional[str] = None): 32 | """ 33 | :param lambda_1: first order natural parameter 34 | :param lambda_2: second order natural parameter 35 | :param name: optional kernel name. 36 | """ 37 | super().__init__(name=name) 38 | 39 | self.lambda_1 = Parameter(lambda_1, dtype=default_float(), trainable=False) # [M, P] 40 | self.lambda_2 = Parameter(lambda_2, transform=positive(), trainable=False) # [M, P] 41 | 42 | 43 | class DenseSites(Sites): 44 | """ 45 | Sites with dense lambda_2 save as a Cholesky factor 46 | """ 47 | 48 | def __init__(self, lambda_1, lambda_2_sqrt=None, lambda_2=None, name: Optional[str] = None): 49 | """ 50 | :param lambda_1: first order natural parameter 51 | :param lambda_2_sqrt: second order natural parameter 52 | :param name: optional kernel name. 
53 | """ 54 | super().__init__(name=name) 55 | 56 | self.lambda_1 = Parameter(lambda_1, dtype=default_float(), trainable=False) # [M, P] 57 | self.num_latent_gps = lambda_1.shape[0] 58 | 59 | assert (lambda_2_sqrt is not None) or (lambda_2 is not None) 60 | 61 | if lambda_2_sqrt is not None: 62 | self.factor = True 63 | self._lambda_2_sqrt = Parameter(lambda_2_sqrt, transform=triangular(), trainable=False) # [L|P, M, M] 64 | else: 65 | self._lambda_2 = Parameter(lambda_2, trainable=False) # [L|P, M, M] 66 | self.factor = False 67 | 68 | @property 69 | def lambda_2(self): 70 | """second natural parameter""" 71 | if self.factor: 72 | return self._lambda_2_sqrt @ tf.linalg.matrix_transpose(self._lambda_2_sqrt) 73 | return self._lambda_2 74 | 75 | @property 76 | def lambda_2_sqrt(self): 77 | """Cholesky factor of the second natural parameter""" 78 | if self.factor: 79 | return self._lambda_2_sqrt 80 | return tf.linalg.cholesky(self._lambda_2) 81 | -------------------------------------------------------------------------------- /src/streaming_sparse_gp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/streaming_sparse_gp/__init__.py -------------------------------------------------------------------------------- /src/streaming_sparse_gp/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/streaming_sparse_gp/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/streaming_sparse_gp/__pycache__/osvgpc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/streaming_sparse_gp/__pycache__/osvgpc.cpython-38.pyc -------------------------------------------------------------------------------- /src/streaming_sparse_gp/osvgpc.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import gpflow 4 | from gpflow import Parameter, default_float 5 | from gpflow import conditionals, kullback_leiblers 6 | from gpflow.inducing_variables import InducingPoints 7 | from gpflow.models import GPModel, InternalDataTrainingLossMixin 8 | from gpflow.utilities import positive, triangular 9 | from packaging import version # required to handle GPflow breaking changes 10 | 11 | 12 | class OSVGPC(GPModel, InternalDataTrainingLossMixin): 13 | """ 14 | Online Sparse Variational GP classification. 15 | 16 | Streaming Gaussian process approximations 17 | Thang D. Bui, Cuong V. Nguyen, Richard E. 
Turner 18 | NIPS 2017 19 | """ 20 | 21 | def __init__(self, data, kernel, likelihood, mu_old, Su_old, Kaa_old, Z_old, Z, mean_function=None, 22 | q_diag=False, whiten=True, num_latent_gps=None): 23 | 24 | self.data = gpflow.models.util.data_input_to_tensor(data) 25 | # self.num_data = X.shape[0] 26 | self.num_data = None 27 | 28 | # init the super class, accept args 29 | if num_latent_gps is None: 30 | num_latent_gps = GPModel.calc_num_latent_gps_from_data(data, kernel, likelihood) 31 | super().__init__(kernel, likelihood, mean_function, num_latent_gps) 32 | 33 | self.q_diag, self.whiten = q_diag, whiten 34 | self.inducing_variable = InducingPoints(Z) 35 | num_inducing = self.inducing_variable.num_inducing 36 | 37 | # init variational parameters 38 | q_mu = np.zeros((num_inducing, self.num_latent_gps)) 39 | self.q_mu = Parameter(q_mu, dtype=default_float()) # [M, P] 40 | 41 | if q_diag: 42 | ones = np.ones( 43 | (num_inducing, self.num_latent_gps), dtype=default_float() 44 | ) 45 | self.q_sqrt = Parameter(ones, transform=positive()) # [M, P] 46 | else: 47 | np_q_sqrt = np.array( 48 | [ 49 | np.eye(num_inducing, dtype=default_float()) 50 | for _ in range(self.num_latent_gps) 51 | ] 52 | ) 53 | self.q_sqrt = Parameter(np_q_sqrt, transform=triangular()) # [P, M, M] 54 | 55 | self.mu_old = tf.Variable(mu_old, shape=tf.TensorShape(None), trainable=False) 56 | self.M_old = Z_old.shape[0] 57 | self.Su_old = tf.Variable(Su_old, shape=tf.TensorShape(None), trainable=False) 58 | self.Kaa_old = tf.Variable(Kaa_old, shape=tf.TensorShape(None), trainable=False) 59 | self.Z_old = tf.Variable(Z_old, shape=tf.TensorShape(None), trainable=False) 60 | 61 | def prior_kl(self): 62 | return kullback_leiblers.prior_kl(self.inducing_variable, self.kernel, self.q_mu, self.q_sqrt, whiten=self.whiten) 63 | 64 | def correction_term(self): 65 | Mb = self.inducing_variable.num_inducing 66 | Ma = self.M_old 67 | # jitter = gpflow.default_jitter() 68 | jitter = gpflow.utilities.to_default_float(1e-4) 69 | Saa = self.Su_old 70 | ma = self.mu_old 71 | # a is old inducing points, b is new 72 | mu, Sigma = self.predict_f(self.Z_old, full_cov=True) 73 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 74 | # Added by us for split-MNIST 75 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 76 | if Sigma.shape[0] == 1: 77 | Sigma = tf.squeeze(Sigma, axis=0) 78 | Smm = Sigma + tf.matmul(mu, mu, transpose_b=True) 79 | Kaa = gpflow.utilities.add_noise_cov(self.Kaa_old, jitter) 80 | LSa = tf.linalg.cholesky(Saa) 81 | LKa = tf.linalg.cholesky(Kaa) 82 | obj = tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LKa))) 83 | obj += - tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LSa))) 84 | 85 | Sainv_ma = tf.linalg.cholesky_solve(LSa, ma) 86 | obj += -0.5 * tf.reduce_sum(ma * Sainv_ma) 87 | obj += tf.reduce_sum(mu * Sainv_ma) 88 | 89 | Sainv_Smm = tf.linalg.cholesky_solve(LSa, Smm) 90 | Kainv_Smm = tf.linalg.cholesky_solve(LKa, Smm) 91 | obj += -0.5 * tf.reduce_sum(tf.linalg.diag_part(Sainv_Smm) - tf.linalg.diag_part(Kainv_Smm)) 92 | return obj 93 | 94 | def maximum_log_likelihood_objective(self) -> tf.Tensor: # type: ignore 95 | return self.elbo() 96 | 97 | def elbo(self): 98 | """ 99 | This gives a variational bound on the model likelihood. 100 | """ 101 | X, Y = self.data 102 | 103 | # Get prior KL. 104 | kl = self.prior_kl() 105 | 106 | # Get conditionals 107 | fmean, fvar = self.predict_f(X, full_cov=False) 108 | 109 | # Get variational expectations. 
110 | if version.parse(gpflow.__version__) < version.Version("2.6.0"): 111 | var_exp = self.likelihood.variational_expectations(fmean, fvar, Y) 112 | else: 113 | # breaking change https://github.com/GPflow/GPflow/pull/1919 114 | var_exp = self.likelihood.variational_expectations(X, fmean, fvar, Y) 115 | 116 | # re-scale for minibatch size 117 | if self.num_data is not None: 118 | raise NotImplementedError("need to update code to ExternalDataTrainingLossMixin") 119 | num_data = tf.cast(self.num_data, kl.dtype) 120 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 121 | scale = num_data / minibatch_size 122 | else: 123 | scale = tf.cast(1.0, kl.dtype) 124 | 125 | # compute online correction term 126 | online_reg = self.correction_term() 127 | 128 | return tf.reduce_sum(var_exp) * scale - kl + online_reg 129 | 130 | def predict_f(self, Xnew, full_cov=False, full_output_cov=False): 131 | mu, var = conditionals.conditional(Xnew, self.inducing_variable, self.kernel, self.q_mu, 132 | q_sqrt=self.q_sqrt, full_cov=full_cov, white=self.whiten, 133 | full_output_cov=full_output_cov) 134 | return mu + self.mean_function(Xnew), var 135 | -------------------------------------------------------------------------------- /src/streaming_sparse_gp/readme.md: -------------------------------------------------------------------------------- 1 | # Streaming sparse GP 2 | 3 | The model files are taken from the official code available at https://github.com/thangbui/streaming_sparse_gp as the code is not available as a package. 4 | 5 | --------------------------------------------------------------------------------
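A minimal sketch of one streaming update with the `OSVGPC` model defined in `osvgpc.py` above (added for illustration; the toy data, the SVGP used to produce the "old" posterior summaries, and all settings are placeholders rather than values from the experiments):

```python
import gpflow
import numpy as np
import tensorflow as tf

from src.streaming_sparse_gp.osvgpc import OSVGPC

# Fit an initial sparse GP classifier on the first batch.
X0 = np.random.rand(200, 1)
Y0 = (np.sin(12.0 * X0) > 0).astype(float)
old_model = gpflow.models.SVGP(gpflow.kernels.Matern52(), gpflow.likelihoods.Bernoulli(),
                               inducing_variable=X0[:20].copy())
gpflow.optimizers.Scipy().minimize(old_model.training_loss_closure((X0, Y0)),
                                   old_model.trainable_variables, options=dict(maxiter=100))

# Summarise the old posterior at its inducing inputs.
Z_old = old_model.inducing_variable.Z.numpy()
mu_old, Su_old = old_model.predict_f(Z_old, full_cov=True)
Su_old = tf.squeeze(Su_old, axis=0)            # [M, M] for a single latent GP
Kaa_old = old_model.kernel(Z_old)

# Condition on the old posterior while fitting the next batch.
X1 = np.random.rand(200, 1) + 1.0
Y1 = (np.sin(12.0 * X1) > 0).astype(float)
Z_new = np.concatenate([Z_old, X1[:10]], axis=0)

online_model = OSVGPC((X1, Y1), old_model.kernel, old_model.likelihood,
                      mu_old, Su_old, Kaa_old, Z_old, Z_new)
gpflow.optimizers.Scipy().minimize(online_model.training_loss,
                                   online_model.trainable_variables, options=dict(maxiter=100))
```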