├── .gitignore ├── LICENSE ├── README.md ├── experiments ├── banana │ ├── README.md │ ├── banana_utils.py │ ├── streaming_banana_bui.ipynb │ ├── streaming_banana_maddox.ipynb │ └── streaming_banana_tsvgp.ipynb ├── configs │ ├── dataset │ │ ├── adult.yaml │ │ ├── bank.yaml │ │ ├── bike.yaml │ │ ├── elevators.yaml │ │ ├── magnetometer.yaml │ │ ├── mammographic.yaml │ │ ├── mnist.yaml │ │ ├── mushroom.yaml │ │ └── split_mnist.yaml │ ├── magnetometer_offline_experiment.yaml │ ├── magnetometer_online_experiment.yaml │ ├── magnetometer_streaming_experiment.yaml │ ├── model │ │ ├── kernel │ │ │ ├── matern52.yaml │ │ │ ├── rbf.yaml │ │ │ └── sum_constant_matern52.yaml │ │ ├── likelihood │ │ │ ├── bernoulli.yaml │ │ │ ├── gaussian.yaml │ │ │ └── softmax.yaml │ │ ├── ovc.yaml │ │ ├── sgpr.yaml │ │ ├── svgp.yaml │ │ ├── tsvgp.yaml │ │ ├── tsvgp_continual.yaml │ │ └── tsvgp_continual_classification.yaml │ ├── offline_experiment.yaml │ ├── offline_mnist_experiment.yaml │ ├── online_experiment.yaml │ ├── online_mnist_experiment.yaml │ ├── optimizer │ │ ├── adam.yaml │ │ └── scipy.yaml │ ├── streaming_experiment.yaml │ └── streaming_mnist_experiment.yaml ├── data │ ├── adult.csv │ ├── banana_test_x.txt │ ├── banana_test_y.txt │ ├── banana_train_x.txt │ ├── banana_train_y.txt │ ├── bank.csv │ ├── bike.csv │ ├── elevators.csv │ ├── invensense │ │ ├── 1-loc.csv │ │ ├── 1-mag.csv │ │ ├── 1-time.csv │ │ ├── 2-loc.csv │ │ ├── 2-mag.csv │ │ ├── 2-time.csv │ │ ├── 3-loc.csv │ │ ├── 3-mag.csv │ │ ├── 3-time.csv │ │ ├── 4-loc.csv │ │ ├── 4-mag.csv │ │ ├── 4-time.csv │ │ ├── 5-loc.csv │ │ ├── 5-mag.csv │ │ ├── 5-time.csv │ │ ├── 6-loc.csv │ │ ├── 6-mag.csv │ │ ├── 6-time.csv │ │ ├── 7-loc.csv │ │ ├── 7-mag.csv │ │ ├── 7-time.csv │ │ ├── 8-loc.csv │ │ ├── 8-mag.csv │ │ ├── 8-time.csv │ │ ├── 9-loc.csv │ │ ├── 9-mag.csv │ │ └── 9-time.csv │ ├── mammographic.csv │ └── mushroom.csv ├── exp_utils.py ├── hotspots │ ├── README.md │ ├── env.yaml │ ├── extract_results.py │ ├── hotspots.py │ ├── our_tsvgp.py │ ├── results │ │ ├── hotspots-results-acc.tex │ │ ├── hotspots-results-mse.tex │ │ ├── hotspots_results.npz │ │ └── timings.dat │ ├── submit_ours.sh │ ├── submit_ovc.sh │ ├── submit_random.sh │ └── visualize_results.py ├── magnetometer │ ├── .ipynb_checkpoints │ │ └── debug-checkpoint.ipynb │ ├── README.md │ ├── __pycache__ │ │ └── magnetometer_utils.cpython-38.pyc │ ├── magnetometer_utils.py │ ├── offline_model.py │ ├── online_fc_plots.py │ ├── online_model.py │ ├── online_model_predictions.py │ └── streaming_gp_model.py ├── split_mnist │ ├── README.md │ ├── mnist_utils.py │ ├── notebooks │ │ ├── leverage_score.ipynb │ │ ├── leverage_score_vs_random.ipynb │ │ ├── plot_accuracy.ipynb │ │ └── plot_memory_z.ipynb │ ├── offline_model.py │ ├── online_model.py │ └── streaming_gp_model.py └── uci │ ├── README.md │ ├── offline_model.py │ ├── online_fc_model.py │ ├── online_model.py │ ├── streaming_sgpr.py │ ├── streaming_sgpr_fc.py │ └── uci_utils.py ├── requirements.txt └── src ├── __init__.py ├── __pycache__ ├── __init__.cpython-38.pyc ├── sites.cpython-38.pyc └── util.cpython-38.pyc ├── models ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── tsvgp.cpython-38.pyc │ ├── tsvgp_cont.cpython-38.pyc │ ├── tsvgp_white.cpython-38.pyc │ └── utils.cpython-38.pyc ├── tsvgp.py ├── tsvgp_cont.py ├── tsvgp_sites.py ├── tsvgp_white.py ├── tvgp.py └── utils.py ├── sites.py ├── streaming_sparse_gp ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── osvgpc.cpython-38.pyc ├── osgpr.py ├── osvgpc.py └── 
readme.md └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.svg 2 | *.jpg 3 | 4 | .DS_Store 5 | *.idea* 6 | 7 | # Python 8 | .ipynb_checkpoints 9 | *.coverage 10 | *egg* 11 | __pycache__ 12 | 13 | # LaTeX 14 | .auctex-auto 15 | *.aux 16 | *.bbl 17 | *.blg 18 | *.out 19 | *.log 20 | *.snm 21 | *.toc 22 | *.fdb_latexmk 23 | *.fls 24 | 25 | *.gz 26 | *.pdf 27 | 28 | .hydra 29 | outputs/ 30 | wandb/ 31 | multirun/ 32 | code/experiments/data/uci/ 33 | tmp/ 34 | raw.githubusercontent.com/ 35 | final_outputs/ 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 AaltoML 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Memory-based dual Gaussian processes for sequential learning 2 | 3 | This repository is the official implementation of the methods in the publication: 4 | 5 | * P.E. Chang, P. Verma, S.T. John, A. Solin, and M.E. Khan (2023). **Memory-based dual Gaussian processes for sequential learning**. In *International Conference on Machine Learning (ICML)*. [[arXiv]](https://arxiv.org/abs/2306.03566) 6 | 7 | Sequential learning with Gaussian processes (GPs) is challenging when access to past data is limited, for example, in continual and active learning. In such cases, errors can accumulate over time due to inaccuracies in the posterior, hyperparameters, and inducing points, making accurate learning challenging. Here, we present a method to keep all such errors in check using the recently proposed dual sparse variational GP. Our method enables accurate inference for generic likelihoods and improves learning by actively building and updating a memory of past data. We demonstrate its effectiveness in several applications involving Bayesian optimization, active learning, and continual learning. 8 | 9 | ## Environment 10 | 11 | We recommend setting up a [conda](https://docs.conda.io/projects/conda/en/latest/index.html) environment for running the experiments. The code base is tested on a machine with an Ubuntu 22.04 distribution, CUDA 11.6, and conda 23.1.0. 
12 | ```shell 13 | conda create -n sequential-gp python==3.8 14 | conda activate sequential-gp 15 | ``` 16 | 17 | Within the virtual environment, install the dependencies by running 18 | ```shell 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | (Note that the `hotspots` experiment has its own environment and setup instructions.) 23 | 24 | ## Experiments 25 | 26 | The experiments are organized as separate sub-folders inside the `experiments` folder. 27 | Each experiment sub-folder has its own README file with instructions on how to run that particular experiment. 28 | 29 | ## Data sets 30 | 31 | The data sets used for the banana, UCI, and magnetometer experiments are available in the `experiments/data/` directory. 32 | The original sources of the data sets are: 33 | 34 | * **Banana:** https://github.com/thangbui/streaming_sparse_gp 35 | * **UCI:** https://archive.ics.uci.edu/datasets 36 | * **Magnetometer:** https://github.com/AaltoML/magnetic-data 37 | 38 | ## Contributing 39 | 40 | For all correspondence, please contact [paul.chang@aalto.fi](mailto:paul.chang@aalto.fi) 41 | or [prakhar.verma@aalto.fi](mailto:prakhar.verma@aalto.fi). 42 | 43 | 44 | ## License 45 | 46 | This software is provided under the [MIT license](LICENSE). 47 | -------------------------------------------------------------------------------- /experiments/banana/README.md: -------------------------------------------------------------------------------- 1 | ## Streaming Banana Experiment 2 | 3 | The `streaming_banana_maddox.ipynb` notebook (to run the OVC model by Maddox *et al.* (2021)) requires the following packages: 4 | ``` 5 | torch == 1.12.1 6 | botorch == 0.7.2 7 | gpytorch == 1.4.1 8 | volatilitygp 9 | ``` 10 | 11 | (You can also use the conda environment in `../hotspots/env.yaml`) 12 | -------------------------------------------------------------------------------- /experiments/banana/banana_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import matplotlib 5 | import matplotlib.pyplot as plt 6 | import gpflow 7 | 8 | import sys 9 | 10 | sys.path.append("../../") 11 | from src.streaming_sparse_gp.osvgpc import OSVGPC 12 | 13 | 14 | def load_banana_dataset() -> [np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 15 | train_x = np.loadtxt( 16 | "../data/banana_train_x.txt", 17 | delimiter="," 18 | ) 19 | train_y = np.loadtxt( 20 | "../data/banana_train_y.txt", delimiter="," 21 | ) 22 | train_y[train_y == -1] = 0 23 | 24 | test_x = np.loadtxt( 25 | "../data/banana_test_x.txt", 26 | delimiter="," 27 | ) 28 | test_y = np.loadtxt( 29 | "../data/banana_test_y.txt", delimiter="," 30 | ) 31 | test_y[test_y == -1] = 0 32 | 33 | return train_x, train_y, test_x, test_y 34 | 35 | 36 | def plot_banana(pred_mu, pred_var, pred_prob, inducing_pnts, data, xtest, ytest, vmin=0., vmax=1., 37 | plot_inducing=False, plot_probability=False, plot_colorbar=False, previous_data=None): 38 | if plot_probability: 39 | camp0_color = ["C1", "white"] 40 | camp1_color = ["white", "C0"] 41 | else: 42 | camp0_color = ["white", "C1"] 43 | camp1_color = ["C0", "white"] 44 | 45 | cmap0 = matplotlib.colors.LinearSegmentedColormap.from_list("", camp0_color) 46 | cmap1 = matplotlib.colors.LinearSegmentedColormap.from_list("", camp1_color) 47 | colors0 = cmap0(np.linspace(0, 1., 128)) 48 | colors1 = cmap1(np.linspace(0, 1., 128)) 49 | colors = np.append(colors0, colors1, axis=0) 50 | cmap = 
matplotlib.colors.LinearSegmentedColormap.from_list('mycmap', colors) 51 | 52 | X, Y = data 53 | 54 | fig, ax = plt.subplots(1, 1, figsize=(6, 6)) 55 | 56 | for i, mark, color in [[1, 'o', 'C0'], [0, 's', 'C1']]: 57 | ind = Y[:, 0] == i 58 | ax.scatter(X[ind, 0], X[ind, 1], s=100, alpha=.5, edgecolor='k', marker=mark, color=color) 59 | 60 | # Plotting prevous data ghosted out 61 | if previous_data is not None: 62 | X_prev, Y_prev = previous_data 63 | 64 | for i, mark, color in [[1, 'o', 'C0'], [0, 's', 'C1']]: 65 | ind = Y_prev[:, 0] == i 66 | ax.scatter(X_prev[ind, 0], X_prev[ind, 1], s=100, alpha=.1, edgecolor='k', marker=mark, color=color) 67 | 68 | if plot_inducing and inducing_pnts is not None: 69 | ax.scatter(inducing_pnts[:, 0], inducing_pnts[:, 1], s=40, color='k') 70 | 71 | # Scale background 72 | if plot_probability: 73 | foo = pred_prob.numpy() 74 | else: 75 | foo = pred_mu.numpy() > 0.5 76 | foo = foo.astype(float) 77 | foo = (2. * foo - 1.) * np.sqrt(pred_var.numpy()) 78 | if vmax is None: 79 | vmax = np.max(np.sqrt(pred_var.numpy())) 80 | vmin = -vmax 81 | im = ax.imshow(foo.reshape(100, 100).transpose(), extent=[-2.8, 2.8, -2.8, 2.8], 82 | origin='lower', cmap=cmap, vmin=vmin, vmax=vmax) 83 | 84 | ax.axis('equal') 85 | 86 | plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) 87 | plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False) 88 | ax.set_xlim(-2.8, 2.8) 89 | ax.set_ylim(-2.8, 2.8) 90 | ax.contour(xtest, ytest, pred_mu.numpy().reshape(100, 100), levels=[.5], 91 | colors='k', linewidths=4.) 92 | 93 | if plot_colorbar: 94 | plt.colorbar(im, ax=ax) 95 | 96 | plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) 97 | plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False) 98 | ax.set_xlim(-2.8, 2.8) 99 | ax.set_ylim(-2.8, 2.8) 100 | 101 | 102 | def optimize_streaming_model(optimizer, model, train_data: Tuple[np.ndarray, np.ndarray], 103 | iterations: int = 100, mu=None, Su=None, Kaa=None, Zopt=None, first_init=True): 104 | """ 105 | Optimize Bui model 106 | """ 107 | 108 | def optimization_step_adam(): 109 | optimizer.minimize(model.training_loss, model.trainable_variables) 110 | 111 | def optimization_step_scipy(): 112 | optimizer.minimize(model.training_loss, model.trainable_variables, options={'maxiter': iterations}) 113 | 114 | def optimization_step(): 115 | if isinstance(optimizer, gpflow.optimizers.Scipy): 116 | optimization_step_scipy() 117 | else: 118 | for _ in range(iterations): 119 | optimization_step_adam() 120 | 121 | def init_Z(cur_Z, new_X, use_old_Z=True): 122 | if use_old_Z: 123 | Z = np.copy(cur_Z) 124 | else: 125 | M = cur_Z.shape[0] 126 | M_old = int(0.7 * M) 127 | M_new = M - M_old 128 | old_Z = cur_Z[np.random.permutation(M)[0:M_old], :] 129 | new_Z = new_X[np.random.permutation(new_X.shape[0])[0:M_new], :] 130 | Z = np.vstack((old_Z, new_Z)) 131 | return Z 132 | 133 | use_old_z = True 134 | 135 | X, y = train_data 136 | 137 | if first_init: 138 | if isinstance(optimizer, gpflow.optimizers.Scipy): 139 | gpflow.optimizers.Scipy().minimize( 140 | model.training_loss_closure((X, y)), model.trainable_variables, options={'maxiter': iterations}) 141 | else: 142 | for _ in range(iterations): 143 | optimizer.minimize(model.training_loss_closure((X, y)), model.trainable_variables) 144 | else: 145 | Zinit = init_Z(Zopt, X, use_old_z) 146 | model = OSVGPC((X, y), gpflow.kernels.Matern52(), gpflow.likelihoods.Bernoulli(), mu, Su, Kaa, 147 | Zopt, 
Zinit) 148 | optimization_step() 149 | 150 | Zopt = model.inducing_variable.Z.numpy() 151 | mu, Su = model.predict_f(Zopt, full_cov=True) 152 | if len(Su.shape) == 3: 153 | Su = Su[0, :, :] + 1e-4 * np.eye(mu.shape[0]) 154 | Kaa = model.kernel(model.inducing_variable.Z) 155 | 156 | return mu, Su, Kaa, Zopt, model 157 | -------------------------------------------------------------------------------- /experiments/configs/dataset/adult.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 15 2 | output_dim: 1 3 | num_classes: 2 4 | 5 | dataloader: 6 | _target_: uci_utils.load_data 7 | normalize: true 8 | train_split_percentage: 0.8 9 | data_path: "../data/adult.csv" 10 | seed: ${seed} 11 | n_k_folds: null 12 | random_state: null 13 | dataset_type: "classification" 14 | -------------------------------------------------------------------------------- /experiments/configs/dataset/bank.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 17 2 | output_dim: 1 3 | num_classes: 2 4 | dataloader: 5 | _target_: uci_utils.load_data 6 | normalize: true 7 | train_split_percentage: 0.8 8 | data_path: "../data/bank.csv" 9 | seed: ${seed} 10 | n_k_folds: null 11 | random_state: null 12 | dataset_type: "classification" -------------------------------------------------------------------------------- /experiments/configs/dataset/bike.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 17 2 | output_dim: 1 3 | dataloader: 4 | _target_: uci_utils.load_data 5 | normalize: true 6 | train_split_percentage: 0.8 7 | data_path: "../data/bike.csv" 8 | seed: ${seed} 9 | n_k_folds: null 10 | random_state: null 11 | dataset_type: "regression" 12 | -------------------------------------------------------------------------------- /experiments/configs/dataset/elevators.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 18 2 | output_dim: 1 3 | dataloader: 4 | _target_: uci_utils.load_data 5 | normalize: true 6 | train_split_percentage: 0.8 7 | data_path: "../data/elevators.csv" 8 | seed: ${seed} 9 | n_k_folds: null 10 | random_state: null 11 | dataset_type: "regression" 12 | -------------------------------------------------------------------------------- /experiments/configs/dataset/magnetometer.yaml: -------------------------------------------------------------------------------- 1 | dataloader: 2 | _partial_: true 3 | _target_: magnetometer_utils.load_data 4 | main_dir: "../data/invensense" -------------------------------------------------------------------------------- /experiments/configs/dataset/mammographic.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 6 2 | output_dim: 1 3 | num_classes: 2 4 | 5 | dataloader: 6 | _target_: uci_utils.load_data 7 | normalize: true 8 | train_split_percentage: 0.8 9 | data_path: "../data/mammographic.csv" 10 | seed: ${seed} 11 | n_k_folds: null 12 | random_state: null 13 | dataset_type: "classification" 14 | -------------------------------------------------------------------------------- /experiments/configs/dataset/mnist.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 784 2 | output_dim: 1 3 | dataloader: 4 | _target_: mnist_utils.load_mnist 5 | seed: ${seed} 6 | -------------------------------------------------------------------------------- /experiments/configs/dataset/mushroom.yaml: 
-------------------------------------------------------------------------------- 1 | input_dim: 22 2 | output_dim: 1 3 | num_classes: 2 4 | 5 | dataloader: 6 | _target_: uci_utils.load_data 7 | normalize: true 8 | train_split_percentage: 0.8 9 | data_path: "../data/mushroom.csv" 10 | seed: ${seed} 11 | n_k_folds: null 12 | random_state: null 13 | dataset_type: "classification" 14 | -------------------------------------------------------------------------------- /experiments/configs/dataset/split_mnist.yaml: -------------------------------------------------------------------------------- 1 | input_dim: 784 2 | output_dim: 1 3 | dataloader: 4 | _target_: mnist_utils.load_split_mnist 5 | seed: ${seed} 6 | -------------------------------------------------------------------------------- /experiments/configs/magnetometer_offline_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: svgp 5 | - dataset: magnetometer 6 | - optimizer: adam 7 | 8 | optimize: 9 | _partial_: true 10 | _target_: exp_utils.optimize_full_model 11 | minibatch_size: 500 12 | iterations: 20000 13 | lambda_lr: 0.8 14 | 15 | wandb: 16 | username: null 17 | 18 | n_inducing_variable: 100 19 | num_latent_gps: 1 20 | -------------------------------------------------------------------------------- /experiments/configs/magnetometer_online_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp_continual 5 | - dataset: magnetometer 6 | - optimizer: adam 7 | 8 | online_gp: 9 | _partial_: true 10 | _target_: src.models.tsvgp_cont.OnlineGP 11 | n_steps: 2 12 | lambda_lr: 0.9 13 | num_mem: 100 14 | 15 | optimize: 16 | _partial_: true 17 | _target_: exp_utils.optimize_online_model 18 | train_hyperparams: True 19 | hyperparams_step: 20000 20 | train_memory: True 21 | debug: True 22 | 23 | wandb: 24 | username: null 25 | 26 | n_inducing_variable: 100 27 | num_latent_gps: 1 28 | streaming: false 29 | -------------------------------------------------------------------------------- /experiments/configs/magnetometer_streaming_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: sgpr 5 | - dataset: magnetometer 6 | - optimizer: scipy 7 | 8 | optimize: 9 | _partial_: true 10 | _target_: uci.uci_utils.optimize_streaming_model 11 | iterations: 20000 12 | task: "regression" 13 | 14 | wandb: 15 | username: null 16 | 17 | n_inducing_variable: 100 18 | -------------------------------------------------------------------------------- /experiments/configs/model/kernel/matern52.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.kernels.Matern52 2 | lengthscales: 1. 3 | variance: 1. -------------------------------------------------------------------------------- /experiments/configs/model/kernel/rbf.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.kernels.RBF 2 | lengthscales: 1. 3 | variance: 1. 
-------------------------------------------------------------------------------- /experiments/configs/model/kernel/sum_constant_matern52.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.kernels.Sum 2 | kernels: 3 | - _target_: gpflow.kernels.Constant 4 | - _target_: gpflow.kernels.Matern52 5 | -------------------------------------------------------------------------------- /experiments/configs/model/likelihood/bernoulli.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.likelihoods.Bernoulli -------------------------------------------------------------------------------- /experiments/configs/model/likelihood/gaussian.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.likelihoods.Gaussian 2 | variance: 0.1 -------------------------------------------------------------------------------- /experiments/configs/model/likelihood/softmax.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.likelihoods.Softmax 2 | num_classes: ${num_classes} -------------------------------------------------------------------------------- /experiments/configs/model/ovc.yaml: -------------------------------------------------------------------------------- 1 | _target_: volatilitygp.models.SingleTaskVariationalGP 2 | _partial_: true 3 | 4 | num_inducing: ${n_inducing_variable} 5 | use_piv_chol_init: false 6 | learn_inducing_locations: false 7 | -------------------------------------------------------------------------------- /experiments/configs/model/sgpr.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | 4 | _target_: gpflow.models.SGPR 5 | _partial_: true 6 | noise_variance: 1.0 7 | 8 | inducing_variable: 9 | _target_: numpy.ones 10 | shape: 11 | - ${n_inducing_variable} 12 | - ${dataset.input_dim} 13 | -------------------------------------------------------------------------------- /experiments/configs/model/svgp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: gaussian 4 | 5 | _target_: gpflow.models.SVGP 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_latent_gps: ${num_latent_gps} 12 | num_data: ??? 13 | 14 | -------------------------------------------------------------------------------- /experiments/configs/model/tsvgp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: gaussian 4 | 5 | _target_: src.models.tsvgp_white.t_SVGP_white 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_data: ??? 
12 | num_latent_gps: ${num_latent_gps} 13 | -------------------------------------------------------------------------------- /experiments/configs/model/tsvgp_continual.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: gaussian 4 | 5 | _target_: src.models.tsvgp_cont.t_SVGP_cont 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_latent_gps: ${num_latent_gps} -------------------------------------------------------------------------------- /experiments/configs/model/tsvgp_continual_classification.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - kernel: matern52 3 | - likelihood: softmax 4 | 5 | _target_: src.models.tsvgp_cont.t_SVGP_cont 6 | inducing_variable: 7 | _target_: numpy.ones 8 | shape: 9 | - ${n_inducing_variable} 10 | - ${dataset.input_dim} 11 | num_latent_gps: ${num_classes} 12 | -------------------------------------------------------------------------------- /experiments/configs/offline_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp 5 | - dataset: bike 6 | - optimizer: adam 7 | 8 | wandb: 9 | username: null 10 | 11 | optimize: 12 | _partial_: true 13 | _target_: exp_utils.optimize_full_model 14 | minibatch_size: 200 15 | iterations: 2000 16 | lambda_lr: 0.8 17 | debug: True 18 | 19 | n_inducing_variable: 100 20 | 21 | load_model_path: null 22 | num_classes: ${dataset.num_classes} 23 | num_latent_gps: 1 24 | -------------------------------------------------------------------------------- /experiments/configs/offline_mnist_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: svgp 5 | - optimizer: adam 6 | - dataset: mnist 7 | - override model/likelihood: softmax 8 | 9 | optimize: 10 | _partial_: true 11 | _target_: exp_utils.optimize_full_model 12 | minibatch_size: 4000 13 | iterations: 1000 14 | debug: true 15 | 16 | wandb: 17 | username: null 18 | 19 | n_inducing_variable: 300 20 | num_classes: 10 21 | num_latent_gps: ${num_classes} 22 | -------------------------------------------------------------------------------- /experiments/configs/online_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp_continual 5 | - dataset: bike 6 | - optimizer: adam 7 | 8 | online_gp: 9 | _partial_: true 10 | _target_: src.models.tsvgp_cont.OnlineGP 11 | n_steps: 4 12 | lambda_lr: 0.8 13 | num_mem: 10 14 | memory_picker: "bls" 15 | 16 | optimize: 17 | _partial_: true 18 | _target_: exp_utils.optimize_online_model 19 | train_hyperparams: true 20 | hyperparams_step: 20 21 | train_memory: true 22 | 23 | wandb: 24 | username: null 25 | 26 | n_sets: 10 27 | sort_data: True 28 | n_inducing_variable: 100 29 | 30 | load_model_path: null 31 | num_classes: ${dataset.num_classes} 32 | num_latent_gps: 1 -------------------------------------------------------------------------------- /experiments/configs/online_mnist_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: tsvgp_continual_classification 5 | - optimizer: adam 6 | - dataset: split_mnist 7 | 8 | online_gp: 9 | _partial_: true 10 | _target_: 
src.models.tsvgp_cont.OnlineGP 11 | n_steps: 10 12 | lambda_lr: 0.1 13 | num_mem: 400 14 | 15 | memory_picker: 16 | _partial_: true 17 | _target_: src.models.tsvgp_cont.memory_picker 18 | 19 | optimize: 20 | _partial_: true 21 | _target_: mnist_utils.optimize_online_model_minibatch 22 | minibatch_size: 4000 23 | train_hyper: True 24 | train_mem: True 25 | n_hyp_opt_steps: 50 26 | 27 | wandb: 28 | username: null 29 | 30 | n_inducing_variable: 300 31 | num_classes: 10 -------------------------------------------------------------------------------- /experiments/configs/optimizer/adam.yaml: -------------------------------------------------------------------------------- 1 | _target_: tensorflow.optimizers.Adam 2 | learning_rate: 0.01 -------------------------------------------------------------------------------- /experiments/configs/optimizer/scipy.yaml: -------------------------------------------------------------------------------- 1 | _target_: gpflow.optimizers.Scipy -------------------------------------------------------------------------------- /experiments/configs/streaming_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: sgpr 5 | - dataset: bike 6 | - optimizer: scipy 7 | 8 | optimize: 9 | _partial_: true 10 | _target_: uci_utils.optimize_streaming_model 11 | iterations: 20 12 | task: "regression" 13 | 14 | wandb: 15 | username: null 16 | 17 | n_sets: 10 18 | n_inducing_variable: 100 19 | sort_data: true 20 | 21 | load_model_path: null 22 | num_latent_gps: 1 23 | -------------------------------------------------------------------------------- /experiments/configs/streaming_mnist_experiment.yaml: -------------------------------------------------------------------------------- 1 | seed: null 2 | 3 | defaults: 4 | - model: svgp 5 | - dataset: split_mnist 6 | - optimizer: adam 7 | - override model/likelihood: softmax 8 | 9 | optimize: 10 | _partial_: true 11 | _target_: mnist_utils.optimize_streaming_model_minibatch 12 | iterations: 100 13 | minibatch_size: 4000 14 | 15 | wandb: 16 | username: null 17 | 18 | n_inducing_variable: 300 19 | num_classes: 10 20 | num_latent_gps: 10 21 | -------------------------------------------------------------------------------- /experiments/data/banana_train_x.txt: -------------------------------------------------------------------------------- 1 | -0.7808,-0.11324 2 | 1.3595,-0.1174 3 | 1.4517,0.27285 4 | -0.41644,-0.94114 5 | 0.17815,1.4909 6 | 1.1574,-0.45807 7 | 1.6198,-0.018726 8 | 0.4261,0.19381 9 | 1.1214,0.17328 10 | 0.29157,-0.15852 11 | 0.20511,-0.47057 12 | -0.10292,-1.7141 13 | 1.2472,-0.14605 14 | 0.74045,1.1954 15 | 0.9041,0.19498 16 | -0.91656,-1.0823 17 | -0.81851,-0.94307 18 | 0.99532,0.13051 19 | -1.5282,0.65571 20 | -1.5857,0.32248 21 | 0.89861,1.4022 22 | -0.5329,-0.4022 23 | -0.051002,-0.97204 24 | -1.871,-0.45135 25 | -0.70564,-0.69672 26 | 1.9597,-0.70865 27 | -1.672,-0.26623 28 | 1.0227,-0.72842 29 | -0.66961,1.2186 30 | 0.37384,0.96613 31 | 1.7978,0.060729 32 | -0.43432,2.2394 33 | 0.043579,0.012589 34 | -0.82737,-0.75656 35 | -0.32538,0.96858 36 | -1.0294,0.6807 37 | 0.64171,1.6543 38 | 0.26867,1.616 39 | 0.89562,0.35955 40 | -0.35201,-0.48996 41 | -0.80347,-0.78224 42 | 1.3857,-0.49794 43 | 1.1355,-0.11378 44 | 0.18326,-1.1636 45 | -1.2774,-1.1232 46 | -0.75754,-1.2075 47 | 1.4542,-1.7756 48 | -1.7247,-0.41044 49 | 0.64712,0.3167 50 | 0.39919,-0.5939 51 | 0.82375,-0.67253 52 | -0.87418,0.92198 53 | 0.30949,-0.68867 54 | 1.2101,0.19738 
55 | 1.1291,1.5062 56 | 1.7137,-0.22463 57 | -0.951,-1.3011 58 | -0.1777,-0.55051 59 | -0.50589,-1.0891 60 | -0.21904,0.52376 61 | 1.1794,1.4244 62 | -0.43369,1.9786 63 | -0.038111,-0.79058 64 | -1.1584,0.38121 65 | 0.27162,-1.6 66 | -0.26888,1.3707 67 | 0.29905,1.4941 68 | -0.9735,-0.046973 69 | -0.32257,-0.26362 70 | -0.51085,1.5305 71 | -1.7869,-1.4577 72 | -0.069626,-0.89604 73 | 0.74023,1.5349 74 | -1.5201,0.65952 75 | -1.1169,0.15393 76 | 1.0495,-1.0524 77 | -1.7946,-0.98046 78 | -1.2461,0.10383 79 | 1.6014,-0.18274 80 | -0.52044,1.2026 81 | 1.1794,0.58503 82 | 0.63751,1.1114 83 | -0.32934,-0.95485 84 | 0.69789,1.0197 85 | -1.0715,-0.03901 86 | -0.52868,-1.0244 87 | -1.4329,-1.1823 88 | -0.17739,-1.453 89 | -0.74762,-0.57011 90 | -0.74733,-0.71958 91 | 0.97282,0.6533 92 | 0.0016784,0.26233 93 | -1.0387,-0.035936 94 | 1.81,-0.21759 95 | -0.36939,-0.98376 96 | 0.32053,0.39347 97 | -1.8514,0.54479 98 | 1.0266,1.6072 99 | 0.31659,-1.1335 100 | -0.02466,-1.4955 101 | -0.98993,0.2311 102 | -0.59826,-1.3514 103 | 0.18375,1.6995 104 | 0.81934,0.046239 105 | -0.89708,-0.97167 106 | -1.2301,-0.3357 107 | -0.7868,-0.11321 108 | 0.83904,-0.081896 109 | 1.567,0.48344 110 | -1.3867,-0.28249 111 | -0.23001,1.9999 112 | -1.8406,-0.86529 113 | -1.8741,-1.0988 114 | -1.9768,-0.87983 115 | -0.23442,-0.65611 116 | 0.22244,0.21397 117 | 0.11206,0.2383 118 | 0.89639,-1.2731 119 | 0.90797,-0.50572 120 | -0.25871,1.3374 121 | 1.1915,0.1126 122 | 0.32484,-0.15295 123 | 2.1774,1.1275 124 | -0.55818,-1.7297 125 | 0.29073,1.3384 126 | 1.8044,1.3171 127 | -0.012678,-1.1227 128 | 2.031,0.98364 129 | 0.731,1.7405 130 | 1.1571,0.84702 131 | 1.4231,1.493 132 | -1.551,-1.595 133 | 0.92272,0.24555 134 | -0.33003,0.3011 135 | -0.51355,-1.0719 136 | 0.76926,0.77155 137 | 0.36906,0.49003 138 | 0.9754,-1.3444 139 | -1.0484,0.21912 140 | 1.7304,1.6833 141 | 0.055373,-0.13947 142 | 1.148,-1.137 143 | 1.6599,0.67197 144 | 0.32765,0.5529 145 | 0.0066516,0.011805 146 | 0.5574,1.38 147 | 1.4623,-0.11099 148 | 1.188,-0.96605 149 | -0.64159,-1.3495 150 | 0.80872,0.74066 151 | 1.4716,1.2057 152 | 0.094916,-1.5595 153 | 0.40382,0.0099855 154 | 0.21961,2.0809 155 | -1.5133,-0.62639 156 | 0.16188,-0.7914 157 | -0.24861,0.02337 158 | -1.1031,0.32164 159 | 1.1384,-1.5726 160 | 1.3988,0.4304 161 | 0.031673,-0.82063 162 | -1.2481,-0.68908 163 | 0.17651,-1.0714 164 | 1.5469,1.1025 165 | 0.14176,-0.69801 166 | -1.887,-0.41758 167 | -0.85036,-0.91999 168 | -0.03384,1.5222 169 | -0.32399,-1.0266 170 | -0.052656,-0.74586 171 | -0.2411,-0.37411 172 | 1.0967,0.13124 173 | 0.50689,1.0166 174 | 1.0942,-0.64317 175 | 0.52287,1.0525 176 | -0.94119,0.97597 177 | -0.66398,1.183 178 | 0.27097,-0.038155 179 | 0.48702,1.3334 180 | -0.83556,0.95856 181 | -1.6345,0.52418 182 | 2.0333,-0.42685 183 | 0.029212,-1.213 184 | -1.1808,-0.75829 185 | 1.5179,-1.339 186 | 1.0955,0.73997 187 | 0.10069,1.9315 188 | 0.053324,-1.0184 189 | 0.53455,1.991 190 | 1.2224,0.32471 191 | -0.43598,-0.95364 192 | 1.3058,1.6222 193 | 0.12167,0.59928 194 | -0.78637,-0.6397 195 | -1.2231,0.083578 196 | -0.8562,-0.23466 197 | 0.24465,-0.68074 198 | -0.0029396,0.51294 199 | -1.4616,0.082911 200 | -0.41512,-1.2959 201 | 0.82382,0.58666 202 | -1.0714,-0.61609 203 | 0.92519,0.89528 204 | -0.88564,-1.2681 205 | -1.5289,0.67891 206 | -0.65366,-1.02 207 | 1.0939,1.0263 208 | -1.0366,1.0428 209 | 0.62742,0.23435 210 | -0.74924,0.01009 211 | -0.4744,0.26351 212 | 0.096598,-1.0642 213 | 0.97277,0.45654 214 | -0.88896,-0.88412 215 | -0.0062612,-0.95936 216 | -1.5023,-1.2236 217 | 
-0.85198,-0.59008 218 | -0.30515,1.1774 219 | 1.0892,-1.5861 220 | 0.39321,1.6378 221 | -0.13858,-0.36282 222 | -1.8446,0.093183 223 | 1.2988,0.5551 224 | -0.7285,1.4952 225 | 0.3411,-1.4284 226 | -0.29501,-1.2243 227 | -0.85288,0.3581 228 | 0.36964,1.0337 229 | 0.56868,1.443 230 | 0.35117,-0.040233 231 | -0.95869,1.0709 232 | -2.0934,-1.2221 233 | -0.94602,1.0165 234 | 1.6347,-2.1572 235 | -0.4091,0.34864 236 | 0.5704,1.4158 237 | 0.94247,0.24778 238 | -0.8062,0.94121 239 | -1.1223,-0.29652 240 | 0.0065983,-0.99376 241 | 0.4301,-0.65434 242 | -0.86105,-0.97185 243 | -0.63048,0.94882 244 | -0.56502,-0.85269 245 | -1.179,0.73223 246 | -0.20089,0.072604 247 | 1.052,0.6512 248 | -0.18695,-1.3826 249 | 0.42746,1.7654 250 | 1.3503,-1.5074 251 | 2.2037,1.2567 252 | -0.58056,-1.3361 253 | 1.6498,0.040415 254 | -0.17517,-0.909 255 | -0.48785,-1.3062 256 | 0.67747,0.96414 257 | -0.90442,0.95289 258 | 0.90919,0.11786 259 | -0.22625,-1.0585 260 | -1.1139,0.2758 261 | 1.4613,-1.0605 262 | 0.20545,-1.2805 263 | -0.25482,0.38662 264 | -0.82775,-0.82042 265 | 0.8507,1.4647 266 | -0.065898,-1.3294 267 | 0.63547,0.55913 268 | 1.0538,-0.36193 269 | -1.8113,-1.2112 270 | -1.2076,-0.85428 271 | 0.75688,-0.20816 272 | 0.81199,0.70836 273 | 0.7254,1.3888 274 | 0.92625,0.97222 275 | -0.10018,-1.0525 276 | -1.0634,-1.0668 277 | 1.4891,1.5458 278 | 1.0838,0.088002 279 | 0.68983,1.57 280 | -0.47214,-1.1575 281 | 0.68584,-0.18991 282 | 0.84622,1.8684 283 | -0.7788,1.0888 284 | -0.18316,1.1455 285 | 0.94499,0.30946 286 | 2.2699,0.28398 287 | -1.0272,1.3082 288 | -2.062,-1.2301 289 | 0.18965,0.96933 290 | 0.80178,0.85218 291 | 0.72413,-0.57405 292 | -0.41278,0.05538 293 | 0.40488,-0.55507 294 | -1.0334,-1.0923 295 | -0.52916,0.36537 296 | 0.47075,2.029 297 | -1.369,-0.17742 298 | -0.0062115,1.5596 299 | 1.3488,-0.46149 300 | 0.25973,-1.1745 301 | -0.5107,-0.73367 302 | 0.50118,0.055263 303 | -0.54407,-0.83055 304 | -1.5529,-1.0165 305 | -0.022746,0.16322 306 | -0.73145,-0.90615 307 | 1.0997,1.5757 308 | 1.4106,0.09066 309 | 0.45008,-0.10378 310 | 1.0078,1.8211 311 | -1.6162,0.59348 312 | -0.2904,0.69996 313 | -0.71074,0.95999 314 | -0.55461,-1.0169 315 | -0.71697,0.45503 316 | 1.7476,0.016037 317 | -0.23868,-1.1679 318 | 0.037898,-1.6385 319 | 2.2033,1.1391 320 | -0.17453,-0.94554 321 | 1.432,0.4835 322 | 0.39559,-0.60611 323 | -0.82895,1.0629 324 | -0.60647,-0.99659 325 | -0.63251,0.25688 326 | -1.6331,-0.36254 327 | -0.57241,1.1601 328 | -0.12797,-1.141 329 | 0.042814,-1.043 330 | -0.58161,-0.75229 331 | 1.4268,0.49784 332 | 1.223,-0.60719 333 | -0.58003,-1.0917 334 | 1.4133,-1.4769 335 | -0.56736,-1.1443 336 | -1.7664,-0.9229 337 | 0.58205,0.23212 338 | -0.91411,-0.52243 339 | -1.5143,-0.248 340 | 1.0993,-0.31324 341 | 0.38791,1.6365 342 | -0.18675,-0.26701 343 | 0.3182,1.3952 344 | 1.1982,0.069142 345 | 0.95311,0.71885 346 | 0.70093,0.56424 347 | 0.017872,-0.69987 348 | -0.47891,0.071929 349 | 0.40915,1.7616 350 | -1.5869,0.13848 351 | -1.295,-0.23017 352 | 0.22876,1.628 353 | 0.38928,-0.8044 354 | -1.2614,-0.45689 355 | 0.6164,-1.0708 356 | -1.2015,0.4548 357 | 0.055056,1.3448 358 | 0.5224,1.46 359 | -1.2034,0.93202 360 | -0.07085,0.43863 361 | 1.525,0.35573 362 | 0.80472,1.2121 363 | 0.3572,-1.0729 364 | -1.6012,0.68223 365 | -0.073968,1.968 366 | 0.75834,-1.4517 367 | 1.3083,-1.8064 368 | 1.6175,-0.91923 369 | -0.58432,0.093728 370 | -1.1064,-0.82129 371 | 0.4088,-0.057168 372 | 0.57474,1.4167 373 | 0.80775,-0.83144 374 | 0.66015,0.83503 375 | 1.1148,0.42774 376 | 0.36694,-0.45114 377 | 0.5691,0.48111 378 | 
1.0909,1.2127 379 | -0.043416,-0.44182 380 | -1.7828,-0.49107 381 | -1.591,-0.98008 382 | -0.2963,0.53799 383 | 0.80193,1.2108 384 | -1.8184,-0.84741 385 | 0.32927,0.60596 386 | -1.5049,0.28241 387 | 0.17711,0.73114 388 | 1.0606,-0.23215 389 | 1.4436,0.09457 390 | -0.28542,1.3863 391 | 0.20759,-1.2118 392 | -0.53583,-0.81398 393 | 0.43128,0.26649 394 | -0.87146,-0.82542 395 | 1.4726,0.57425 396 | -0.050809,-1.1859 397 | -0.38296,-1.0432 398 | -1.0423,-1.0733 399 | -1.7794,-0.582 400 | -0.65306,0.4328 401 | -------------------------------------------------------------------------------- /experiments/data/banana_train_y.txt: -------------------------------------------------------------------------------- 1 | 1 2 | -1 3 | -1 4 | -1 5 | -1 6 | 1 7 | -1 8 | 1 9 | -1 10 | 1 11 | 1 12 | -1 13 | 1 14 | 1 15 | -1 16 | -1 17 | -1 18 | 1 19 | -1 20 | -1 21 | 1 22 | 1 23 | -1 24 | 1 25 | -1 26 | -1 27 | 1 28 | 1 29 | -1 30 | -1 31 | -1 32 | -1 33 | 1 34 | -1 35 | 1 36 | -1 37 | 1 38 | 1 39 | 1 40 | 1 41 | -1 42 | -1 43 | -1 44 | -1 45 | 1 46 | -1 47 | 1 48 | 1 49 | -1 50 | 1 51 | 1 52 | -1 53 | 1 54 | -1 55 | 1 56 | -1 57 | -1 58 | 1 59 | -1 60 | 1 61 | 1 62 | -1 63 | -1 64 | -1 65 | -1 66 | -1 67 | -1 68 | 1 69 | 1 70 | -1 71 | 1 72 | -1 73 | 1 74 | -1 75 | 1 76 | 1 77 | 1 78 | 1 79 | -1 80 | -1 81 | -1 82 | -1 83 | -1 84 | -1 85 | 1 86 | -1 87 | 1 88 | -1 89 | 1 90 | -1 91 | -1 92 | 1 93 | 1 94 | -1 95 | -1 96 | 1 97 | -1 98 | -1 99 | -1 100 | -1 101 | 1 102 | -1 103 | 1 104 | 1 105 | -1 106 | 1 107 | 1 108 | 1 109 | -1 110 | 1 111 | 1 112 | 1 113 | 1 114 | 1 115 | 1 116 | 1 117 | 1 118 | 1 119 | 1 120 | -1 121 | 1 122 | 1 123 | 1 124 | -1 125 | -1 126 | 1 127 | -1 128 | 1 129 | 1 130 | -1 131 | 1 132 | 1 133 | -1 134 | 1 135 | -1 136 | -1 137 | 1 138 | 1 139 | 1 140 | 1 141 | 1 142 | 1 143 | -1 144 | -1 145 | 1 146 | -1 147 | -1 148 | 1 149 | -1 150 | -1 151 | 1 152 | -1 153 | 1 154 | -1 155 | 1 156 | -1 157 | 1 158 | 1 159 | 1 160 | -1 161 | 1 162 | 1 163 | -1 164 | 1 165 | 1 166 | 1 167 | -1 168 | -1 169 | -1 170 | -1 171 | 1 172 | -1 173 | -1 174 | 1 175 | -1 176 | -1 177 | -1 178 | 1 179 | -1 180 | -1 181 | -1 182 | -1 183 | -1 184 | 1 185 | 1 186 | -1 187 | 1 188 | -1 189 | -1 190 | -1 191 | -1 192 | 1 193 | 1 194 | 1 195 | 1 196 | 1 197 | -1 198 | 1 199 | 1 200 | -1 201 | 1 202 | 1 203 | -1 204 | -1 205 | -1 206 | -1 207 | -1 208 | -1 209 | -1 210 | 1 211 | 1 212 | -1 213 | -1 214 | -1 215 | -1 216 | 1 217 | 1 218 | -1 219 | 1 220 | -1 221 | 1 222 | -1 223 | -1 224 | -1 225 | -1 226 | -1 227 | 1 228 | -1 229 | -1 230 | 1 231 | -1 232 | 1 233 | -1 234 | 1 235 | 1 236 | -1 237 | -1 238 | -1 239 | 1 240 | -1 241 | 1 242 | -1 243 | -1 244 | -1 245 | -1 246 | 1 247 | -1 248 | -1 249 | 1 250 | 1 251 | 1 252 | -1 253 | -1 254 | -1 255 | -1 256 | -1 257 | -1 258 | -1 259 | -1 260 | 1 261 | 1 262 | -1 263 | 1 264 | -1 265 | -1 266 | -1 267 | 1 268 | 1 269 | 1 270 | 1 271 | 1 272 | -1 273 | -1 274 | -1 275 | -1 276 | -1 277 | 1 278 | -1 279 | -1 280 | -1 281 | 1 282 | -1 283 | -1 284 | -1 285 | -1 286 | -1 287 | -1 288 | 1 289 | 1 290 | -1 291 | 1 292 | 1 293 | 1 294 | -1 295 | 1 296 | -1 297 | 1 298 | -1 299 | -1 300 | -1 301 | -1 302 | 1 303 | -1 304 | 1 305 | 1 306 | -1 307 | 1 308 | -1 309 | 1 310 | 1 311 | -1 312 | 1 313 | -1 314 | -1 315 | 1 316 | -1 317 | -1 318 | -1 319 | 1 320 | -1 321 | -1 322 | 1 323 | -1 324 | -1 325 | 1 326 | 1 327 | -1 328 | -1 329 | -1 330 | 1 331 | -1 332 | 1 333 | -1 334 | 1 335 | -1 336 | 1 337 | -1 338 | 1 339 | 1 340 | 1 341 | -1 342 | 1 343 | -1 344 | -1 345 | -1 
346 | -1 347 | 1 348 | 1 349 | -1 350 | -1 351 | 1 352 | -1 353 | 1 354 | 1 355 | 1 356 | -1 357 | -1 358 | -1 359 | -1 360 | 1 361 | -1 362 | -1 363 | -1 364 | -1 365 | 1 366 | -1 367 | 1 368 | -1 369 | 1 370 | 1 371 | 1 372 | 1 373 | 1 374 | -1 375 | -1 376 | 1 377 | 1 378 | -1 379 | 1 380 | 1 381 | 1 382 | 1 383 | -1 384 | 1 385 | -1 386 | 1 387 | 1 388 | -1 389 | -1 390 | -1 391 | -1 392 | -1 393 | -1 394 | 1 395 | -1 396 | -1 397 | -1 398 | -1 399 | 1 400 | 1 401 | -------------------------------------------------------------------------------- /experiments/exp_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for experiments. 3 | """ 4 | from typing import Tuple, List 5 | import time 6 | import numpy as np 7 | import tensorflow as tf 8 | from gpflow.likelihoods import Bernoulli 9 | import gpflow 10 | from gpflow.models.svgp import SVGP 11 | import hydra 12 | 13 | import sys 14 | 15 | sys.path.append("../..") 16 | 17 | from src.models.tsvgp_white import t_SVGP_white 18 | from src.models.tsvgp_cont import OnlineGP 19 | from src.models.tsvgp import base_SVGP 20 | 21 | 22 | def get_hydra_output_dir(): 23 | """Return the current output directory path generated by hydra""" 24 | hydra_cfg = hydra.core.hydra_config.HydraConfig.get() 25 | return hydra_cfg['runtime']['output_dir'] 26 | 27 | 28 | def optimize_full_model(model, train_data: [np.ndarray, np.ndarray], 29 | test_data: [np.ndarray, np.ndarray], optimizer: tf.optimizers, 30 | minibatch_size: int = 64, 31 | iterations: int = 100, debug: bool = False, 32 | lambda_lr: float = 0.5) -> [list, list]: 33 | """ 34 | Optimize t-SVGP (white) model using minibatches and return the ELBO and NLPD values. 35 | """ 36 | 37 | if not (isinstance(model, t_SVGP_white) or isinstance(model, SVGP)): 38 | raise Exception("Model not supported for optimization!") 39 | 40 | n_train = train_data[0].shape[0] 41 | if n_train < 50000: 42 | train_dataset = tf.data.Dataset.from_tensor_slices(train_data).repeat().shuffle(n_train) 43 | else: 44 | train_dataset = tf.data.Dataset.from_tensor_slices(train_data).repeat() 45 | 46 | train_iter = iter(train_dataset.batch(minibatch_size)) 47 | 48 | training_loss = model.training_loss_closure(train_iter, compile=True) 49 | 50 | @tf.function 51 | def optimization_step(): 52 | optimizer.minimize(training_loss, model.trainable_variables) 53 | if isinstance(model, t_SVGP_white): 54 | model.natgrad_step(train_data, lr=lambda_lr) 55 | 56 | elbo_vals = [] 57 | nlpd_vals = [] 58 | eval_metric = [] 59 | for step in range(iterations): 60 | optimization_step() 61 | elbo_vals.append(-training_loss().numpy()) 62 | nlpd_vals.append(get_predictive_nlpd(model, test_data)) 63 | 64 | if isinstance(model.likelihood, Bernoulli): 65 | eval_metric.append(get_accuracy(model, test_data)) 66 | elif isinstance(model.likelihood, gpflow.likelihoods.Softmax): 67 | pred_m, _ = model.predict_y(test_data[0]) 68 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 69 | acc = np.mean(pred_argmax == test_data[1]) 70 | eval_metric.append(acc) 71 | else: 72 | eval_metric.append(get_rmse(model, test_data)) 73 | 74 | if debug and step % 20 == 0: 75 | print(f"{step} Iteration; NLPD {nlpd_vals[-1]}; Evaluation metric (RMSE/Acc.) {eval_metric[-1]}") 76 | 77 | return elbo_vals, nlpd_vals, eval_metric 78 | 79 | 80 | def get_predictive_nlpd(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]) -> float: 81 | """ 82 | Calculate and return negative log predictive density (NLPD). 
83 | """ 84 | return -1 * tf.reduce_mean(model.predict_log_density(test_data)) 85 | 86 | 87 | def get_accuracy(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]) -> float: 88 | """ 89 | Calculate and returns accuracy in classification case. 90 | """ 91 | pred_mean, _ = model.predict_y(test_data[0]) 92 | pred_mean = pred_mean.numpy() 93 | pred_mean[pred_mean >= 0.5] = 1 94 | pred_mean[pred_mean < 0.5] = 0 95 | correct_prediction = np.sum(pred_mean == test_data[1]) 96 | return correct_prediction / test_data[0].shape[0] 97 | 98 | 99 | def get_rmse(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]): 100 | """ 101 | Calculates RMSE. 102 | """ 103 | y_pred, _ = model.predict_y(test_data[0]) 104 | return np.sqrt(np.mean(np.square(y_pred - test_data[1]))) 105 | 106 | 107 | def get_multiclass_accuracy(model: base_SVGP, test_data: Tuple[np.ndarray, np.ndarray]) -> float: 108 | """ 109 | Get multiclass accuracy 110 | """ 111 | pred_m, _ = model.predict_y(test_data[0]) 112 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 113 | acc = np.mean(pred_argmax == test_data[1]).item() 114 | return acc 115 | 116 | 117 | def convert_data_to_online(data: [np.ndarray, np.ndarray], n_sets: int, 118 | shuffle: bool = True, sort_data: bool = False) -> list: 119 | """ 120 | Get an offline data and convert it into an online dataset of n_sets. 121 | 122 | returns: a list of tuple of np.ndarray (X_i, Y_i) with X_i of shape [n_set_data, data_dim] and 123 | Y is of shape [n_set_data, output_dim]. 124 | """ 125 | Y_dtype = data[1].dtype 126 | 127 | X, Y = data 128 | XY = np.concatenate([X, Y], axis=1) 129 | 130 | if shuffle: 131 | np.random.shuffle(XY) 132 | 133 | if sort_data: 134 | np.sort(XY, axis=0) 135 | 136 | n = XY.shape[0] 137 | last_set_size = int(n % n_sets) 138 | set_size = int((n - last_set_size) / n_sets - 1) 139 | 140 | streaming_data = [] 141 | for i in range(n_sets - 1): 142 | set_data = XY[i * set_size: (i + 1) * set_size] 143 | Y_casted = set_data[:, X.shape[-1]:].astype(Y_dtype) 144 | streaming_data.append((set_data[:, :X.shape[-1]], Y_casted)) 145 | 146 | # Adding last set; this could be more than other sets as well 147 | set_data = XY[(n_sets - 1) * set_size:] 148 | 149 | Y_casted = set_data[:, X.shape[-1]:].astype(Y_dtype) 150 | streaming_data.append((set_data[:, :X.shape[-1]], Y_casted)) 151 | 152 | assert len(streaming_data) == n_sets 153 | assert streaming_data[0][0].shape[-1] == X.shape[-1] 154 | assert streaming_data[0][1].shape[-1] == Y.shape[-1] 155 | 156 | return streaming_data 157 | 158 | 159 | def optimize_online_model(online_gp: OnlineGP, train_data: List[Tuple[np.ndarray, np.ndarray]], 160 | test_data: Tuple[np.ndarray, np.ndarray], train_hyperparams: bool = False, 161 | hyperparams_step: int = 10, train_memory: bool = False, debug: bool = False) -> [list, list, 162 | list]: 163 | """ 164 | Optimize online GP model on train data, which are already in streaming set, and returns nlpd values on test set, 165 | rmse or accuracy value, and time taken. 
166 | """ 167 | n_sets = len(train_data) 168 | 169 | if debug: 170 | print(f"Initial NLPD: {get_predictive_nlpd(online_gp.model, test_data)}") 171 | 172 | nlpd_vals = [] 173 | eval_metric = [] 174 | time_vals = [] 175 | for n in range(n_sets): 176 | for var in online_gp.optimizer.variables(): 177 | var.assign(tf.zeros_like(var)) 178 | 179 | start_time = time.time() 180 | new_data = train_data[n] 181 | new_data = (new_data[0], new_data[1]) 182 | online_gp.update_with_new_batch(new_data, n_hyp_opt_steps=hyperparams_step, train_hyps=train_hyperparams, 183 | train_mem=train_memory, remove_memory=True, return_kernel_params=False) 184 | 185 | time_vals.append(time.time() - start_time) 186 | 187 | nlpd_vals.append(get_predictive_nlpd(online_gp.model, test_data)) 188 | 189 | if isinstance(online_gp.model.likelihood, Bernoulli): 190 | eval_metric.append(get_accuracy(online_gp.model, test_data)) 191 | elif isinstance(online_gp.model.likelihood, gpflow.likelihoods.Softmax): 192 | eval_metric.append(get_multiclass_accuracy(online_gp.model, test_data)) 193 | else: 194 | eval_metric.append(get_rmse(online_gp.model, test_data)) 195 | 196 | if debug: 197 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 198 | print(f"Set {n}") 199 | print(f"NLPD = {nlpd_vals[-1]}") 200 | print(f"Eval. metric (RMSE/Acc.) = {eval_metric[-1]}") 201 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 202 | 203 | return nlpd_vals, eval_metric, time_vals 204 | -------------------------------------------------------------------------------- /experiments/hotspots/README.md: -------------------------------------------------------------------------------- 1 | # Hotspots experiment 2 | 3 | This folder contains the experiments to reproduce our comparison against [Maddox et al. (2021)](https://arxiv.org/abs/2110.15172). 4 | 5 | As such, `data/civ_data.csv` is a verbatim copy of [their data set](https://github.com/wjmaddox/online_vargp/blob/main/experiments/hotspots/data/civ_data.csv) and `hotspots.py` only has minimal changes from [their original experiment script](https://github.com/wjmaddox/online_vargp/blob/main/experiments/hotspots/hotspots.py) to integrate our own method. 6 | 7 | Our only noteworthy departure from Maddox et al. (2021) is that we remove their tempering (changing `beta=0.1` to `beta=1.0`). This change benefits all methods. 8 | 9 | `our_tsvgp.py` contains a GPyTorch-compatible implementation of our proposed method (not feature-complete; it only includes those aspects required to run the Hotspots experiment). 10 | 11 | `env.yaml` describes a Conda environment with all required dependencies; it can be instantiated using 12 | ```bash 13 | conda env create --file env.yaml 14 | ``` 15 | 16 | ## Re-run experiments 17 | 18 | The experiments can be reproduced by submitting the following jobs on a SLURM cluster: 19 | ```bash 20 | sbatch submit_random.sh 21 | sbatch submit_ovc.sh 22 | sbatch submit_ours.sh 23 | ``` 24 | Each script will spawn the respective experiment for 50 different seeds using SLURM's [Job Array support](https://slurm.schedmd.com/job_array.html). 25 | 26 | After all the runs have finished, run 27 | ```bash 28 | python extract_results.py 29 | ``` 30 | to regenerate `results/hotspots_results.npz` (which should be equivalent to the version stored in this repository). 
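If you want to sanity-check the regenerated archive before plotting, it can be loaded back with NumPy. The following is a minimal sketch (not part of the repository scripts); the variable names are illustrative, and the layout it assumes is the one written by `extract_results.py`: a single pickled list holding a dict whose `results` entry contains one `(label, timings, all_res, mean, stderr)` tuple per method.

```python
import numpy as np

# np.savez stores the positional argument under the default key "arr_0";
# the value is a one-element object array wrapping a dict, so pickling is needed.
archive = np.load("results/hotspots_results.npz", allow_pickle=True)
(payload,) = archive["arr_0"]

# Each entry of payload["results"] is (label, timings, all_res, mean, stderr),
# where mean/stderr map "acc", "mse", "sens", "sampled_acc" to per-step arrays.
for label, timings, all_res, mean, stderr in payload["results"]:
    print(label, "mean hotspot accuracy per step:", mean["acc"])
```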
31 | 32 | ## Re-create figures 33 | 34 | To recreate Figure 4 and the timing results in our paper, run 35 | ```bash 36 | python visualize_results.py 37 | ``` 38 | which regenerates the following three files in the `results/` subdirectory: 39 | - `hotspots-results-acc.tex` is the TikZ/pgfplots figure for Hotspot Accuracy. 40 | - `hotspots-results-mse.tex` is the TikZ/pgfplots figure for Prevalence MSE. 41 | - `timings.dat` gives the average run times per step of each method, including standard deviation. 42 | -------------------------------------------------------------------------------- /experiments/hotspots/env.yaml: -------------------------------------------------------------------------------- 1 | name: ovcexperiment 2 | dependencies: 3 | - python=3.8 4 | - pytorch==1.12.1 5 | - pip 6 | - pip: 7 | - gpytorch==1.4 8 | - botorch==0.4.0 9 | - git+https://github.com/wjmaddox/online_vargp@7bd3da50eac32d70ca323309e3f3d80a2ae7c419 10 | - matplotlib 11 | - tikzplotlib 12 | -------------------------------------------------------------------------------- /experiments/hotspots/extract_results.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | import glob 6 | import re 7 | 8 | num_re = re.compile(r".*_([0-9]*)_.*") 9 | 10 | def get_all_runs_available(fn_globs): 11 | all_runs_available = set() 12 | for fn_glob in fn_globs: 13 | files = glob.glob(fn_glob) 14 | nums = [int(num_re.match(fn)[1]) for fn in files] 15 | print(fn_glob, len(nums)) 16 | if not all_runs_available: 17 | all_runs_available = set(nums) 18 | else: 19 | all_runs_available = all_runs_available.intersection(set(nums)) 20 | 21 | all_runs_available = sorted(all_runs_available) 22 | return all_runs_available 23 | 24 | from collections import namedtuple 25 | 26 | Run = namedtuple("Run", ["label", "results_pattern", "output_pattern"]) 27 | 28 | class Data: 29 | beta = 1.0 30 | _results_base = "output_dir/civ_ind_svgp_{i}_AMD_" 31 | runs = [ 32 | Run("Random", _results_base+"random.pt", "./hotspots_random_{i}.out"), 33 | Run("Entropy (OVC)", _results_base+"entropy.pt", "./hotspots_ovc_{i}.out"), 34 | Run("Entropy (Ours)", _results_base+"tsvgp.pt", "./hotspots_ours_{i}.out"), 35 | ] 36 | 37 | class DataTempered: 38 | beta = 0.1 39 | _results_base = "beta0.1/output_dir/civ_ind_svgp_{i}_AMD_" 40 | runs = [ 41 | Run("random", _results_base+"random.pt", "./beta0.1/hotspots_random_{i}.out"), 42 | Run("entropy (OVC)", _results_base+"entropy.pt", "./beta0.1/hotspots_ovc_{i}.out"), 43 | Run("entropy (Ours)", _results_base+"tsvgp.pt", "./beta0.1/hotspots_ours_{i}.out"), 44 | ] 45 | 46 | time_re = re.compile(r"time = ([0-9.]*)\.memory") 47 | 48 | def parse_time(fn): 49 | with open(fn) as f: 50 | matches = time_re.findall(f.read()) 51 | return np.array(list(map(float, matches))) 52 | 53 | def get_timings(fnames): 54 | return np.array([parse_time(fn) for fn in fnames]) 55 | 56 | def load_results(fnames): 57 | props = ("acc", "mse", "sens", "sampled_acc") 58 | all_res = {prop: [] for prop in props} 59 | for fn in fnames: 60 | d = torch.load(fn, map_location=torch.device('cpu')) 61 | res = {} 62 | [ 63 | res["acc"], res["mse"], res["sens"], res["sampled_acc"] 64 | ] = [ 65 | hotspot_acc_list, 66 | hotspot_mse_list, 67 | hotspot_sens_list, 68 | hotspot_sampled_acc_list 69 | ] = d["results"] 70 | for prop in props: 71 | all_res[prop].append(res[prop].numpy()) 72 | for prop in props: 73 | all_res[prop] = np.array(all_res[prop]) 74 | mean = {prop: 
all_res[prop].mean(axis=0) for prop in props} 75 | stderr = {prop: all_res[prop].std(axis=0) / np.sqrt(all_res[prop].shape[0] - 1) for prop in props} 76 | return all_res, mean, stderr 77 | 78 | #Results = namedtuple("Results", ["label", "timings", "all_res", "mean", "stderr"]) 79 | 80 | def process(dat): 81 | all_runs_available = get_all_runs_available([run.results_pattern.format(i="*") for run in dat.runs]) 82 | results = [] 83 | for run in dat.runs: 84 | ts = get_timings([run.output_pattern.format(i=i) for i in all_runs_available]) 85 | all_res, mean, stderr = load_results([run.results_pattern.format(i=i) for i in all_runs_available]) 86 | results.append((run.label, ts, all_res, mean, stderr)) 87 | return results 88 | 89 | def main(): 90 | np.savez("results/hotspots_results.npz", [dict( 91 | results=process(Data()), 92 | # tempered=process(DataTempered()), 93 | )]) 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /experiments/hotspots/hotspots.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/wjmaddox/online_vargp/blob/main/experiments/hotspots/hotspots.py (licensed under GPL) 2 | # to allow importing our own implementation from our_tsvgp.py (with its own license) and run in the same experiment framework 3 | 4 | import math 5 | import argparse 6 | import pandas as pd 7 | import time 8 | import torch 9 | 10 | import numpy as np 11 | 12 | torch.set_default_dtype(torch.float64) 13 | 14 | from botorch.sampling import SobolQMCNormalSampler 15 | from botorch.optim.fit import fit_gpytorch_torch 16 | from torch.distributions import Bernoulli 17 | from gpytorch.kernels import ScaleKernel, MaternKernel 18 | from gpytorch.mlls import VariationalELBO, PredictiveLogLikelihood 19 | from gpytorch.priors import GammaPrior 20 | 21 | from volatilitygp.models import SingleTaskVariationalGP 22 | from our_tsvgp import OurSingleTaskVariationalGP 23 | from volatilitygp.likelihoods.binomial_likelihood import BinomialLikelihood 24 | 25 | 26 | class Squeeze(torch.nn.Module): 27 | def forward(self, x): 28 | return x.squeeze(-1) 29 | 30 | 31 | def parse(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument("--seed", type=int, default=0) 34 | parser.add_argument("--output", type=str, default="results.pt") 35 | parser.add_argument("--dataset", type=str, default="civ") 36 | parser.add_argument("--n_batch", type=int, default=100) 37 | parser.add_argument("--num_init", type=int, default=100) 38 | parser.add_argument("--batch_size", type=int, default=1) 39 | parser.add_argument("--batch_limit", type=int, default=64) 40 | parser.add_argument("--inner_samples", type=int, default=16) 41 | parser.add_argument("--outer_samples", type=int, default=16) 42 | parser.add_argument("--random", action="store_true") 43 | parser.add_argument("--loss", type=str, default="elbo") 44 | parser.add_argument("--lr", type=float, default=0.01) 45 | parser.add_argument("--beta", type=float, default=0.1) 46 | parser.add_argument("--ind_models", action="store_true") 47 | parser.add_argument("--eval_on_full_set", action="store_true") 48 | 49 | parser.add_argument("--use_tsvgp", action="store_true") 50 | parser.add_argument("--tsvgp_lr", type=float, default=1.0) 51 | parser.add_argument("--tsvgp_num_online_updates", type=int, default=1) 52 | parser.add_argument("--tsvgp_jitter", type=float, default=0.0) 53 | 54 | return parser.parse_args() 55 | 56 | 57 | def 
entropy_via_threshold(f, threshold=0.1): 58 | bern_entropy = Bernoulli(logits=f).entropy() 59 | spiked_bern_entropy = bern_entropy * (f > math.log(threshold / (1 - threshold))) 60 | return spiked_bern_entropy.mean(0).sum(-1) 61 | 62 | 63 | def entropy_reduction(model, batch_set, test_set, inner_samples=32, outer_samples=16): 64 | inner_sampler = SobolQMCNormalSampler(inner_samples) 65 | outer_sampler = SobolQMCNormalSampler(outer_samples) 66 | 67 | original_entropy = entropy_via_threshold(inner_sampler(model.posterior(test_set))) 68 | 69 | fantasy_model = model.fantasize( 70 | batch_set, sampler=inner_sampler, observation_noise=True 71 | ) 72 | fant_post = fantasy_model.posterior(test_set) 73 | fant_samp = outer_sampler(fant_post) 74 | fantasy_entropy = entropy_via_threshold( 75 | fant_samp 76 | ) 77 | 78 | return (original_entropy - fantasy_entropy).clamp(min=0.0).sum(0) 79 | 80 | 81 | def main( 82 | dataset: str = "civ", 83 | seed: int = 0, 84 | num_init: int = 100, 85 | batch_size: int = 1, 86 | n_batch: int = 100, 87 | inner_samples: int = 16, 88 | outer_samples: int = 16, 89 | batch_limit: int = 64, 90 | output: str = "results.pt", 91 | random: bool = False, 92 | beta: float = 0.1, 93 | loss: str = "elbo", 94 | lr: float = 0.01, 95 | eval_on_full_set: bool = False, 96 | recycle_lengthscales: bool = True, 97 | 98 | use_tsvgp: bool = False, 99 | tsvgp_lr: float=1.0, 100 | tsvgp_num_online_updates: int=1, 101 | tsvgp_jitter: float=0.0, 102 | ): 103 | verbose = True 104 | np.random.seed(seed) 105 | torch.manual_seed(seed) 106 | 107 | data = pd.read_csv("data/" + dataset + "_data.csv") 108 | if dataset == "civ" or dataset == "hti": 109 | threshold = 0.1 110 | else: 111 | threshold = 0.02 112 | 113 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 114 | 115 | ## split data 116 | 117 | full_x = torch.tensor(data.iloc[:, :-2].values).to(device) 118 | full_ground_truth_prob = torch.tensor(data.iloc[:, -1].values).to(device) 119 | 120 | perm = torch.randperm(data.shape[0]) 121 | train_inds = perm[:num_init] 122 | test_inds = perm[num_init:] 123 | 124 | train_x = full_x[train_inds] 125 | train_ground_truth_prob = full_ground_truth_prob[train_inds] 126 | 127 | test_x = full_x[test_inds] 128 | test_ground_truth_prob = full_ground_truth_prob[test_inds] 129 | 130 | # draw samples from ground truth probability 131 | train_y = ( 132 | torch.distributions.Binomial(total_count=100, probs=train_ground_truth_prob) 133 | .sample() 134 | .unsqueeze(-1) 135 | ) 136 | test_y = ( 137 | torch.distributions.Binomial(total_count=100, probs=test_ground_truth_prob) 138 | .sample() 139 | .unsqueeze(-1) 140 | ) 141 | 142 | ## normalize data to [0, 1]^d 143 | mins = train_x.min(0)[0] 144 | maxes = train_x.max(0)[0] 145 | train_x = (train_x - mins) / (maxes - mins) 146 | test_x = (test_x - mins) / (maxes - mins) 147 | 148 | hotspot_acc_list, hotspot_mse_list, hotspot_sens_list = [], [], [] 149 | hotspot_sampled_acc_list = [] 150 | 151 | for iteration in range(n_batch): 152 | t0 = time.time() 153 | ## define model 154 | covar_module = ScaleKernel( 155 | MaternKernel( 156 | ard_num_dims=8, 157 | nu=1.5, 158 | lengthscale_prior=GammaPrior(3.0, 6.0), 159 | outputscale_prior=GammaPrior(2.0, 0.15), # sic! 
160 | ) 161 | ) 162 | if iteration > 0 and recycle_lengthscales: 163 | print("recycling lengthscales") 164 | covar_module.outputscale = old_outputscale 165 | covar_module.base_kernel.lengthscale = old_lengthscale 166 | 167 | VGPClass = OurSingleTaskVariationalGP if use_tsvgp else SingleTaskVariationalGP 168 | 169 | model = VGPClass( 170 | likelihood=BinomialLikelihood(), 171 | init_points=train_x, 172 | init_targets=train_y.squeeze(-1), 173 | num_inducing=train_x.shape[0], 174 | use_piv_chol_init=True, 175 | learn_inducing_locations=True, 176 | covar_module=covar_module, 177 | ) 178 | 179 | if use_tsvgp: 180 | model.lr = tsvgp_lr 181 | model.num_online_updates = tsvgp_num_online_updates 182 | model.tsvgp_jitter = tsvgp_jitter 183 | 184 | if loss == "elbo": 185 | mll = VariationalELBO( 186 | model.likelihood, model, num_data=train_x.shape[0], beta=beta 187 | ) 188 | elif loss == "pll": 189 | mll = PredictiveLogLikelihood( 190 | model.likelihood, model, num_data=train_x.shape[0], beta=beta 191 | ) 192 | 193 | fit_gpytorch_torch(mll, options={"lr": lr, "maxiter": 1000}) 194 | 195 | ### record hotspot probability 196 | with torch.no_grad(): 197 | model.eval() 198 | 199 | # for some ungodly reason andrade-pacheco et al evaluate on the full set, not the heldout set 200 | # thus we need to predict over all of the data (training data included) 201 | if eval_on_full_set: 202 | # we re-apply normalization 203 | x_for_pred = (full_x - mins) / (maxes - mins) 204 | gt_for_pred = full_ground_truth_prob 205 | else: 206 | x_for_pred = test_x 207 | gt_for_pred = test_ground_truth_prob 208 | 209 | pred_dist = model(x_for_pred) 210 | pred_prob = (pred_dist.mean.mul(-1).exp() + 1).reciprocal() 211 | 212 | true_is_hotspot = gt_for_pred > threshold 213 | 214 | # lets see if this is more accurate 215 | hotspot_samples = pred_dist.sample(torch.Size((512,))) 216 | hotspot_sampled_prob = (hotspot_samples.mul(-1).exp() + 1).reciprocal() 217 | hotspot_sampled_pred = (hotspot_sampled_prob > threshold).sum(0) > 256 218 | hotspot_sampled_acc = ( 219 | ((hotspot_sampled_pred > threshold) == true_is_hotspot) 220 | .float() 221 | .mean() 222 | .cpu() 223 | .item() 224 | ) 225 | 226 | hotspot_acc = ( 227 | ((pred_prob > threshold) == true_is_hotspot).float().mean().cpu().item() 228 | ) 229 | hotspot_mse = (pred_prob - gt_for_pred).pow(2).mean().cpu().item() 230 | 231 | hotspot_sens = ( 232 | (pred_prob > threshold).float() * true_is_hotspot.float() 233 | ).sum().cpu().item() / true_is_hotspot.float().sum().cpu().item() 234 | 235 | if not random: 236 | ### now select a new point 237 | entropy_list = [] 238 | for start in range(0, test_x.shape[0] + batch_limit, batch_limit): 239 | [p.detach_() for p in model.parameters()] 240 | # TODO: batch size of 10 via cyclic optimization 241 | query_points = test_x[start : (start + batch_limit)].unsqueeze(-2) 242 | if query_points.shape[0] > 0: 243 | entropy = ( 244 | entropy_reduction( 245 | model, query_points, test_x, inner_samples, outer_samples 246 | ) 247 | .sum(-1) 248 | .detach() 249 | .cpu() 250 | ) 251 | entropy_list.append(entropy) 252 | 253 | if batch_size == 1: 254 | best_point = torch.cat(entropy_list).argmax() 255 | else: 256 | raise NotImplementedError("oops, batch size of 1 is not implemented") 257 | else: 258 | # best point is randomly selected 259 | best_point = torch.randperm(test_x.shape[0])[:batch_size] 260 | if batch_size == 1: 261 | best_point = best_point.item() 262 | entropy_list = None 263 | 264 | train_x = torch.cat((train_x, test_x[best_point].unsqueeze(0))) 
265 | train_y = torch.cat((train_y, test_y[best_point].unsqueeze(0))) 266 | train_ground_truth_prob = torch.cat( 267 | (train_ground_truth_prob, test_ground_truth_prob[best_point].unsqueeze(0)) 268 | ) 269 | 270 | test_x = torch.cat((test_x[:best_point], test_x[(best_point + 1) :])) 271 | test_y = torch.cat((test_y[:best_point], test_y[(best_point + 1) :])) 272 | test_ground_truth_prob = torch.cat( 273 | ( 274 | test_ground_truth_prob[:best_point], 275 | test_ground_truth_prob[(best_point + 1) :], 276 | ) 277 | ) 278 | 279 | t1 = time.time() 280 | 281 | if verbose: 282 | print( 283 | f"\nBatch {iteration:>2}: current_value (acc, sacc, mse, sens) = " 284 | f"({hotspot_acc:>4.2f}, {hotspot_sampled_acc:>4.2f}, {hotspot_mse:>4.2f} {hotspot_sens:>4.2f}, " 285 | f"time = {t1-t0:>4.2f}.", 286 | end="", 287 | ) 288 | else: 289 | print(".") 290 | 291 | hotspot_acc_list.append(hotspot_acc) 292 | hotspot_sampled_acc_list.append(hotspot_sampled_acc) 293 | hotspot_mse_list.append(hotspot_mse) 294 | hotspot_sens_list.append(hotspot_sens) 295 | 296 | old_lengthscale = model.covar_module.base_kernel.lengthscale.detach() 297 | old_outputscale = model.covar_module.outputscale.detach() 298 | del model, entropy_list 299 | 300 | torch.cuda.empty_cache() 301 | memory_alloc = torch.cuda.memory_allocated(device) / (1024 ** 3) 302 | print("memory allocated: ", memory_alloc) 303 | 304 | output_dict = { 305 | "results": [ 306 | torch.tensor(hotspot_acc_list), 307 | torch.tensor(hotspot_mse_list), 308 | torch.tensor(hotspot_sens_list), 309 | torch.tensor(hotspot_sampled_acc_list), 310 | ], 311 | "data": {"x": train_x, "theta": train_ground_truth_prob, "y": train_y}, 312 | #"stats": {"time": torch.tensor(time) 313 | } 314 | return output_dict 315 | 316 | 317 | if __name__ == "__main__": 318 | args = parse() 319 | args.recycle_lengthscales = not args.ind_models 320 | del args.ind_models 321 | output_dict = main(**vars(args)) 322 | output_dict["pars"] = vars(args) 323 | 324 | torch.save(output_dict, args.output) 325 | -------------------------------------------------------------------------------- /experiments/hotspots/results/hotspots_results.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/experiments/hotspots/results/hotspots_results.npz -------------------------------------------------------------------------------- /experiments/hotspots/results/timings.dat: -------------------------------------------------------------------------------- 1 | Random : 7.23 +/- 0.03 (std.err) +/- 2.30 (std.dev) 2 | Entropy (OVC) : 84.57 +/- 0.15 (std.err) +/- 10.77 (std.dev) 3 | Entropy (Ours) : 62.09 +/- 0.10 (std.err) +/- 6.75 (std.dev) 4 | 5 | -------------------------------------------------------------------------------- /experiments/hotspots/submit_ours.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=3:00:00 3 | #SBATCH --mem=20000M 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --constraint=volta 6 | #SBATCH --output=./hotspots_ours_%a.out 7 | #SBATCH --array=1-50 8 | 9 | module load miniconda 10 | source activate ovcexperiment 11 | 12 | mkdir -p output_dir 13 | 14 | python hotspots.py --seed=$SLURM_ARRAY_TASK_ID --n_batch=100 --batch_limit=8 --num_init=100 \ 15 | --beta=1.0 --loss=elbo --dataset=civ --inner_samples=16 --outer_samples=16 \ 16 | --use_tsvgp --output=output_dir/civ_ind_svgp_${SLURM_ARRAY_TASK_ID}_AMD_tsvgp.pt 17 | 
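# Note: the results file written via --output above and the SLURM --output log pattern (hotspots_ours_%a.out) are the results_pattern and output_pattern that extract_results.py (Data.runs, "Entropy (Ours)") expects; keep them in sync if either path is renamed.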
-------------------------------------------------------------------------------- /experiments/hotspots/submit_ovc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=4:00:00 3 | #SBATCH --mem=20000M 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --constraint=volta 6 | #SBATCH --output=./hotspots_ovc_%a.out 7 | #SBATCH --array=1-50 8 | 9 | module load miniconda 10 | source activate ovcexperiment 11 | 12 | mkdir -p output_dir 13 | 14 | python hotspots.py --seed=$SLURM_ARRAY_TASK_ID --n_batch=100 --batch_limit=8 --num_init=100 \ 15 | --beta=1.0 --loss=elbo --dataset=civ --inner_samples=16 --outer_samples=16 \ 16 | --output=output_dir/civ_ind_svgp_${SLURM_ARRAY_TASK_ID}_AMD_entropy.pt 17 | -------------------------------------------------------------------------------- /experiments/hotspots/submit_random.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=2:00:00 3 | #SBATCH --mem=5000M 4 | #SBATCH --output=./hotspots_random_%a.out 5 | #SBATCH --array=1-50 6 | 7 | module load miniconda 8 | source activate ovcexperiment 9 | 10 | mkdir -p output_dir 11 | 12 | python hotspots.py --seed=$SLURM_ARRAY_TASK_ID --n_batch=100 --num_init=100 \ 13 | --beta=1.0 --loss=elbo --dataset=civ --random \ 14 | --output=output_dir/civ_ind_svgp_${SLURM_ARRAY_TASK_ID}_AMD_random.pt 15 | -------------------------------------------------------------------------------- /experiments/hotspots/visualize_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | [[res]] = np.load("results/hotspots_results.npz", allow_pickle=True).values() 5 | 6 | betas = {"results": 1.0, "tempered": 0.1} 7 | keys = [ 8 | "results", 9 | # "tempered", 10 | ] 11 | 12 | import sys 13 | 14 | def print_timings(res, io=sys.stdout): 15 | label, ts, _, _, _ = res 16 | mean = ts.mean() 17 | stddev = ts.std() 18 | stderr = ts.std() / np.sqrt(ts.size - 1) 19 | print(f"{label:29s}: {mean:6.2f} +/- {stderr:.2f} (std.err) +/- {stddev:.2f} (std.dev)", file=io) 20 | 21 | with open("results/timings.dat", "w") as f: 22 | for key in keys: 23 | # print(f"beta={betas[key]}:", file=f) 24 | for r in res[key]: 25 | print_timings(r, f) 26 | print(file=f) 27 | 28 | 29 | def plot_mean(res, prop): 30 | label, ts, all_res, mean, stderr = res 31 | plt.plot(range(100,200), mean[prop], label=label) 32 | scale = 1 33 | plt.fill_between(range(100, 200), mean[prop] - scale*stderr[prop], mean[prop] + scale*stderr[prop], alpha=0.3) 34 | 35 | 36 | prop_titles = { 37 | # "sampled_acc": "accuracy (sampled)", 38 | "acc": "Hotspot Accuracy", 39 | "mse": "Prevalence MSE", 40 | # "sens": "[sens]" 41 | } 42 | 43 | 44 | def tikzplotlib_fix_ncols(obj): 45 | """ 46 | workaround for matplotlib 3.6 renamed legend's _ncol to _ncols, which breaks tikzplotlib 47 | """ 48 | if hasattr(obj, "_ncols"): 49 | obj._ncol = obj._ncols 50 | for child in obj.get_children(): 51 | tikzplotlib_fix_ncols(child) 52 | 53 | 54 | import tikzplotlib 55 | 56 | for key in keys: 57 | for prop in ["acc", "mse"]: 58 | fig = plt.figure() 59 | # plt.title(f"$\\beta={betas[key]}$") 60 | plt.xlabel("Steps") 61 | plt.ylabel(f"{prop_titles[prop]}") 62 | 63 | if prop == "mse": 64 | # plt.ylim(0.013, 0.0261) # limits used in Maddox et al. 
(2021) 65 |             plt.ylim(0.0115, 0.0265) # our method is too much better to fit 66 |         elif prop == "acc" or prop == "sampled_acc": 67 |             plt.ylim(0.81, 0.89) 68 |             plt.yticks([0.82,0.84,0.86,0.88]) 69 | 70 |         for r in res[key][:3]: 71 |             plot_mean(r, prop) 72 |         plt.grid() 73 |         plt.legend() 74 |         tikzplotlib_fix_ncols(fig) 75 |         tikzplotlib.save(f"results/hotspots-{key}-{prop}.tex", figure=fig, 76 |                          axis_width=r"\figurewidth", 77 |                          axis_height=r"\figureheight") 78 | -------------------------------------------------------------------------------- /experiments/magnetometer/README.md: -------------------------------------------------------------------------------- 1 | # Magnetic Anomalies Experiment 2 | 3 | This experiment maps the local anomalies in the ambient magnetic field. The experiment is based on [Solin et al. (2018)](https://ieeexplore.ieee.org/document/8373720). 4 | 5 | ## Offline Model 6 | For training an offline model: 7 | ```shell 8 | python offline_model.py model/kernel=sum_constant_matern52 9 | ``` 10 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_offline_experiment.yaml`. 11 | ## Fast Conditioning of the proposed model 12 | 13 | For fast conditioning of the model (Figure 5(b)), run the following command: 14 | ```shell 15 | python online_fc_plots.py -o={offline_model_path} 16 | ``` 17 | 18 | ## Proposed model 19 | 20 | Experiment where multiple observation paths are observed: 21 | ``` 22 | python online_model.py model/kernel=sum_constant_matern52 23 | ``` 24 | 25 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_online_experiment.yaml`. 26 | 27 | ## NLPD comparison experiment 28 | 29 | **Proposed Model** 30 | 31 | ```shell 32 | python online_model.py model/kernel=sum_constant_matern52 streaming=True 33 | ``` 34 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_online_experiment.yaml`. 35 | 36 | **Bui et al. (2017):** 37 | 38 | ``` 39 | python streaming_gp_model.py model/kernel=sum_constant_matern52 40 | ``` 41 | 42 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/magnetometer_streaming_experiment.yaml`. 43 | -------------------------------------------------------------------------------- /experiments/magnetometer/__pycache__/magnetometer_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/experiments/magnetometer/__pycache__/magnetometer_utils.cpython-38.pyc -------------------------------------------------------------------------------- /experiments/magnetometer/magnetometer_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | A_room2video = np.array([70.7791, -388.6396, 618.0954, 7 |                          -66.1362, 26.6245, 665.3620, 8 |                          0, 0, 1.0000]).reshape((3, 3)) 9 | 10 | C_room2video = np.array([0.1597, -0.0318, 1.0000]).reshape((1, 3)) 11 | 12 | A_grid2room = np.array([0.0118, 2.7777, 1.3689, 13 |                         -2.2243, -0.0967, -1.0929, 14 |                         0, 0, 1.0000]).reshape((3, 3)) 15 | 16 | C_grid2room = np.array([0.0160, -0.1450, 1.0000]).reshape((1, 3)) 17 | 18 | 19 | def load_data(main_dir: str, train_id: list = None, test_id: list = None) -> [list, list]: 20 |     """ 21 |     Load magnetometer data.
22 | 23 | Main source of data is: https://github.com/AaltoML/magnetic-data 24 | 25 | Note: The function involves some constants that are specific to the data. 26 | """ 27 | data_train = [] 28 | data_test = None 29 | 30 | if train_id is None: 31 | train_id = [1, 2, 4, 5] 32 | 33 | if test_id is None: 34 | test_id = [1] 35 | 36 | for i in train_id: 37 | loc_path = os.path.join(main_dir, str(i) + "-loc.csv") 38 | mag_path = os.path.join(main_dir, str(i) + "-mag.csv") 39 | 40 | loc_data = pd.read_csv(loc_path).to_numpy() 41 | mag_data = pd.read_csv(mag_path).to_numpy() 42 | 43 | # take norm of mag data 44 | mag_data_norm = np.sqrt(np.sum(np.square(mag_data), axis=-1))[..., None] 45 | data_combined = np.concatenate([loc_data, mag_data_norm], axis=1) 46 | 47 | data_train.append([data_combined[:, :-1], data_combined[:, -1:]]) 48 | 49 | for i in test_id: 50 | loc_path = os.path.join(main_dir, str(i) + "-loc.csv") 51 | mag_path = os.path.join(main_dir, str(i) + "-mag.csv") 52 | 53 | loc_data = pd.read_csv(loc_path).to_numpy() 54 | mag_data = pd.read_csv(mag_path).to_numpy() 55 | 56 | # take norm of mag data 57 | mag_data_norm = np.sqrt(np.sum(np.square(mag_data), axis=-1))[..., None] 58 | 59 | data_combined = np.concatenate([loc_data, mag_data_norm], axis=1) 60 | 61 | if data_test is None: 62 | data_test = [np.array(data_combined[:, :-1]), np.array(data_combined[:, -1:])] 63 | else: 64 | data_test = [np.concatenate([data_test[0], np.array(data_combined[:, :-1])], axis=0), 65 | np.concatenate([data_test[1], np.array(data_combined[:, -1:])], axis=0)] 66 | 67 | return data_train, data_test 68 | 69 | 70 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | """ 72 | Below function are for plotting purposes and comes from original Matlab scripts. 73 | 74 | They are for transformation between room, video, grid. 
75 | """ 76 | 77 | 78 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 79 | 80 | 81 | def get_transformed_grid(): 82 | z1 = np.concatenate([np.linspace(-1, 1, 32), np.nan * np.ones((1,))]) 83 | z2 = z1.copy() 84 | 85 | g1, g2 = np.meshgrid(z1, z2) 86 | Z = np.concatenate([g1.reshape((-1, 1)), g2.reshape((-1, 1))], axis=1) 87 | 88 | var1 = A_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 89 | var2 = C_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 90 | 91 | Z = np.divide(var1, var2).T 92 | Z = Z[:, :2] 93 | 94 | var1 = A_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 95 | var2 = C_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 96 | Y = np.divide(var1, var2).T 97 | 98 | g1 = np.reshape(Y[:, 0], g1.shape) 99 | g2 = np.reshape(Y[:, 1], g2.shape) 100 | 101 | return g1, g2 102 | 103 | 104 | def transform_grid2video(x, y): 105 | Z = np.concatenate([x.reshape((-1, 1)), y.reshape((-1, 1))], axis=1) 106 | var1 = A_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 107 | var2 = C_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 108 | 109 | Z = np.divide(var1, var2).T 110 | Z = Z[:, :2] 111 | 112 | var1 = A_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 113 | var2 = C_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 114 | Y = np.divide(var1, var2).T 115 | return Y[:, 0], Y[:, 1] 116 | 117 | 118 | def transform_room2video(x, y): 119 | Z = np.concatenate([x.reshape((-1, 1)), y.reshape((-1, 1))], axis=1) 120 | var1 = A_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 121 | var2 = C_room2video @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 122 | Y = np.divide(var1, var2).T 123 | 124 | return Y[:, 0], Y[:, 1] 125 | 126 | 127 | def transform_grid2room(x, y): 128 | Z = np.concatenate([x.reshape((-1, 1)), y.reshape((-1, 1))], axis=1) 129 | var1 = A_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 130 | var2 = C_grid2room @ np.concatenate([Z, np.ones((Z.shape[0], 1))], axis=-1).T 131 | Y = np.divide(var1, var2).T 132 | return Y[:, 0], Y[:, 1] 133 | 134 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 135 | -------------------------------------------------------------------------------- /experiments/magnetometer/offline_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import numpy as np 7 | import hydra 8 | from omegaconf import DictConfig 9 | from hydra.utils import call, instantiate 10 | 11 | import sys 12 | sys.path.append("..") 13 | sys.path.append("../../") 14 | 15 | from exp_utils import get_hydra_output_dir 16 | 17 | # A logger for this file 18 | log = logging.getLogger(__name__) 19 | 20 | 21 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="magnetometer_offline_experiment") 22 | def run_experiment(cfg: DictConfig): 23 | """ 24 | Initialize and run the experiment. 
25 | """ 26 | all_train_data, test_data = instantiate(cfg.dataset.dataloader)() 27 | log.info("Data loaded successfully!!!") 28 | 29 | output_dir = get_hydra_output_dir() 30 | 31 | # Merge all train_data and test_data into one 32 | train_data = None 33 | for data in all_train_data: 34 | if train_data is None: 35 | train_data = (data[0], data[1]) 36 | else: 37 | train_data = (np.concatenate([train_data[0], data[0]]), np.concatenate([train_data[1], data[1]])) 38 | 39 | # Set up inducing variables 40 | n_inducing_variable = int(np.sqrt(cfg.n_inducing_variable).item()) 41 | xx, yy = np.linspace(-2, 5, n_inducing_variable), np.linspace(-4, 2, n_inducing_variable) 42 | z1, z2 = np.meshgrid(xx, yy) 43 | zz = np.vstack((z1.flatten(), z2.flatten())).T 44 | inducing_variable = zz.tolist() 45 | cfg.model.inducing_variable = inducing_variable 46 | cfg.model.num_data = train_data[0].shape[0] 47 | 48 | model = instantiate(cfg.model) 49 | model.kernel.kernels[0].variance.assign(500) 50 | 51 | elbo_vals, nlpd_vals, rmse_vals = call(cfg.optimize)(model=model, train_data=train_data, test_data=test_data, 52 | optimizer=instantiate(cfg.optimizer), debug=True) 53 | 54 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 55 | log.info(f"Test RMSE: {rmse_vals[-1]}") 56 | 57 | log.info("Optimization successfully done!!!") 58 | 59 | parameters = gpflow.utilities.parameter_dict(model) 60 | with open(os.path.join(output_dir, "model_offline_magnetometer.pkl"), "wb") as f: 61 | pickle.dump(parameters, f) 62 | 63 | 64 | if __name__ == '__main__': 65 | run_experiment() 66 | -------------------------------------------------------------------------------- /experiments/magnetometer/online_fc_plots.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import argparse 3 | import gpflow 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | import matplotlib.pyplot as plt 8 | import sys 9 | sys.path.append("../..") 10 | sys.path.append("..") 11 | 12 | from magnetometer_utils import load_data, transform_room2video, get_transformed_grid, transform_grid2room 13 | from exp_utils import convert_data_to_online 14 | from src.models.tsvgp_cont import t_SVGP_cont, OnlineGP 15 | from src.models.utils import piv_chol, memory_picker 16 | 17 | if __name__ == '__main__': 18 | parser = argparse.ArgumentParser(description="Plot streaming plots for the magntometer experiment.") 19 | parser.add_argument("-o", type=str, default=None, required=True) 20 | args = parser.parse_args() 21 | 22 | model_path = args.o 23 | n_inducing_variable = 100 24 | 25 | with open(model_path, "rb") as f: 26 | dict_params = pickle.load(f) 27 | 28 | kernel = gpflow.kernels.Sum([gpflow.kernels.Constant(), gpflow.kernels.Matern52()]) 29 | likelihood = gpflow.likelihoods.Gaussian() 30 | inducing_variable = -2 + np.zeros((n_inducing_variable, 2)) + np.random.rand(n_inducing_variable * 2).reshape((-1, 2)) 31 | model = t_SVGP_cont(kernel, likelihood, inducing_variable) 32 | 33 | model.kernel.kernels[0].variance = dict_params['.kernel.kernels[0].variance'] 34 | model.kernel.kernels[1].lengthscales = dict_params['.kernel.kernels[1].lengthscales'] 35 | model.kernel.kernels[1].variance = dict_params['.kernel.kernels[1].variance'] 36 | model.likelihood.variance = dict_params['.likelihood.variance'] 37 | 38 | print("Model loaded successfully!!!") 39 | 40 | train_data, _ = load_data("../data/invensense", train_id=[3]) 41 | online_data = convert_data_to_online(train_data[0], n_sets=20, shuffle=False) 42 | 43 | # Fixing hyper parameters 
44 | gpflow.utilities.set_trainable(model.kernel.kernels[0].variance, False) 45 | gpflow.utilities.set_trainable(model.kernel.kernels[1].lengthscales, False) 46 | gpflow.utilities.set_trainable(model.kernel.kernels[1].variance, False) 47 | gpflow.utilities.set_trainable(model.likelihood.variance, False) 48 | 49 | memory = (online_data[0][0][:1], online_data[0][1][:1]) 50 | online_gp = OnlineGP(model, opt_hypers=tf.optimizers.Adam(), n_steps=2, lambda_lr=0.9, memory=memory, 51 | Z_picker=piv_chol, memory_picker=memory_picker, num_mem=10) 52 | 53 | path_x = None 54 | path_y = None 55 | 56 | # init Z 57 | first_batch = online_data[0][0] 58 | mean_first_batch = np.mean(first_batch, axis=0) 59 | var_first_batch = np.var(first_batch, axis=0) 60 | cov_first_batch = np.diag(var_first_batch.reshape(-1)) 61 | mean_first_batch = mean_first_batch.reshape(-1) 62 | 63 | inducing_variable = np.random.multivariate_normal(mean_first_batch, cov_first_batch, n_inducing_variable) 64 | model.inducing_variable.Z.assign(inducing_variable) 65 | 66 | for i, batch_data in enumerate(online_data): 67 | online_gp.update_with_new_batch(batch_data, train_hyps=False, train_mem=True, n_hyp_opt_steps=5) 68 | 69 | if path_x is None: 70 | path_x = batch_data[0] 71 | path_y = batch_data[1] 72 | else: 73 | path_x = np.concatenate([path_x, batch_data[0]], axis=0) 74 | path_y = np.concatenate([path_y, batch_data[1]], axis=0) 75 | 76 | # only plot every 5th batch 77 | if (i != 0) and (i+1) % 5 != 0: 78 | continue 79 | 80 | Z_new = model.inducing_variable.Z.numpy() 81 | 82 | # Prediction over grid 83 | xtest, ytest = np.mgrid[-1.3:1.3:100j, -1.3:1.3:100j] 84 | xtest_transformed, ytest_transformed = transform_grid2room(xtest, ytest) 85 | zz = np.concatenate([xtest_transformed[..., None], ytest_transformed[..., None]], axis=1) 86 | 87 | pred_m_grid, pred_S_grid = online_gp.model.predict_f(zz) 88 | pred_m_grid = pred_m_grid.numpy().reshape((100, -1)) 89 | 90 | pred_S_grid = pred_S_grid.numpy() 91 | alpha_map = np.exp(-np.sqrt(pred_S_grid)).reshape((100, 100)) 92 | alpha_map = alpha_map - np.min(alpha_map) 93 | alpha_map = alpha_map/np.max(alpha_map) 94 | # alpha_map = 1 - alpha_map 95 | 96 | # Test points 97 | transformed_x1test, transformed_x2test = transform_room2video(xtest_transformed, ytest_transformed) 98 | transformed_x1test = np.reshape(transformed_x1test, xtest.shape) 99 | transformed_x2test = np.reshape(transformed_x2test, ytest.shape) 100 | 101 | # Path 102 | path_transformed_x0, path_transformed_x1 = transform_room2video(path_x[:, 0], path_x[:, 1]) 103 | path_transformed_x = np.concatenate([path_transformed_x0[..., None], path_transformed_x1[..., None]], axis=1) 104 | 105 | # Grid 106 | g1, g2 = get_transformed_grid() 107 | 108 | # Inducing variables 109 | transformed_Z_0, transformed_Z_1 = transform_room2video(Z_new[:, 0], Z_new[:, 1]) 110 | transformed_Z_0 = np.reshape(transformed_Z_0, Z_new[:, 0].shape) 111 | transformed_Z_1 = np.reshape(transformed_Z_1, Z_new[:, 1].shape) 112 | 113 | # Plotting 114 | # plt.clf() 115 | _, axs = plt.subplots(1, 1) 116 | plt.plot(path_transformed_x[:, 0], path_transformed_x[:, 1]) 117 | pcol = plt.pcolormesh(transformed_x1test, transformed_x2test, pred_m_grid, alpha=alpha_map.reshape(-1), 118 | vmin=10, vmax=90, shading='gouraud', cmap="jet") 119 | pcol.set_edgecolor('face') 120 | 121 | plt.scatter(transformed_Z_0, transformed_Z_1, color="black") 122 | 123 | plt.plot(g1, g2, color="gray", alpha=0.2) 124 | plt.plot(g1.T, g2.T, color="gray", alpha=0.2) 125 | 126 | plt.xlim([0, 1920]) 127 | 
plt.ylim([0, 1080]) 128 | axs.set_aspect("equal") 129 | plt.axis('off') 130 | plt.gca().invert_yaxis() 131 | # plt.savefig('robot' + str(i + 1) + '.png', bbox_inches='tight', pad_inches=0, dpi=200) 132 | plt.show() 133 | -------------------------------------------------------------------------------- /experiments/magnetometer/online_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import numpy as np 7 | import hydra 8 | from omegaconf import DictConfig 9 | from hydra.utils import call, instantiate 10 | 11 | import sys 12 | sys.path.append("..") 13 | sys.path.append("../../") 14 | 15 | from exp_utils import get_hydra_output_dir, convert_data_to_online 16 | from src.models.tsvgp_cont import piv_chol, fixed_Z, random_picker 17 | 18 | # A logger for this file 19 | log = logging.getLogger(__name__) 20 | 21 | 22 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="magnetometer_online_experiment") 23 | def run_experiment(cfg: DictConfig): 24 | """ 25 | Initialize and run the experiment. 26 | """ 27 | if cfg.streaming: 28 | all_train_data, test_data = instantiate(cfg.dataset.dataloader)(train_id=[3], test_id=[1, 2, 4, 5]) 29 | train_data = convert_data_to_online(all_train_data[0], n_sets=20) 30 | else: 31 | train_data, test_data = instantiate(cfg.dataset.dataloader)(train_id=[1, 2, 4, 5], test_id=[1]) 32 | log.info("Data loaded successfully!!!") 33 | 34 | output_dir = get_hydra_output_dir() 35 | 36 | n_inducing_variable = int(np.sqrt(cfg.n_inducing_variable).item()) 37 | xx, yy = np.linspace(-2, 5, n_inducing_variable), np.linspace(-4, 2, n_inducing_variable) 38 | z1, z2 = np.meshgrid(xx, yy) 39 | zz = np.vstack((z1.flatten(), z2.flatten())).T 40 | inducing_variable = zz.tolist() 41 | cfg.model.inducing_variable = inducing_variable 42 | 43 | model = instantiate(cfg.model) 44 | model.kernel.kernels[0].variance.assign(500) 45 | gpflow.set_trainable(model.inducing_variable.Z, True) 46 | 47 | memory = (train_data[0][0][:1], train_data[0][1][:1]) 48 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 49 | Z_picker=fixed_Z, memory_picker=random_picker) 50 | 51 | for i, set_data in enumerate(train_data): 52 | if cfg.streaming: 53 | test_data = test_data 54 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(online_gp=online_gp, train_data=[set_data], 55 | test_data=test_data, debug=False) 56 | else: 57 | test_data = set_data 58 | set_data = convert_data_to_online(set_data, n_sets=20) 59 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(online_gp=online_gp, train_data=set_data, 60 | test_data=test_data, debug=True) 61 | 62 | log.info(f"------------------------------------------") 63 | log.info(f"Set {i}") 64 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 65 | log.info(f"Test RMSE: {rmse_vals[-1]}") 66 | log.info(f"------------------------------------------") 67 | 68 | parameters = gpflow.utilities.parameter_dict(model) 69 | with open(os.path.join(output_dir, "model" + str(i) + "_online_magnetometer.pkl"), "wb") as f: 70 | pickle.dump(parameters, f) 71 | 72 | 73 | if __name__ == '__main__': 74 | run_experiment() 75 | -------------------------------------------------------------------------------- /experiments/magnetometer/online_model_predictions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import gpflow 4 | import numpy as np 5 | import 
matplotlib.pyplot as plt 6 | import sys 7 | import argparse 8 | 9 | sys.path.append("../..") 10 | sys.path.append("..") 11 | 12 | from magnetometer_utils import load_data, transform_room2video, get_transformed_grid, transform_grid2room 13 | from exp_utils import convert_data_to_online 14 | from src.models.tsvgp_cont import t_SVGP_cont, OnlineGP 15 | from src.models.utils import memory_picker, piv_chol 16 | 17 | 18 | if __name__ == '__main__': 19 | parser = argparse.ArgumentParser(description="Plot online plots for the magntometer experiment.") 20 | parser.add_argument("-o", type=str, default=None, required=True) 21 | args = parser.parse_args() 22 | model_dir = args.o 23 | streaming = False 24 | 25 | if not os.path.exists(model_dir): 26 | raise Exception("Model directory is invalid!!!") 27 | 28 | model_names = [] 29 | for f in os.listdir(model_dir): 30 | if "online_magnetometer.pkl" in f: 31 | model_names.append(f) 32 | 33 | # sort by model id 34 | model_names.sort() 35 | 36 | n_inducing_variable = 100 37 | if streaming: 38 | train_data, _ = load_data("../data/invensense", train_id=[3]) 39 | train_data = convert_data_to_online(train_data[0], n_sets=20, shuffle=False) 40 | else: 41 | train_data, _ = load_data("../data/invensense", train_id=[1, 2, 4, 5]) 42 | 43 | for i, model_name in enumerate(model_names): 44 | model_path = os.path.join(model_dir, model_name) 45 | with open(model_path, "rb") as f: 46 | dict_params = pickle.load(f) 47 | 48 | # Loading model 49 | kernel = gpflow.kernels.Sum([gpflow.kernels.Constant(), gpflow.kernels.Matern52()]) 50 | likelihood = gpflow.likelihoods.Gaussian() 51 | inducing_variable = -2 + np.zeros((n_inducing_variable, 2)) + np.random.rand(n_inducing_variable * 2).reshape( 52 | (-1, 2)) 53 | model = t_SVGP_cont(kernel, likelihood, inducing_variable) 54 | model.kernel.kernels[0].variance = dict_params['.kernel.kernels[0].variance'] 55 | model.kernel.kernels[1].lengthscales = dict_params['.kernel.kernels[1].lengthscales'] 56 | model.kernel.kernels[1].variance = dict_params['.kernel.kernels[1].variance'] 57 | model.likelihood.variance = dict_params['.likelihood.variance'] 58 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 59 | Z = model.inducing_variable.Z.numpy().copy() 60 | model.lambda_1.assign(dict_params['.sites.lambda_1']) 61 | model.lambda_2.assign(dict_params['.sites._lambda_2']) 62 | print("Model loaded successfully!!!") 63 | 64 | # Prediction over grid 65 | xtest, ytest = np.mgrid[-1.:1.:100j, -1.:1.:100j] 66 | xtest_transformed, ytest_transformed = transform_grid2room(xtest, ytest) 67 | zz = np.concatenate([xtest_transformed[..., None], ytest_transformed[..., None]], axis=1) 68 | 69 | pred_m_grid, pred_S_grid = model.predict_f(zz) 70 | pred_m_grid = pred_m_grid.numpy().reshape((100, -1)) 71 | 72 | pred_S_grid = pred_S_grid.numpy() 73 | if not streaming: 74 | alpha_map = np.sqrt(pred_S_grid).reshape((100, 100)) 75 | else: 76 | alpha_map = np.exp(-1 * np.sqrt(pred_S_grid).reshape((100, 100))) 77 | alpha_map = alpha_map - np.min(alpha_map) 78 | alpha_map = alpha_map / np.max(alpha_map) 79 | alpha_map = 1 - alpha_map 80 | 81 | # Test points 82 | transformed_x1test, transformed_x2test = transform_room2video(xtest_transformed, ytest_transformed) 83 | transformed_x1test = np.reshape(transformed_x1test, xtest.shape) 84 | transformed_x2test = np.reshape(transformed_x2test, ytest.shape) 85 | 86 | # Path 87 | path_transformed_x0, path_transformed_x1 = transform_room2video(train_data[i][0][:, 0], 88 | train_data[i][0][:, 1]) 89 | 
path_transformed_x = np.concatenate([path_transformed_x0[..., None], path_transformed_x1[..., None]], axis=1) 90 | 91 | # Grid 92 | g1, g2 = get_transformed_grid() 93 | 94 | # Inducing variables 95 | idx, _ = np.where((Z < -3.0) | (Z > 5.0)) 96 | Z = np.delete(Z, idx, axis=0) 97 | transformed_Z_0, transformed_Z_1 = transform_room2video(Z[:, 0], Z[:, 1]) 98 | transformed_Z_0 = np.reshape(transformed_Z_0, Z[:, 0].shape) 99 | transformed_Z_1 = np.reshape(transformed_Z_1, Z[:, 1].shape) 100 | 101 | # Plotting 102 | plt.clf() 103 | _, axs = plt.subplots(1, 1) 104 | plt.plot(path_transformed_x[:, 0], path_transformed_x[:, 1]) 105 | pcol = plt.pcolormesh(transformed_x1test, transformed_x2test, pred_m_grid, alpha=alpha_map.reshape(-1), 106 | vmin=10, vmax=90, shading='gouraud', cmap="jet") 107 | pcol.set_edgecolor('face') 108 | 109 | plt.scatter(transformed_Z_0, transformed_Z_1, color="black") 110 | 111 | plt.plot(g1, g2, color="gray", alpha=0.2) 112 | plt.plot(g1.T, g2.T, color="gray", alpha=0.2) 113 | 114 | plt.xlim([0, 1920]) 115 | plt.ylim([0, 1080]) 116 | axs.set_aspect("equal") 117 | plt.axis('off') 118 | plt.gca().invert_yaxis() 119 | plt.show() 120 | -------------------------------------------------------------------------------- /experiments/magnetometer/streaming_gp_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import numpy as np 7 | import hydra 8 | from omegaconf import DictConfig 9 | from hydra.utils import call, instantiate 10 | 11 | import sys 12 | 13 | sys.path.append("..") 14 | 15 | from exp_utils import get_hydra_output_dir, convert_data_to_online 16 | 17 | # A logger for this file 18 | log = logging.getLogger(__name__) 19 | 20 | 21 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="magnetometer_streaming_experiment") 22 | def run_experiment(cfg: DictConfig): 23 | """ 24 | Initialize and run the experiment. 
25 | """ 26 | all_train_data, test_data = instantiate(cfg.dataset.dataloader)(train_id=[3], test_id=[1, 2, 4, 5]) 27 | log.info("Data loaded successfully!!!") 28 | 29 | output_dir = get_hydra_output_dir() 30 | 31 | # Set up inducing variables 32 | n_inducing_variable = int(np.sqrt(cfg.n_inducing_variable).item()) 33 | 34 | xx, yy = np.linspace(-2, 5, n_inducing_variable), np.linspace(-4, 2, n_inducing_variable) 35 | z1, z2 = np.meshgrid(xx, yy) 36 | zz = np.vstack((z1.flatten(), z2.flatten())).T 37 | inducing_variable = zz.tolist() 38 | cfg.model.inducing_variable = inducing_variable 39 | 40 | online_data = convert_data_to_online(all_train_data[0], n_sets=20) 41 | model = instantiate(cfg.model)(data=online_data[0]) 42 | model.kernel.kernels[0].variance.assign(500) 43 | 44 | optimizer = instantiate(cfg.optimizer) 45 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(optimizer=optimizer, model=model, train_data=online_data, 46 | test_data=test_data) 47 | 48 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 49 | log.info(f"Test RMSE: {rmse_vals[-1]}") 50 | log.info("Optimization successfully done!!!") 51 | 52 | parameters = gpflow.utilities.parameter_dict(model) 53 | with open(os.path.join(output_dir, "model_streaming_magnetometer.pkl"), "wb") as f: 54 | pickle.dump(parameters, f) 55 | 56 | 57 | if __name__ == '__main__': 58 | run_experiment() 59 | -------------------------------------------------------------------------------- /experiments/split_mnist/README.md: -------------------------------------------------------------------------------- 1 | # Split MNIST Experiment 2 | 3 | The split MNIST experiment where we run four models: the offline model where access to the whole dataset is possible, 4 | the proposed model with memory, the proposed model without memory, and the model proposed by Bui *et al.* (2017). 5 | 6 | ## Offline model 7 | 8 | Sparse variational Gaussian process (SVGP) model with access to the whole MNIST data set. 9 | 10 | To run the model: 11 | ``` 12 | python offline_model.py seed=5 13 | ``` 14 | 15 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/offline_mnist_experiment.yaml`. 16 | 17 | ## Proposed model 18 | 19 | To run the model with memory: 20 | ``` 21 | python online_model.py seed=5 22 | ``` 23 | To run the model without memory: 24 | ``` 25 | python online_model.py seed=5 online_gp.num_mem=0 26 | ``` 27 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/online_mnist_experiment.yaml`. 28 | 29 | ## Bui *et al.* (2017) 30 | To run the model: 31 | ``` 32 | python streaming_gp_model.py seed=5 33 | ``` 34 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/streaming_mnist_experiment.yaml`. 35 | 36 | ## Figures and plots 37 | 38 | The figures in the paper can be generated by running the notebooks in `notebooks/`. All the notebooks assume the output 39 | to be present in the following structure: 40 | 41 | ``` 42 | - split_mnist_runs 43 | |- offline 44 | | |- {output run folders} 45 | |- online_memory 46 | | |- {output run folders} 47 | |- online_no_memory 48 | | |- {output run folders} 49 | |- Streaming 50 | | |- {output run folders} 51 | ``` 52 | -------------------------------------------------------------------------------- /experiments/split_mnist/mnist_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | The file contains utility functions for split-mnist expeirment. 
3 | """ 4 | from typing import Tuple 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import gpflow 9 | import wandb 10 | from omegaconf import OmegaConf 11 | 12 | import sys 13 | 14 | sys.path.append("../../..") 15 | 16 | from src.models.tsvgp_cont import OnlineGP 17 | from src.streaming_sparse_gp.osvgpc import OSVGPC 18 | 19 | 20 | def setup_wandb(cfg): 21 | """ 22 | Set up wandb. 23 | """ 24 | wandb_cfg = OmegaConf.to_container( 25 | cfg, resolve=True, throw_on_missing=True 26 | ) 27 | 28 | wandb.init(project="MNIST", entity=cfg.wandb.username, config=wandb_cfg) 29 | 30 | 31 | def load_mnist(seed: int = None, train_split_percentage: float = 0.80) -> (list, list): 32 | """ 33 | Load MNIST data set. 34 | 35 | seed: if seed needs to be fixed, by default it is None. 36 | train_split_percentage: float value between (0, 1), governing the split of data into train and test set. 37 | """ 38 | if seed is not None: 39 | np.random.seed(seed) 40 | tf.random.set_seed(seed) 41 | 42 | mnist_train, mnist_test = tf.keras.datasets.mnist.load_data() 43 | 44 | x, y = mnist_train 45 | x = tf.reshape(x, [x.shape[0], -1]).numpy() 46 | x = x.astype(np.float64) / 255 47 | y = np.reshape(y, (-1, 1)) 48 | y = np.int64(y) 49 | 50 | xt, yt = mnist_test 51 | xt = tf.reshape(xt, [xt.shape[0], -1]).numpy() 52 | xt = xt.astype(np.float64) / 255 53 | yt = np.reshape(yt, (-1, 1)) 54 | yt = np.int64(yt) 55 | 56 | # merge train and test into one 57 | X = np.concatenate([x, xt], axis=0) 58 | Y = np.concatenate([y, yt], axis=0) 59 | 60 | all_data = np.concatenate([X, Y], axis=1) 61 | 62 | n_train = int(all_data.shape[0] * train_split_percentage) 63 | 64 | np.random.shuffle(all_data) 65 | train_data = all_data[:n_train] 66 | test_data = all_data[n_train:] 67 | 68 | train_tasks = (train_data[:, :-1], train_data[:, -1:].astype(np.int64)) 69 | test_tasks = (test_data[:, :-1], test_data[:, -1:].astype(np.int64)) 70 | 71 | return train_tasks, test_tasks 72 | 73 | 74 | def load_split_mnist(seed: int = None, train_split_percentage: float = 0.80) -> (list, list): 75 | """ 76 | Load split-mnist data set. 77 | 78 | seed: if seed needs to be fixed, by default it is None. 79 | train_split_percentage: float value between (0, 1), governing the split of data into train and test set. 
80 | """ 81 | if seed is not None: 82 | np.random.seed(seed) 83 | tf.random.set_seed(seed) 84 | 85 | mnist_train, mnist_test = tf.keras.datasets.mnist.load_data() 86 | 87 | x, y = mnist_train 88 | x = tf.reshape(x, [x.shape[0], -1]).numpy() 89 | x = x.astype(np.float64) / 255 90 | y = np.reshape(y, (-1, 1)) 91 | y = np.int64(y) 92 | 93 | xt, yt = mnist_test 94 | xt = tf.reshape(xt, [xt.shape[0], -1]).numpy() 95 | xt = xt.astype(np.float64) / 255 96 | yt = np.reshape(yt, (-1, 1)) 97 | yt = np.int64(yt) 98 | 99 | # merge train and test into one 100 | X = np.concatenate([x, xt], axis=0) 101 | Y = np.concatenate([y, yt], axis=0) 102 | 103 | train_tasks = [] 104 | test_tasks = [] 105 | 106 | tasks = [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)] 107 | 108 | # Create specific tasks 109 | for t in tasks: 110 | idx, _ = np.where((Y == t[0]) | (Y == t[1])) 111 | np.random.shuffle(idx) 112 | 113 | x_task = X[idx] 114 | y_task = Y[idx] 115 | 116 | n_task = int(x_task.shape[0] * train_split_percentage) 117 | 118 | train_tasks.append((x_task[:n_task], y_task[:n_task])) 119 | test_tasks.append((x_task[n_task:], y_task[n_task:])) 120 | 121 | return train_tasks, test_tasks 122 | 123 | 124 | def get_mini_batches(data: [np.ndarray, np.ndarray], minibatch_size: int = 1000) -> list: 125 | """ 126 | Make mini-batches of data. 127 | """ 128 | num_batches = int(data[0].shape[0] / minibatch_size) 129 | batched_data = [] 130 | for n in range(num_batches): 131 | tmp_data = (data[0][n * minibatch_size:(n + 1) * minibatch_size], 132 | data[1][n * minibatch_size:(n + 1) * minibatch_size]) 133 | batched_data.append(tmp_data) 134 | # Last batch data 135 | if data[0].shape[0] % minibatch_size != 0: 136 | tmp_data = (data[0][num_batches * minibatch_size:], 137 | data[1][num_batches * minibatch_size:]) 138 | batched_data.append(tmp_data) 139 | 140 | return batched_data 141 | 142 | 143 | def optimize_online_model_minibatch(model: OnlineGP, train_data: [np.ndarray, np.ndarray], 144 | test_data: [np.ndarray, np.ndarray], 145 | minibatch_size: int = 100, train_hyper: bool = True, train_mem: bool = True, 146 | n_hyp_opt_steps: int = 20) -> (list, list): 147 | """ 148 | Optimize the Online GP model 149 | 150 | model: the OnlineGP model. 151 | train_data: A tuple of training data. 152 | test_data: A tuple of test data. 153 | minibatch_size: An integer value corresponding to the minibatch size. Defaults to 100 154 | train_hyper: A boolean variable for training the hyperparameters or not. Defaults to True. 155 | train_mem: A boolean variable for training the memory or not. Defaults to True. 156 | n_hyp_opt_steps: An integer value corresponding to the number of hyperparameter optimization steps. Defaults to 20. 157 | 158 | returns: a list of NLPD and accuracy values. 
159 | """ 160 | batched_data = get_mini_batches(train_data, minibatch_size) 161 | 162 | nlpd_vals = [] 163 | acc_vals = [] 164 | for batch_data in batched_data: 165 | for var in model.optimizer.variables(): 166 | var.assign(tf.zeros_like(var)) 167 | model.update_with_new_batch(new_data=batch_data, train_hyps=train_hyper, n_hyp_opt_steps=n_hyp_opt_steps, 168 | train_mem=train_mem, remove_memory=False) 169 | 170 | nlpd = -1 * tf.reduce_mean(model.model.predict_log_density(test_data)) 171 | nlpd_vals.append(nlpd) 172 | 173 | pred_m, _ = model.model.predict_y(test_data[0]) 174 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 175 | acc = np.mean(pred_argmax == test_data[1]) 176 | acc_vals.append(acc) 177 | 178 | return nlpd_vals, acc_vals 179 | 180 | 181 | def optimize_streaming_model_minibatch(optimizer, model, train_data: Tuple[np.ndarray, np.ndarray], 182 | test_data: Tuple[np.ndarray, np.ndarray], iterations: int = 100, 183 | minibatch_size: int = 100, mu=None, Su=None, Kaa=None, Zopt=None, 184 | first_init=True): 185 | """ 186 | Optimize the streaming model of Bui et al. 2017. 187 | 188 | The code is based on the official implementation: https://github.com/thangbui/streaming_sparse_gp 189 | """ 190 | def optimization_step_adam(): 191 | optimizer.minimize(model.training_loss, model.trainable_variables) 192 | 193 | def optimization_step_scipy(): 194 | optimizer.minimize(model.training_loss, model.trainable_variables, options={'maxiter': iterations}) 195 | 196 | def optimization_step(): 197 | if isinstance(optimizer, gpflow.optimizers.Scipy): 198 | optimization_step_scipy() 199 | else: 200 | for _ in range(iterations): 201 | optimization_step_adam() 202 | 203 | def init_Z(cur_Z, new_X, use_old_Z=True): 204 | if use_old_Z: 205 | Z = np.copy(cur_Z) 206 | else: 207 | M = cur_Z.shape[0] 208 | M_old = int(0.7 * M) 209 | M_new = M - M_old 210 | old_Z = cur_Z[np.random.permutation(M)[0:M_old], :] 211 | new_Z = new_X[np.random.permutation(new_X.shape[0])[0:M_new], :] 212 | Z = np.vstack((old_Z, new_Z)) 213 | return Z 214 | 215 | use_old_z = False 216 | nlpd_vals = [] 217 | acc_vals = [] 218 | 219 | batched_data = get_mini_batches(train_data, minibatch_size) 220 | for i, new_data in enumerate(batched_data): 221 | X, y = (new_data[0], new_data[1]) 222 | 223 | if first_init: 224 | if isinstance(optimizer, gpflow.optimizers.Scipy): 225 | gpflow.optimizers.Scipy().minimize( 226 | model.training_loss_closure((X, y)), model.trainable_variables, options={'maxiter': iterations}) 227 | else: 228 | for _ in range(iterations): 229 | optimizer.minimize(model.training_loss_closure((X, y)), model.trainable_variables) 230 | first_init = False 231 | else: 232 | Zinit = init_Z(Zopt, X, use_old_z) 233 | model = OSVGPC((X, y), gpflow.kernels.Matern52(), gpflow.likelihoods.Softmax(num_classes=10), mu, Su, Kaa, 234 | Zopt, Zinit, num_latent_gps=10) 235 | optimization_step() 236 | 237 | Zopt = model.inducing_variable.Z.numpy() 238 | mu, Su = model.predict_f(Zopt, full_cov=True) 239 | if len(Su.shape) == 3: 240 | Su = Su[0, :, :] + 1e-4 * np.eye(mu.shape[0]) 241 | Kaa = model.kernel(model.inducing_variable.Z) 242 | 243 | # NLPD calculation 244 | f_mean, f_var = model.predict_f(test_data[0]) 245 | if len(f_var.shape) == 1: 246 | f_var = f_var[..., None] 247 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, test_data[1]) 248 | nlpd = -1 * tf.reduce_mean(nlpd) 249 | nlpd_vals.append(nlpd) 250 | 251 | # acc 252 | pred_m, _ = model.predict_y(test_data[0]) 253 | pred_argmax = tf.reshape(tf.argmax(pred_m, 
axis=1), (-1, 1)) 254 | acc = np.mean(pred_argmax == test_data[1]) 255 | acc_vals.append(acc) 256 | 257 | return nlpd_vals, acc_vals, mu, Su, Kaa, Zopt, model 258 | -------------------------------------------------------------------------------- /experiments/split_mnist/offline_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for split mnist experiment offline SVGP model i.e. the model has access to the whole data set. 3 | """ 4 | import logging 5 | import os 6 | import pickle 7 | 8 | import gpflow.models 9 | import numpy as np 10 | from omegaconf import DictConfig 11 | from hydra.utils import instantiate, call 12 | import hydra 13 | 14 | import sys 15 | 16 | sys.path.append("..") 17 | 18 | from exp_utils import get_hydra_output_dir 19 | from mnist_utils import setup_wandb 20 | 21 | # A logger for this file 22 | log = logging.getLogger(__name__) 23 | 24 | 25 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="offline_mnist_experiment") 26 | def run_experiment(cfg: DictConfig): 27 | """ 28 | Initialize and run the experiment. 29 | """ 30 | output_dir = get_hydra_output_dir() 31 | 32 | train_data, test_data = call(cfg.dataset.dataloader) 33 | # Set up inducing variables 34 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 35 | cfg.model.inducing_variable = inducing_variable 36 | cfg.model.num_data = train_data[0].shape[0] 37 | 38 | if cfg.wandb.username is not None: 39 | setup_wandb(cfg) 40 | 41 | offline_model = instantiate(cfg.model) 42 | log.info("Model initialized; Optimization started!!!") 43 | 44 | log.info(f"---------------------------------------------") 45 | log.info(f"Starting mnist experiment with seed={cfg.seed}") 46 | log.info(f"---------------------------------------------") 47 | 48 | _, nlpd, acc = call(cfg.optimize)(model=offline_model, train_data=train_data, 49 | test_data=test_data, optimizer=instantiate(cfg.optimizer)) 50 | 51 | logging.info(f"NLPD after the task is {nlpd[-1]}") 52 | logging.info(f"Accuracy after the task is {acc[-1]}\n\n") 53 | 54 | log.info(f"---------------------------------------------") 55 | 56 | np.savez(os.path.join(output_dir, "training_statistics.npz"), nlpd=nlpd, 57 | acc=acc) 58 | parameters = gpflow.utilities.parameter_dict(offline_model) 59 | with open(os.path.join(output_dir, "model_offline.pkl"), "wb") as f: 60 | pickle.dump(parameters, f) 61 | 62 | 63 | if __name__ == '__main__': 64 | run_experiment() 65 | -------------------------------------------------------------------------------- /experiments/split_mnist/online_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for split mnist experiment online model. 3 | """ 4 | 5 | import pickle 6 | import os 7 | import logging 8 | import wandb 9 | 10 | import gpflow 11 | import tensorflow as tf 12 | import hydra 13 | import numpy as np 14 | from omegaconf import DictConfig 15 | from hydra.utils import instantiate, call 16 | 17 | import sys 18 | 19 | sys.path.append("..") 20 | sys.path.append("../..") 21 | 22 | from exp_utils import get_hydra_output_dir 23 | from src.models.tsvgp_cont import piv_chol 24 | from mnist_utils import setup_wandb 25 | 26 | # A logger for this file 27 | log = logging.getLogger(__name__) 28 | 29 | 30 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="online_mnist_experiment") 31 | def run_experiment(cfg: DictConfig): 32 | """ 33 | Initialize and run the experiment. 
34 | """ 35 | output_dir = get_hydra_output_dir() 36 | 37 | if cfg.wandb.username is not None: 38 | setup_wandb(cfg) 39 | 40 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 41 | 42 | # Set up inducing variables 43 | inducing_variable = all_train_data[0][0][:cfg.n_inducing_variable].copy().tolist() 44 | cfg.model.inducing_variable = inducing_variable 45 | 46 | model = instantiate(cfg.model) 47 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 48 | 49 | memory = (all_train_data[0][0][:1], all_train_data[0][1][:1]) 50 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 51 | Z_picker=piv_chol, memory_picker=call(cfg.memory_picker)) 52 | 53 | log.info(f"---------------------------------------------") 54 | log.info(f"Starting split mnist experiment with seed={cfg.seed}") 55 | log.info(f"---------------------------------------------") 56 | 57 | nlpd_vals = [] 58 | acc_vals = [] 59 | task_break_pnts = [] 60 | task_id = 0 61 | 62 | previous_tasks = None 63 | for train_data, test_data in zip(all_train_data, all_test_data): 64 | 65 | log.info(f"---------------------------------------------") 66 | log.info(f"Task {task_id}") 67 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 68 | log.info("Splitting data into sets...") 69 | 70 | if previous_tasks is None: 71 | previous_tasks = test_data 72 | else: 73 | previous_tasks = (np.concatenate([previous_tasks[0], test_data[0].copy()], axis=0), 74 | np.concatenate([previous_tasks[1], test_data[1].copy()], axis=0)) 75 | 76 | # Calculating Accuracy and NLPD before the model is trained 77 | nlpd_init = -1 * tf.reduce_mean(online_gp.model.predict_log_density(previous_tasks)).numpy().item() 78 | 79 | pred_m, _ = online_gp.model.predict_y(previous_tasks[0]) 80 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 81 | acc_init = np.mean(pred_argmax == previous_tasks[1]) 82 | 83 | nlpd, acc = call(cfg.optimize)(model=online_gp, train_data=train_data, test_data=previous_tasks) 84 | 85 | if cfg.wandb.username is not None: 86 | wandb.log({"Accuracy": acc[-1]}) 87 | 88 | # Add init acc and nlpd 89 | acc = [acc_init] + acc 90 | nlpd = [nlpd_init] + nlpd 91 | 92 | nlpd_vals += nlpd 93 | acc_vals += acc 94 | 95 | task_break_pnts.append(len(nlpd_vals)) 96 | 97 | logging.info(f"NLPD after the task is {nlpd[-1]}") 98 | logging.info(f"Accuracy after the task is {acc[-1]}\n\n") 99 | 100 | logging.info("NLPD on all tasks:\n") 101 | for i in range(task_id, -1, -1): 102 | nlpd = -1 * tf.reduce_mean(online_gp.model.predict_log_density(all_test_data[i])) 103 | 104 | pred_m, _ = online_gp.model.predict_y(all_test_data[i][0]) 105 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 106 | acc = np.mean(pred_argmax == all_test_data[i][1]) 107 | 108 | logging.info(f"NLPD on task {i} is {nlpd}") 109 | logging.info(f"Accuracy on task {i} is {acc}\n\n") 110 | 111 | # Save model memory and inducing variables 112 | Z = online_gp.model.inducing_variable.Z.numpy().copy() 113 | mem = online_gp.memory[0].copy() 114 | np.savez(os.path.join(output_dir, f"memory_and_Z_{task_id}.npz"), mem=mem, Z=Z) 115 | 116 | log.info(f"---------------------------------------------") 117 | task_id += 1 118 | 119 | np.savez(os.path.join(output_dir, "training_statistics.npz"), nlpd=nlpd_vals, 120 | acc=acc_vals, task_break_pnts=task_break_pnts) 121 | parameters = gpflow.utilities.parameter_dict(online_gp.model) 122 | with open(os.path.join(output_dir, 
"model_online.pkl"), "wb") as f: 123 | pickle.dump(parameters, f) 124 | 125 | 126 | if __name__ == '__main__': 127 | run_experiment() 128 | -------------------------------------------------------------------------------- /experiments/split_mnist/streaming_gp_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | 5 | import gpflow 6 | import tensorflow as tf 7 | import hydra 8 | import numpy as np 9 | from omegaconf import DictConfig 10 | from hydra.utils import instantiate, call 11 | 12 | import sys 13 | 14 | sys.path.append("..") 15 | sys.path.append("../..") 16 | 17 | from exp_utils import get_hydra_output_dir 18 | 19 | # A logger for this file 20 | log = logging.getLogger(__name__) 21 | 22 | 23 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="streaming_mnist_experiment") 24 | def run_experiment(cfg: DictConfig): 25 | """ 26 | Initialize and run the experiment. 27 | """ 28 | output_dir = get_hydra_output_dir() 29 | 30 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 31 | 32 | log.info(f"---------------------------------------------") 33 | log.info(f"Starting mnist experiment with seed={cfg.seed}") 34 | log.info(f"---------------------------------------------") 35 | 36 | # Set up inducing variables 37 | inducing_variable = all_train_data[0][0][:cfg.n_inducing_variable].copy().tolist() 38 | cfg.model.inducing_variable = inducing_variable 39 | 40 | cfg.model.num_data = all_train_data[0][0].shape[0] 41 | cfg.model.num_latent_gps = cfg.num_classes 42 | model = instantiate(cfg.model) 43 | 44 | log.info("Model initialized; Optimization started!!!") 45 | 46 | optimizer = instantiate(cfg.optimizer) 47 | 48 | nlpd_vals = [] 49 | acc_vals = [] 50 | task_break_pnts = [] 51 | task_id = 0 52 | first_init = True 53 | 54 | previous_tasks = None 55 | mu, Su, Kaa, Zopt = None, None, None, None 56 | 57 | for train_data, test_data in zip(all_train_data, all_test_data): 58 | log.info(f"---------------------------------------------") 59 | log.info(f"Task {task_id}") 60 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 61 | log.info("Splitting data into sets...") 62 | 63 | if previous_tasks is None: 64 | previous_tasks = test_data 65 | else: 66 | previous_tasks = (np.concatenate([previous_tasks[0], test_data[0].copy()], axis=0), 67 | np.concatenate([previous_tasks[1], test_data[1].copy()], axis=0)) 68 | 69 | # Calculating Accuracy and NLPD before the model is trained 70 | f_mean, f_var = model.predict_f(previous_tasks[0]) 71 | if len(f_var.shape) == 1: 72 | f_var = f_var[..., None] 73 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, previous_tasks[1]) 74 | nlpd_init = -1 * tf.reduce_mean(nlpd).numpy().item() 75 | 76 | pred_m, _ = model.predict_y(previous_tasks[0]) 77 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 78 | acc_init = np.mean(pred_argmax == previous_tasks[1]) 79 | 80 | nlpd, acc, mu, Su, Kaa, Zopt, model = call(cfg.optimize)(optimizer=optimizer, model=model, 81 | train_data=train_data, 82 | test_data=previous_tasks, mu=mu, Su=Su, 83 | Kaa=Kaa, Zopt=Zopt, 84 | first_init=first_init) 85 | first_init = False 86 | logging.info(f"NLPD after the task is {nlpd[-1]}") 87 | logging.info(f"Accuracy after the task is {acc[-1]}\n\n") 88 | 89 | # Add init acc and nlpd 90 | acc = [acc_init] + acc 91 | nlpd = [nlpd_init] + nlpd 92 | 93 | nlpd_vals += nlpd 94 | acc_vals += acc 95 | 96 | 
task_break_pnts.append(len(nlpd_vals)) 97 | 98 | logging.info("NLPD on previous tasks:\n") 99 | for i in range(task_id, -1, -1): 100 | f_mean, f_var = model.predict_f(all_test_data[i][0]) 101 | if len(f_var.shape) == 1: 102 | f_var = f_var[..., None] 103 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, all_test_data[i][1]) 104 | nlpd = -1 * tf.reduce_mean(nlpd) 105 | 106 | pred_m, _ = model.predict_y(all_test_data[i][0]) 107 | pred_argmax = tf.reshape(tf.argmax(pred_m, axis=1), (-1, 1)) 108 | acc = np.mean(pred_argmax == all_test_data[i][1]) 109 | 110 | logging.info(f"NLPD on task {i} is {nlpd}") 111 | logging.info(f"Accuracy on task {i} is {acc}\n\n") 112 | 113 | log.info(f"---------------------------------------------") 114 | task_id += 1 115 | 116 | np.savez(os.path.join(output_dir, "training_statistics.npz"), nlpd=nlpd_vals, 117 | acc=acc_vals, task_break_pnts=task_break_pnts) 118 | parameters = gpflow.utilities.parameter_dict(model) 119 | with open(os.path.join(output_dir, "model_streaming.pkl"), "wb") as f: 120 | pickle.dump(parameters, f) 121 | 122 | 123 | if __name__ == '__main__': 124 | run_experiment() 125 | -------------------------------------------------------------------------------- /experiments/uci/README.md: -------------------------------------------------------------------------------- 1 | # UCI Experiment 2 | 3 | In the UCI experiment we compare three models: the offline model that has access to the whole dataset, our proposed online model, and the online model proposed by Bui *et al.* (2017). 4 | 5 | For the paper, for each data set we perform 10-fold cross-validation. In all cases we use a Matérn-5/2 kernel. 6 | 7 | ## Offline model 8 | 9 | The sparse variational GP model (t-SVGP) with access to the whole data set. 10 | 11 | To run the model on a **regression** dataset: 12 | ``` 13 | python offline_model.py dataset=bike model.likelihood.variance=0.1 dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 14 | ``` 15 | To run the model on a **classification** dataset: 16 | ``` 17 | python offline_model.py dataset=adult optimize.lambda_lr=0.5 model/likelihood=bernoulli dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 18 | ``` 19 | 20 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/offline_experiment.yaml`. 21 | 22 | ## Proposed model 23 | 24 | To run the model on a **regression** dataset: 25 | ``` 26 | python online_model.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 model.likelihood.variance=0.1 optimize.hyperparams_step=100 online_gp.n_steps=2 27 | ``` 28 | To run the model on a **classification** dataset: 29 | ``` 30 | python online_model.py dataset=adult model/likelihood=bernoulli online_gp.lambda_lr=0.5 online_gp.n_steps=4 dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 optimize.hyperparams_step=100 31 | ``` 32 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/online_experiment.yaml`. 33 | 34 | The memory picker can be changed from BLS (by default) to random by using the command-line argument `online_gp.memory_picker=random`. 
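
For reference, the snippet below is a minimal sketch (not the verbatim experiment code) of how that flag is resolved inside `online_model.py`: the config string simply selects between the `random_picker` and default (BLS-style) `memory_picker` helpers from `src/models/utils.py` before the online GP wrapper is instantiated. The `resolve_memory_picker` helper and the commented usage lines are illustrative only.

```
# Minimal sketch of how `online_gp.memory_picker` selects the memory-picking
# function; assumes the repository root is on sys.path so that `src` imports.
from src.models.utils import fixed_Z, memory_picker, random_picker


def resolve_memory_picker(name):
    """Map the `online_gp.memory_picker` config string to a picker function."""
    # 'random' selects random_picker; any other value falls back to the
    # default BLS-style memory_picker helper.
    return random_picker if name == "random" else memory_picker


# Inside the Hydra-driven run (cfg is the experiment DictConfig), roughly:
#   picker = resolve_memory_picker(cfg.online_gp.memory_picker)
#   online_gp = instantiate(cfg.online_gp)(model=model, memory=memory,
#                                          opt_hypers=instantiate(cfg.optimizer),
#                                          Z_picker=fixed_Z, memory_picker=picker)
```

Keeping the selection in a small helper like this avoids re-binding the imported `memory_picker` name inside the experiment function.
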
35 | 36 | ## Bui *et al.* (2017) 37 | 38 | To run the Bui *et al.* (2017) model on a **regression** dataset: 39 | ``` 40 | python streaming_sgpr.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 optimize.iterations=100 model.noise_variance=0.1 optimizer=scipy 41 | ``` 42 | To run the Bui *et al.* (2017) model on a **classification** dataset: 43 | ``` 44 | python streaming_sgpr.py dataset=adult dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 optimizer=adam optimize.iterations=100 optimize.task="classification" model=svgp model/likelihood=bernoulli n_sets=50 45 | ``` 46 | The configurations and model hyperparameters can be found in the Hydra config file `../configs/streaming_experiment.yaml`. 47 | 48 | ## Fast-Conditioning 49 | Fast-conditioning, _i.e._, only the variational parameters are optimized; the kernel hyperparameters and inducing inputs are loaded from a previously trained model and kept fixed. 50 | ### Proposed model 51 | To run the fast-conditioning of the proposed model: 52 | ``` 53 | python online_fc_model.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 model.likelihood.variance=0.1 online_gp.num_mem=0 online_gp.n_steps=2 optimize.train_memory=False load_model_path={trained_model_path} optimize.train_hyperparams=False 54 | ``` 55 | 56 | ### Bui *et al.* (2017) 57 | To run the fast-conditioning of the Bui _et al._ (2017) model: 58 | 59 | ``` 60 | python streaming_sgpr_fc.py dataset=bike dataset.dataloader.n_k_folds=10 dataset.dataloader.random_state=33 n_sets=50 optimizer=adam load_model_path={trained_model_path} optimize.iterations=0 optimize.task="regression" model.noise_variance=0.1 61 | ``` 62 | -------------------------------------------------------------------------------- /experiments/uci/offline_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for UCI regression tasks offline model. 3 | """ 4 | import logging 5 | import os 6 | import pickle 7 | 8 | import gpflow.models 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | from omegaconf import DictConfig 12 | from hydra.utils import instantiate, call 13 | import hydra 14 | import wandb 15 | 16 | import sys 17 | sys.path.append("..") 18 | 19 | from exp_utils import get_hydra_output_dir 20 | from uci_utils import load_model_parameters, setup_wandb 21 | 22 | # A logger for this file 23 | log = logging.getLogger(__name__) 24 | 25 | 26 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="offline_experiment") 27 | def run_experiment(cfg: DictConfig): 28 | """ 29 | Initialize and run the experiment.
30 | """ 31 | output_dir = get_hydra_output_dir() 32 | 33 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 34 | 35 | log.info(f"---------------------------------------------") 36 | log.info(f"Dataset : {cfg.dataset}") 37 | log.info(f"---------------------------------------------") 38 | 39 | if len(all_train_data) > 1: 40 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 41 | 42 | k_fold_nlpds = [] 43 | k_fold_eval = [] 44 | k_fold_id = 0 45 | for train_data, test_data in zip(all_train_data, all_test_data): 46 | cfg.model.num_data = train_data[0].shape[0] 47 | log.info(f"---------------------------------------------") 48 | log.info(f"Starting with set {k_fold_id}") 49 | log.info(f"---------------------------------------------") 50 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 51 | 52 | if cfg.wandb.username is not None: 53 | setup_wandb(cfg) 54 | 55 | # Set up inducing variables 56 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 57 | cfg.model.inducing_variable = inducing_variable 58 | 59 | model = instantiate(cfg.model) 60 | log.info("Model initialized; Optimization started!!!") 61 | if cfg.load_model_path is not None: 62 | with open(cfg.load_model_path, "rb") as f: 63 | dict_params = pickle.load(f) 64 | load_model_parameters(model, dict_params) 65 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 66 | 67 | elbo_vals, nlpd_vals, eval_vals = call(cfg.optimize)(model=model, train_data=train_data, test_data=test_data, 68 | optimizer=instantiate(cfg.optimizer)) 69 | if len(nlpd_vals) > 0: 70 | log.info(f"Final ELBO: {elbo_vals[-1]}") 71 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 72 | log.info(f"Test RMSE/Acc: {eval_vals[-1]}") 73 | 74 | log.info("Optimization successfully done!!!") 75 | 76 | if cfg.wandb.username is not None: 77 | plt.clf() 78 | plt.plot(elbo_vals) 79 | plt.title("ELBO") 80 | wandb.log({"optim_elbo_vals": plt}) 81 | 82 | plt.clf() 83 | plt.plot(nlpd_vals) 84 | plt.title("NLPD") 85 | wandb.log({"optim_nlpd_vals": plt}) 86 | 87 | plt.clf() 88 | plt.plot(eval_vals) 89 | plt.title("Eval.") 90 | wandb.log({"optim_eval_vals": plt}) 91 | 92 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), elbo=elbo_vals, 93 | nlpd=nlpd_vals, eval=eval_vals) 94 | parameters = gpflow.utilities.parameter_dict(model) 95 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 96 | pickle.dump(parameters, f) 97 | 98 | k_fold_id += 1 99 | k_fold_nlpds.append(nlpd_vals[-1]) 100 | k_fold_eval.append(eval_vals[-1]) 101 | log.info(f"---------------------------------------------") 102 | 103 | if len(k_fold_nlpds) > 1: 104 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 105 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 106 | 107 | log.info(f"Mean eval over k-folds = {np.mean(k_fold_eval)}") 108 | log.info(f"Std eval over k-folds = {np.std(k_fold_eval)}") 109 | 110 | 111 | if __name__ == '__main__': 112 | run_experiment() 113 | -------------------------------------------------------------------------------- /experiments/uci/online_fc_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import time 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | from omegaconf import DictConfig, OmegaConf 9 | from hydra.utils import instantiate, call 10 | import hydra 11 | 
import wandb 12 | import gpflow 13 | 14 | import sys 15 | sys.path.append("../../") 16 | sys.path.append("..") 17 | from src.models.utils import memory_picker, fixed_Z 18 | 19 | from uci_utils import setup_wandb 20 | from exp_utils import convert_data_to_online, get_hydra_output_dir 21 | 22 | # A logger for this file 23 | log = logging.getLogger(__name__) 24 | 25 | 26 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="online_experiment") 27 | def run_experiment(cfg: DictConfig): 28 | """ 29 | Initialize and run the experiment. 30 | """ 31 | output_dir = get_hydra_output_dir() 32 | 33 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 34 | 35 | if len(all_train_data) > 1: 36 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 37 | 38 | k_fold_nlpds = [] 39 | k_fold_eval = [] 40 | k_fold_time = [] 41 | k_fold_id = 0 42 | 43 | for train_data, test_data in zip(all_train_data, all_test_data): 44 | 45 | log.info(f"---------------------------------------------") 46 | log.info(f"Starting with set {k_fold_id}") 47 | log.info(f"---------------------------------------------") 48 | 49 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 50 | log.info("Splitting data into sets...") 51 | 52 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 53 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 54 | 55 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 56 | 57 | if cfg.wandb.username is not None: 58 | setup_wandb(cfg) 59 | 60 | # Set up inducing variables 61 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 62 | cfg.model.inducing_variable = inducing_variable 63 | 64 | model = instantiate(cfg.model) 65 | if cfg.load_model_path is None: 66 | raise Exception("FC model should have a model path from where hyperparams are loaded!") 67 | 68 | model_path = os.path.join(cfg.load_model_path, "model_" + str(k_fold_id) + ".pkl") 69 | 70 | with open(model_path, "rb") as f: 71 | dict_params = pickle.load(f) 72 | 73 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 74 | model.kernel.lengthscales = dict_params['.kernel.lengthscales'] 75 | model.kernel.variance = dict_params['.kernel.variance'] 76 | 77 | # not present in classification 78 | if '.likelihood.variance' in dict_params: 79 | model.likelihood.variance = dict_params['.likelihood.variance'] 80 | 81 | # make then non-trainable 82 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 83 | gpflow.utilities.set_trainable(model.kernel.lengthscales, False) 84 | gpflow.utilities.set_trainable(model.kernel.variance, False) 85 | 86 | if isinstance(model.likelihood, gpflow.likelihoods.Gaussian): 87 | gpflow.utilities.set_trainable(model.likelihood.variance, False) 88 | 89 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 90 | 91 | memory = None 92 | 93 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 94 | Z_picker=fixed_Z, memory_picker=memory_picker) 95 | start_time = time.time() 96 | nlpd_vals, eval_vals, time_vals = call(cfg.optimize)(online_gp=online_gp, train_data=online_data, test_data=test_data, 97 | debug=True) 98 | end_time = time.time() 99 | 100 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 101 | log.info(f"Test RMSE/Acc: {eval_vals[-1]}") 102 | log.info(f"Time (s): {end_time - start_time}") 103 | 104 | log.info("Optimization 
successfully done!!!") 105 | 106 | if cfg.wandb.username is not None: 107 | plt.clf() 108 | plt.plot(nlpd_vals) 109 | plt.title("NLPD") 110 | wandb.log({"optim_nlpd_vals": plt}) 111 | 112 | plt.clf() 113 | plt.plot(eval_vals) 114 | plt.title("RMSE/Acc") 115 | wandb.log({"optim_eval_vals": plt}) 116 | 117 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 118 | eval=eval_vals, time_vals=time_vals) 119 | parameters = gpflow.utilities.parameter_dict(model) 120 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 121 | pickle.dump(parameters, f) 122 | 123 | k_fold_id += 1 124 | k_fold_nlpds.append(nlpd_vals[-1]) 125 | k_fold_eval.append(eval_vals[-1]) 126 | k_fold_time.append(end_time - start_time) 127 | log.info(f"---------------------------------------------") 128 | 129 | if len(k_fold_nlpds) > 1: 130 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 131 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 132 | 133 | log.info(f"Mean RMSE/Acc over k-folds = {np.mean(k_fold_eval)}") 134 | log.info(f"Std RMSE/Acc over k-folds = {np.std(k_fold_eval)}") 135 | 136 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 137 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 138 | 139 | 140 | if __name__ == '__main__': 141 | run_experiment() 142 | -------------------------------------------------------------------------------- /experiments/uci/online_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main file for the proposed model on UCI regression tasks. 3 | """ 4 | import logging 5 | import os 6 | import pickle 7 | import time 8 | 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | from omegaconf import DictConfig, OmegaConf 12 | from hydra.utils import instantiate, call 13 | import hydra 14 | import wandb 15 | import gpflow 16 | 17 | import sys 18 | 19 | sys.path.append("../../") 20 | sys.path.append("..") 21 | 22 | from src.models.utils import fixed_Z, memory_picker, random_picker 23 | from exp_utils import get_hydra_output_dir, convert_data_to_online 24 | from uci_utils import setup_wandb 25 | 26 | # A logger for this file 27 | log = logging.getLogger(__name__) 28 | 29 | 30 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="online_experiment") 31 | def run_experiment(cfg: DictConfig): 32 | """ 33 | Initialize and run the experiment. 
34 | """ 35 | output_dir = get_hydra_output_dir() 36 | 37 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 38 | 39 | if len(all_train_data) > 1: 40 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 41 | 42 | k_fold_nlpds = [] 43 | k_fold_eval = [] 44 | k_fold_time = [] 45 | k_fold_id = 0 46 | 47 | for train_data, test_data in zip(all_train_data, all_test_data): 48 | 49 | log.info(f"---------------------------------------------") 50 | log.info(f"Starting with set {k_fold_id}") 51 | log.info(f"---------------------------------------------") 52 | 53 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 54 | log.info("Splitting data into sets...") 55 | 56 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 57 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 58 | 59 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 60 | 61 | if cfg.wandb.username is not None: 62 | setup_wandb(cfg) 63 | 64 | # Set up inducing variables 65 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 66 | cfg.model.inducing_variable = inducing_variable 67 | 68 | model = instantiate(cfg.model) 69 | if cfg.load_model_path is not None: 70 | with open(cfg.load_model_path, "rb") as f: 71 | dict_params = pickle.load(f) 72 | 73 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 74 | model.kernel.lengthscales = dict_params['.kernel.lengthscales'] 75 | model.kernel.variance = dict_params['.kernel.variance'] 76 | model.likelihood.variance = dict_params['.likelihood.variance'] 77 | 78 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 79 | 80 | memory = (online_data[0][0][:1], online_data[0][1][:1]) 81 | if cfg.online_gp.memory_picker == "random": 82 | memory_picker = random_picker 83 | else: 84 | memory_picker = memory_picker 85 | 86 | online_gp = instantiate(cfg.online_gp)(model=model, memory=memory, opt_hypers=instantiate(cfg.optimizer), 87 | Z_picker=fixed_Z, memory_picker=memory_picker) 88 | start_time = time.time() 89 | nlpd_vals, eval_vals, _ = call(cfg.optimize)(online_gp=online_gp, train_data=online_data, test_data=test_data, 90 | debug=True) 91 | end_time = time.time() 92 | 93 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 94 | log.info(f"Test RMSE/Acc: {eval_vals[-1]}") 95 | log.info(f"Time (s): {end_time - start_time}") 96 | 97 | log.info("Optimization successfully done!!!") 98 | 99 | if cfg.wandb.username is not None: 100 | plt.clf() 101 | plt.plot(nlpd_vals) 102 | plt.title("NLPD") 103 | wandb.log({"optim_nlpd_vals": plt}) 104 | 105 | plt.clf() 106 | plt.plot(eval_vals) 107 | plt.title("RMSE/Acc") 108 | wandb.log({"optim_eval_vals": plt}) 109 | 110 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 111 | eval=eval_vals) 112 | parameters = gpflow.utilities.parameter_dict(model) 113 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 114 | pickle.dump(parameters, f) 115 | 116 | k_fold_id += 1 117 | k_fold_nlpds.append(nlpd_vals[-1]) 118 | k_fold_eval.append(eval_vals[-1]) 119 | k_fold_time.append(end_time - start_time) 120 | log.info(f"---------------------------------------------") 121 | 122 | if len(k_fold_nlpds) > 1: 123 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 124 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 125 | 126 | log.info(f"Mean RMSE/Acc over 
k-folds = {np.mean(k_fold_eval)}") 127 | log.info(f"Std RMSE/Acc over k-folds = {np.std(k_fold_eval)}") 128 | 129 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 130 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 131 | 132 | 133 | if __name__ == '__main__': 134 | run_experiment() 135 | -------------------------------------------------------------------------------- /experiments/uci/streaming_sgpr.py: -------------------------------------------------------------------------------- 1 | """ 2 | @inproceedings{BuiNguTur17, 3 | title = {Streaming sparse {G}aussian process approximations}, 4 | author = {Bui, Thang D. and Nguyen, Cuong V. and Turner, Richard E.}, 5 | booktitle = {Advances in Neural Information Processing Systems 30}, 6 | year = {2017} 7 | } 8 | """ 9 | import os 10 | import logging 11 | import pickle 12 | import time 13 | 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | from omegaconf import DictConfig 17 | from hydra.utils import instantiate, call 18 | import hydra 19 | import wandb 20 | import gpflow 21 | 22 | import sys 23 | 24 | sys.path.append("..") 25 | 26 | from uci_utils import load_model_parameters, setup_wandb 27 | from exp_utils import get_hydra_output_dir, convert_data_to_online 28 | 29 | # A logger for this file 30 | log = logging.getLogger(__name__) 31 | 32 | 33 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="streaming_experiment") 34 | def run_experiment(cfg: DictConfig): 35 | """ 36 | Initialize and run the experiment. 37 | """ 38 | output_dir = get_hydra_output_dir() 39 | 40 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 41 | 42 | if len(all_train_data) > 1: 43 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 44 | 45 | k_fold_nlpds = [] 46 | k_fold_rmse = [] 47 | k_fold_time = [] 48 | k_fold_id = 0 49 | 50 | log.info(f"---------------------------------------------") 51 | log.info(f"Dataset : {cfg.dataset}") 52 | log.info(f"---------------------------------------------") 53 | for train_data, test_data in zip(all_train_data, all_test_data): 54 | log.info(f"---------------------------------------------") 55 | log.info(f"Starting with set {k_fold_id}") 56 | log.info(f"---------------------------------------------") 57 | 58 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 59 | log.info("Splitting data into sets...") 60 | 61 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 62 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 63 | 64 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 65 | 66 | if cfg.wandb.username is not None: 67 | setup_wandb(cfg) 68 | 69 | # Set up inducing variables 70 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 71 | cfg.model.inducing_variable = inducing_variable 72 | 73 | if "SGPR" in cfg.model._target_: 74 | model = instantiate(cfg.model)(data=online_data[0]) 75 | else: 76 | cfg.model.num_data = train_data[0].shape[0] 77 | model = instantiate(cfg.model) 78 | 79 | if cfg.load_model_path is not None: 80 | with open(cfg.load_model_path, "rb") as f: 81 | dict_params = pickle.load(f) 82 | 83 | load_model_parameters(model, dict_params) 84 | log.info(f"Model parameters loaded from {cfg.load_model_path}") 85 | 86 | log.info("Model initialized; Optimization started!!!") 87 | 88 | optimizer = instantiate(cfg.optimizer) 89 | 90 | start_time = 
time.time() 91 | nlpd_vals, rmse_vals, _ = call(cfg.optimize)(optimizer=optimizer, model=model, train_data=online_data, 92 | test_data=test_data) 93 | end_time = time.time() 94 | 95 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 96 | log.info(f"Test RMSE: {rmse_vals[-1]}") 97 | log.info(f"Time (s): {end_time - start_time}") 98 | log.info("Optimization successfully done!!!") 99 | 100 | if cfg.wandb.username is not None: 101 | plt.clf() 102 | plt.plot(nlpd_vals) 103 | plt.title("NLPD") 104 | wandb.log({"optim_nlpd_vals": plt}) 105 | 106 | plt.clf() 107 | plt.plot(rmse_vals) 108 | plt.title("RMSE") 109 | wandb.log({"optim_rmse_vals": plt}) 110 | 111 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 112 | rmse=rmse_vals) 113 | parameters = gpflow.utilities.parameter_dict(model) 114 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 115 | pickle.dump(parameters, f) 116 | 117 | k_fold_id += 1 118 | k_fold_nlpds.append(nlpd_vals[-1]) 119 | k_fold_rmse.append(rmse_vals[-1]) 120 | k_fold_time.append(end_time - start_time) 121 | log.info(f"---------------------------------------------") 122 | 123 | if len(k_fold_nlpds) > 1: 124 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 125 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 126 | 127 | log.info(f"Mean RMSE over k-folds = {np.mean(k_fold_rmse)}") 128 | log.info(f"Std RMSE over k-folds = {np.std(k_fold_rmse)}") 129 | 130 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 131 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 132 | 133 | 134 | if __name__ == '__main__': 135 | """ 136 | """ 137 | run_experiment() 138 | -------------------------------------------------------------------------------- /experiments/uci/streaming_sgpr_fc.py: -------------------------------------------------------------------------------- 1 | """ 2 | @inproceedings{BuiNguTur17, 3 | title = {Streaming sparse {G}aussian process approximations}, 4 | author = {Bui, Thang D. and Nguyen, Cuong V. and Turner, Richard E.}, 5 | booktitle = {Advances in Neural Information Processing Systems 30}, 6 | year = {2017} 7 | } 8 | 9 | @article{BuiYanTur16, 10 | title={A Unifying Framework for Sparse {G}aussian Process Approximation using {P}ower {E}xpectation {P}ropagation}, 11 | author={Thang D. Bui and Josiah Yan and Richard E. Turner}, 12 | journal={arXiv preprint arXiv:1605.07066}, 13 | year={2016} 14 | } 15 | """ 16 | import os 17 | import logging 18 | import pickle 19 | import time 20 | 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | from omegaconf import DictConfig, OmegaConf 24 | from hydra.utils import instantiate, call 25 | import hydra 26 | import wandb 27 | import gpflow 28 | 29 | import sys 30 | sys.path.append("..") 31 | 32 | from exp_utils import convert_data_to_online, get_hydra_output_dir 33 | 34 | 35 | def _setup_wandb(cfg): 36 | """ 37 | Set up wandb if username is passed. 38 | """ 39 | wandb_cfg = OmegaConf.to_container( 40 | cfg, resolve=True, throw_on_missing=True 41 | ) 42 | 43 | wandb.init(project="UCI", entity=cfg.wandb.username, config=wandb_cfg) 44 | 45 | log.info("wandb initialized!!!") 46 | 47 | 48 | # A logger for this file 49 | log = logging.getLogger(__name__) 50 | 51 | 52 | @hydra.main(version_base="1.2", config_path="../configs/", config_name="streaming_experiment") 53 | def run_experiment(cfg: DictConfig): 54 | """ 55 | Initialize and run the experiment. 
56 | """ 57 | output_dir = get_hydra_output_dir() 58 | 59 | all_train_data, all_test_data = call(cfg.dataset.dataloader) 60 | 61 | if len(all_train_data) > 1: 62 | log.info(f"Cross validation starting with {cfg.dataset.dataloader.n_k_folds} K-folds!!!") 63 | 64 | k_fold_nlpds = [] 65 | k_fold_rmse = [] 66 | k_fold_time = [] 67 | k_fold_id = 0 68 | 69 | log.info(f"---------------------------------------------") 70 | log.info(f"Dataset : {cfg.dataset}") 71 | log.info(f"---------------------------------------------") 72 | for train_data, test_data in zip(all_train_data, all_test_data): 73 | 74 | log.info(f"---------------------------------------------") 75 | log.info(f"Starting with set {k_fold_id}") 76 | log.info(f"---------------------------------------------") 77 | 78 | log.info(f"Train data are {train_data[0].shape[0]} and test data are {test_data[0].shape[0]}") 79 | log.info("Splitting data into sets...") 80 | 81 | online_data = convert_data_to_online(train_data, cfg.n_sets, cfg.sort_data) 82 | log.info(f"Data splitted successfully into {cfg.n_sets} sets!!!") 83 | 84 | np.savez(os.path.join(output_dir, "splitted_dataset" + str(k_fold_id) + ".npz"), data=online_data) 85 | 86 | if cfg.wandb.username is not None: 87 | _setup_wandb(cfg) 88 | 89 | # Set up inducing variables 90 | inducing_variable = train_data[0][:cfg.n_inducing_variable].copy().tolist() 91 | cfg.model.inducing_variable = inducing_variable 92 | 93 | if "SGPR" in cfg.model._target_: 94 | model = instantiate(cfg.model)(data=online_data[0]) 95 | else: 96 | cfg.model.num_data = train_data[0].shape[0] 97 | model = instantiate(cfg.model) 98 | 99 | # Loading model hyperparam values 100 | if cfg.load_model_path is None: 101 | raise Exception("FC model should have a model path from where hyperparams are loaded!") 102 | 103 | model_path = os.path.join(cfg.load_model_path, "model_" + str(k_fold_id) + ".pkl") 104 | 105 | with open(model_path, "rb") as f: 106 | dict_params = pickle.load(f) 107 | 108 | model.inducing_variable.Z = dict_params['.inducing_variable.Z'] 109 | model.kernel.lengthscales = dict_params['.kernel.lengthscales'] 110 | model.kernel.variance = dict_params['.kernel.variance'] 111 | 112 | # not present in classification 113 | if '.likelihood.variance' in dict_params: 114 | model.likelihood.variance = dict_params['.likelihood.variance'] 115 | 116 | # make then non-trainable 117 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 118 | gpflow.utilities.set_trainable(model.kernel.lengthscales, False) 119 | gpflow.utilities.set_trainable(model.kernel.variance, False) 120 | 121 | if isinstance(model.likelihood, gpflow.likelihoods.Gaussian): 122 | gpflow.utilities.set_trainable(model.likelihood.variance, False) 123 | 124 | log.info("Model initialized; Optimization started!!!") 125 | 126 | optimizer = instantiate(cfg.optimizer) 127 | 128 | start_time = time.time() 129 | nlpd_vals, rmse_vals, time_vals = call(cfg.optimize)(optimizer=optimizer, model=model, train_data=online_data, 130 | test_data=test_data, use_old_z=True, fast_conditioning=True) 131 | end_time = time.time() 132 | 133 | log.info(f"Test NLPD: {nlpd_vals[-1]}") 134 | log.info(f"Test RMSE: {rmse_vals[-1]}") 135 | log.info(f"Time (s): {end_time - start_time}") 136 | log.info("Optimization successfully done!!!") 137 | 138 | if cfg.wandb.username is not None: 139 | plt.clf() 140 | plt.plot(nlpd_vals) 141 | plt.title("NLPD") 142 | wandb.log({"optim_nlpd_vals": plt}) 143 | 144 | plt.clf() 145 | plt.plot(rmse_vals) 146 | plt.title("RMSE") 147 | 
wandb.log({"optim_rmse_vals": plt}) 148 | 149 | np.savez(os.path.join(output_dir, "training_statistics_" + str(k_fold_id) + ".npz"), nlpd=nlpd_vals, 150 | rmse=rmse_vals, time_vals=time_vals) 151 | 152 | parameters = gpflow.utilities.parameter_dict(model) 153 | with open(os.path.join(output_dir, "model_" + str(k_fold_id) + ".pkl"), "wb") as f: 154 | pickle.dump(parameters, f) 155 | 156 | k_fold_id += 1 157 | k_fold_nlpds.append(nlpd_vals[-1]) 158 | k_fold_rmse.append(rmse_vals[-1]) 159 | k_fold_time.append(end_time-start_time) 160 | log.info(f"---------------------------------------------") 161 | 162 | if len(k_fold_nlpds) > 1: 163 | log.info(f"Mean NLPD over k-folds = {np.mean(k_fold_nlpds)}") 164 | log.info(f"Std NLPD over k-folds = {np.std(k_fold_nlpds)}") 165 | 166 | log.info(f"Mean RMSE over k-folds = {np.mean(k_fold_rmse)}") 167 | log.info(f"Std RMSE over k-folds = {np.std(k_fold_rmse)}") 168 | 169 | log.info(f"Mean time over k-folds = {np.mean(k_fold_time)}") 170 | log.info(f"Std time over k-folds = {np.std(k_fold_time)}") 171 | 172 | 173 | if __name__ == '__main__': 174 | run_experiment() 175 | -------------------------------------------------------------------------------- /experiments/uci/uci_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility function for UCI datasets. 3 | """ 4 | import time 5 | from typing import Tuple 6 | import os 7 | 8 | import gpflow.models 9 | import tensorflow as tf 10 | import pandas as pd 11 | import numpy as np 12 | from gpflow.likelihoods import Bernoulli 13 | from sklearn.preprocessing import StandardScaler 14 | from gpflow.models.svgp import SVGP 15 | from sklearn.model_selection import KFold 16 | import wandb 17 | from omegaconf import OmegaConf 18 | 19 | import sys 20 | 21 | sys.path.append("../..") 22 | sys.path.append("..") 23 | 24 | from src.streaming_sparse_gp.osgpr import OSGPR_VFE 25 | from src.streaming_sparse_gp.osvgpc import OSVGPC 26 | 27 | 28 | def setup_wandb(cfg): 29 | """ 30 | Set up wandb. 31 | """ 32 | wandb_cfg = OmegaConf.to_container( 33 | cfg, resolve=True, throw_on_missing=True 34 | ) 35 | 36 | wandb.init(project="UCI", entity=cfg.wandb.username, config=wandb_cfg) 37 | 38 | 39 | def load_data(data_path: str, train_split_percentage: float = 0.8, normalize: bool = False, 40 | seed: int = None, n_k_folds: int = None, random_state: int = None, 41 | dataset_type: str = "regression") -> (Tuple[np.ndarray, np.ndarray], 42 | Tuple[np.ndarray, np.ndarray]): 43 | """ 44 | Load UCI dataset on the basis of data name. 45 | 46 | If k_folds is passed then a list of several folds are returned. 47 | 48 | returns a list of set of (X, Y) as Tuple as train_data and test_data. 
49 | """ 50 | if seed is not None: 51 | np.random.seed(seed) 52 | 53 | if not os.path.exists(data_path): 54 | raise Exception("Data path does not exist ") 55 | 56 | df = pd.read_csv(data_path) 57 | X = df.to_numpy()[:, :-1] 58 | Y = df.to_numpy()[:, -1].reshape((-1, 1)) 59 | 60 | if normalize: 61 | x_scaler = StandardScaler().fit(X) 62 | X = x_scaler.transform(X) 63 | 64 | if dataset_type == "regression": 65 | y_scaler = StandardScaler().fit(Y) 66 | Y = y_scaler.transform(Y) 67 | 68 | if n_k_folds is None: 69 | data_dim = X.shape[-1] 70 | n = Y.shape[0] 71 | 72 | # combine X and Y and shuffle 73 | XY = np.concatenate([X, Y], axis=1) 74 | np.random.shuffle(XY) 75 | 76 | n_train = int(np.floor(n * train_split_percentage)) 77 | 78 | x_train = XY[:n_train, :data_dim] 79 | y_train = XY[:n_train, data_dim:] 80 | 81 | x_test = XY[n_train:, :data_dim] 82 | y_test = XY[n_train:, data_dim:] 83 | 84 | if dataset_type == "classification": 85 | y_train = y_train.astype(np.int64) 86 | y_test = y_test.astype(np.int64) 87 | 88 | train_data = [(x_train, y_train)] 89 | test_data = [(x_test, y_test)] 90 | else: 91 | train_data, test_data = get_cross_validation_sets((X, Y), k_folds=n_k_folds, random_state=random_state) 92 | 93 | return train_data, test_data 94 | 95 | 96 | def load_model_parameters(model, params: dict): 97 | """Loads the parameters from dictionary to the model""" 98 | gpflow.utilities.multiple_assign(model, params) 99 | 100 | 101 | def get_cross_validation_sets(data: Tuple[np.ndarray, np.ndarray], k_folds=5, random_state: int = None): 102 | """ 103 | Split the dataset for K-Fold validation. 104 | """ 105 | 106 | kf = KFold(n_splits=k_folds, random_state=random_state, shuffle=True) 107 | 108 | train_k_folds_set = [] 109 | test_k_folds_set = [] 110 | for train_idx, test_idx in kf.split(data[0]): 111 | train_k_folds_set.append((data[0][train_idx], data[1][train_idx])) 112 | test_k_folds_set.append((data[0][test_idx], data[1][test_idx])) 113 | 114 | return train_k_folds_set, test_k_folds_set 115 | 116 | 117 | def optimize_streaming_model(optimizer, model, train_data: Tuple[np.ndarray, np.ndarray], 118 | test_data: Tuple[np.ndarray, np.ndarray], task: str, iterations: int = 100, 119 | use_old_z=False, fast_conditioning=False): 120 | """ 121 | Optimize the streaming model of Bui et al. 2017. 
122 | 123 | The code is based on the official implementation: https://github.com/thangbui/streaming_sparse_gp 124 | """ 125 | 126 | @tf.function 127 | def optimization_step_adam(): 128 | for _ in range(iterations): 129 | optimizer.minimize(model.training_loss, model.trainable_variables) 130 | 131 | # @tf.function 132 | def optimization_step_adam_classification(loss, variables): 133 | for _ in range(iterations): 134 | optimizer.minimize(loss, variables) 135 | 136 | def optimization_step_scipy(): 137 | optimizer.minimize(model.training_loss, model.trainable_variables, options={'maxiter': iterations}) 138 | 139 | def optimization_step(): 140 | if isinstance(optimizer, gpflow.optimizers.Scipy): 141 | optimization_step_scipy() 142 | else: 143 | optimization_step_adam() 144 | 145 | def get_model_prediction(): 146 | Zopt = model.inducing_variable.Z.numpy() 147 | mu, Su = model.predict_f(Zopt, full_cov=True) 148 | if len(Su.shape) == 3: 149 | Su = Su[0, :, :] 150 | 151 | return mu, Su, Zopt 152 | 153 | def init_Z(cur_Z, new_X, use_old_Z=True): 154 | if use_old_Z: 155 | Z = np.copy(cur_Z) 156 | else: 157 | M = cur_Z.shape[0] 158 | M_old = int(0.7 * M) 159 | M_new = M - M_old 160 | old_Z = cur_Z[np.random.permutation(M)[0:M_old], :] 161 | new_Z = new_X[np.random.permutation(new_X.shape[0])[0:M_new], :] 162 | Z = np.vstack((old_Z, new_Z)) 163 | return Z 164 | 165 | n_sets = len(train_data) 166 | 167 | # NLPD calculation 168 | f_mean, f_var = model.predict_f(test_data[0]) 169 | if len(f_var.shape) == 1: 170 | f_var = f_var[..., None] 171 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, test_data[1]) 172 | nlpd = -1 * tf.reduce_mean(nlpd) 173 | print(f"Initial NLPD: {nlpd}") 174 | 175 | nlpd_vals = [] 176 | evaluation_vals = [] 177 | time_vals = [] 178 | for n in range(n_sets): 179 | new_data = train_data[n] 180 | X, y = (new_data[0], new_data[1]) 181 | 182 | start_time = time.time() 183 | if task == "regression": 184 | if n == 0: 185 | optimization_step() 186 | 187 | mu, Su, Zopt = get_model_prediction() 188 | else: 189 | Kaa1 = model.kernel(model.inducing_variable.Z) 190 | 191 | Zinit = init_Z(Zopt, X, use_old_z) 192 | var = model.likelihood.variance 193 | if isinstance(model.kernel, gpflow.kernels.Matern52): 194 | kernel = gpflow.kernels.Matern52(variance=model.kernel.variance, 195 | lengthscales=model.kernel.lengthscales) 196 | else: # For running Magnetometer. 
197 | kernel = gpflow.kernels.Sum([gpflow.kernels.Constant(model.kernel.kernels[0].variance), 198 | gpflow.kernels.Matern52( 199 | lengthscales=model.kernel.kernels[1].lengthscales, 200 | variance=model.kernel.kernels[1].variance)]) 201 | 202 | model = OSGPR_VFE((X, y), kernel, mu, Su, Kaa1, Zopt, Zinit) 203 | model.likelihood.variance.assign(var) 204 | 205 | optimization_step() 206 | 207 | mu, Su, Zopt = get_model_prediction() 208 | else: 209 | if n == 0: 210 | if isinstance(optimizer, gpflow.optimizers.Scipy): 211 | gpflow.optimizers.Scipy().minimize( 212 | model.training_loss_closure((X, y)), model.trainable_variables, options={'maxiter': iterations}) 213 | else: 214 | for _ in range(iterations): 215 | optimizer.minimize(model.training_loss_closure((X, y)), model.trainable_variables) 216 | else: 217 | Zinit = init_Z(Zopt, X, use_old_z) 218 | if fast_conditioning: 219 | kernel = model.kernel 220 | else: 221 | kernel = gpflow.kernels.Matern52() 222 | model = OSVGPC((X, y), kernel, gpflow.likelihoods.Bernoulli(), mu, Su, Kaa, Zopt, 223 | Zinit) 224 | optimization_step_adam_classification(model.training_loss, model.trainable_variables) 225 | 226 | Zopt = model.inducing_variable.Z.numpy() 227 | mu, Su = model.predict_f(Zopt, full_cov=True) 228 | if len(Su.shape) == 3: 229 | Su = Su[0, :, :] + 1e-4 * np.eye(mu.shape[0]) 230 | Kaa = model.kernel(model.inducing_variable.Z) 231 | 232 | time_vals.append(time.time() - start_time) 233 | 234 | # NLPD calculation 235 | f_mean, f_var = model.predict_f(test_data[0]) 236 | if len(f_var.shape) == 1: 237 | f_var = f_var[..., None] 238 | nlpd = model.likelihood.predict_log_density(f_mean, f_var, test_data[1]) 239 | nlpd = -1 * tf.reduce_mean(nlpd) 240 | nlpd_vals.append(nlpd) 241 | 242 | # RMSE calculation 243 | if task == "regression": 244 | y_pred, _ = model.likelihood.predict_mean_and_var(f_mean, f_var) 245 | rmse = np.sqrt(np.mean(np.square(y_pred - test_data[1]))) 246 | evaluation_vals.append(rmse) 247 | else: 248 | pred_mean, _ = model.likelihood.predict_mean_and_var(f_mean, f_var) 249 | pred_mean = pred_mean.numpy() 250 | pred_mean[pred_mean >= 0.5] = 1 251 | pred_mean[pred_mean < 0.5] = 0 252 | correct_prediction = np.sum(pred_mean == test_data[1]) 253 | acc = correct_prediction / test_data[0].shape[0] 254 | evaluation_vals.append(acc) 255 | 256 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 257 | print(f"Set {n}") 258 | print(f"NLPD = {nlpd_vals[-1]}") 259 | print(f"Eval. metric (RMSE/Acc.) 
= {evaluation_vals[-1]}") 260 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 261 | 262 | return nlpd_vals, evaluation_vals, time_vals 263 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==2.6.2 2 | pandas==1.3.5 3 | numpy==1.19.2 4 | scikit-learn==1.1.1 5 | hydra-core==1.2.0 6 | wandb==0.13.3 7 | gpflow==2.5.2 8 | matplotlib==3.5.0 9 | plotly==5.10.0 10 | tensorflow-probability==0.12.0 11 | tensorflow-estimator==2.6.0 12 | jupyter-core==4.10.0 13 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__init__.py -------------------------------------------------------------------------------- /src/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/__pycache__/sites.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__pycache__/sites.cpython-38.pyc -------------------------------------------------------------------------------- /src/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/tsvgp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/tsvgp.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/tsvgp_cont.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/tsvgp_cont.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/tsvgp_white.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/tsvgp_white.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/models/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/models/tsvgp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-SVGP model 3 | """ 4 | 5 | # Copyright Anonymous Authors 6 | # Only for double-blind review. Not to be shared. 7 | 8 | # This code has been extended from the SVGP implementation in GPflow and is 9 | # to be released under a compatible license. 10 | 11 | import abc 12 | 13 | import gpflow 14 | import numpy as np 15 | import tensorflow as tf 16 | from gpflow import kullback_leiblers 17 | from gpflow.conditionals import conditional 18 | from gpflow.config import default_float, default_jitter 19 | from gpflow.covariances import Kuf, Kuu 20 | from gpflow.models.model import GPModel, InputData, MeanAndVariance, RegressionData 21 | from gpflow.models.training_mixins import ExternalDataTrainingLossMixin 22 | from gpflow.models.util import inducingpoint_wrapper 23 | 24 | from src.sites import DenseSites 25 | from src.util import ( 26 | conditional_from_precision_sites, 27 | gradient_transformation_mean_var_to_expectation, 28 | posterior_from_dense_site, 29 | ) 30 | 31 | 32 | class base_SVGP(GPModel, ExternalDataTrainingLossMixin, abc.ABC): 33 | """ 34 | Modified gpflow.svgp.SVGP class to accommodate 35 | for different paramaterization of q(u) 36 | """ 37 | 38 | def __init__( 39 | self, 40 | kernel, 41 | likelihood, 42 | inducing_variable, 43 | *, 44 | mean_function=None, 45 | num_latent_gps: int = 1, 46 | num_data=None, 47 | ): 48 | """ 49 | - kernel, likelihood, inducing_variables, mean_function are appropriate 50 | GPflow objects 51 | - num_latent_gps is the number of latent processes to use, defaults to 1 52 | - num_data is the total number of observations, defaults to X.shape[0] 53 | (relevant when feeding in external minibatches) 54 | """ 55 | # init the super class, accept args 56 | super().__init__(kernel, likelihood, mean_function, num_latent_gps) 57 | self.num_data = num_data 58 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 59 | 60 | def get_mean_chol_cov_inducing_posterior(self): 61 | """Returns the mean and cholesky factor of the covariance matrix of q(u)""" 62 | raise NotImplementedError 63 | 64 | def prior_kl(self) -> tf.Tensor: 65 | """Returns the KL divergence KL[q(u)|p(u)]""" 66 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 67 | return kullback_leiblers.prior_kl( 68 | self.inducing_variable, self.kernel, q_mu, q_sqrt, whiten=False 69 | ) 70 | 71 | def maximum_log_likelihood_objective(self, data: RegressionData) -> tf.Tensor: 72 | """ 73 | The variational lower bound 74 | :param data: input data 75 | """ 76 | return self.elbo(data) 77 | 78 | def elbo(self, data: RegressionData) -> tf.Tensor: 79 | """ 80 | This gives a variational bound (the evidence lower bound or ELBO) on 81 | the log marginal likelihood of the model. 
82 | :param data: input data 83 | """ 84 | X, Y = data 85 | kl = self.prior_kl() 86 | f_mean, f_var = self.predict_f(X, full_cov=False, full_output_cov=False) 87 | var_exp = self.likelihood.variational_expectations(f_mean, f_var, Y) 88 | if self.num_data is not None: 89 | num_data = tf.cast(self.num_data, kl.dtype) 90 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 91 | scale = num_data / minibatch_size 92 | else: 93 | scale = tf.cast(1.0, kl.dtype) 94 | return tf.reduce_sum(var_exp) * scale - kl 95 | 96 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> MeanAndVariance: 97 | """ 98 | Posterior prediction at new input Xnew 99 | :param Xnew: N x D Tensor 100 | """ 101 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 102 | mu, var = conditional( 103 | Xnew, 104 | self.inducing_variable, 105 | self.kernel, 106 | q_mu, 107 | q_sqrt=q_sqrt, 108 | full_cov=full_cov, 109 | white=False, 110 | full_output_cov=full_output_cov, 111 | ) 112 | tf.debugging.assert_positive(var) 113 | return mu + self.mean_function(Xnew), var 114 | 115 | 116 | class t_SVGP(base_SVGP): 117 | """ 118 | Class for the t-SVGP model 119 | """ 120 | 121 | def __init__( 122 | self, 123 | kernel, 124 | likelihood, 125 | inducing_variable, 126 | *, 127 | mean_function=None, 128 | num_latent_gps: int = 1, 129 | lambda_1=None, 130 | lambda_2_sqrt=None, 131 | num_data=None, 132 | force=False, 133 | ): 134 | """ 135 | - kernel, likelihood, inducing_variables, mean_function are appropriate 136 | GPflow objects 137 | - num_latent_gps is the number of latent processes to use, defaults to 1 138 | - q_diag is a boolean. If True, the covariance is approximated by a 139 | diagonal matrix. 140 | - whiten is a boolean. If True, we use the whitened representation of 141 | the inducing points. 142 | - num_data is the total number of observations, defaults to X.shape[0] 143 | (relevant when feeding in external minibatches) 144 | """ 145 | # init the super class, accept args 146 | GPModel.__init__(self, kernel, likelihood, mean_function, num_latent_gps) 147 | 148 | self.num_data = num_data 149 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 150 | 151 | # init variational parameters 152 | self.num_inducing = self.inducing_variable.num_inducing 153 | 154 | self._init_variational_parameters(self.num_inducing, lambda_1, lambda_2_sqrt) 155 | self.whiten = False 156 | self.force = force 157 | 158 | def _init_variational_parameters(self, num_inducing, lambda_1, lambda_2_sqrt, **kwargs): 159 | """ 160 | Constructs the site parameters λ₁, Λ₂. 161 | for site t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u) 162 | 163 | Parameters 164 | ---------- 165 | :param num_inducing: int 166 | Number of inducing variables, typically referred to as M. 167 | :param lambda_1: np.array or None 168 | First order natural parameter of the variational site. 169 | :param lambda_2_sqrt: np.array or None 170 | Second order natural parameter of the variational site. 
171 | """ 172 | 173 | lambda_1 = np.zeros((num_inducing, self.num_latent_gps)) if lambda_1 is None else lambda_1 174 | if lambda_2_sqrt is None: 175 | lambda_2_sqrt = [ 176 | -tf.eye(num_inducing, dtype=default_float()) * 1e-10 177 | for _ in range(self.num_latent_gps) 178 | ] 179 | lambda_2_sqrt = np.array(lambda_2_sqrt) 180 | else: 181 | assert lambda_2_sqrt.ndim == 3 182 | self.num_latent_gps = lambda_2_sqrt.shape[0] 183 | 184 | self.sites = DenseSites(lambda_1, lambda_2_sqrt) 185 | 186 | @property 187 | def lambda_1(self): 188 | """first natural parameter""" 189 | return self.sites.lambda_1 190 | 191 | @property 192 | def lambda_2_sqrt(self): 193 | """Cholesky factor of the second natural parameter""" 194 | return self.sites.lambda_2_sqrt 195 | 196 | @property 197 | def lambda_2(self): 198 | """second natural parameter""" 199 | return tf.matmul(self.lambda_2_sqrt, self.lambda_2_sqrt, transpose_b=True) 200 | 201 | def get_mean_chol_cov_inducing_posterior(self): 202 | """ 203 | Computes the mean and cholesky factor of the posterior 204 | on the inducing variables q(u) = 𝓝(u; m, S) 205 | S = (K⁻¹ + Λ₂)⁻¹ = (K⁻¹ + L₂L₂ᵀ)⁻¹ = K - KL₂W⁻¹L₂ᵀK , W = (I + L₂ᵀKL₂)⁻¹ 206 | m = S λ₁ 207 | """ 208 | K_uu = Kuu( 209 | self.inducing_variable, self.kernel, jitter=default_jitter() 210 | ) # [P, M, M] or [M, M] 211 | return posterior_from_dense_site(K_uu, self.lambda_1, self.lambda_2_sqrt) 212 | 213 | # todo : make broadcastable 214 | def new_predict_f( 215 | self, Xnew: InputData, full_cov=False, full_output_cov=False 216 | ) -> MeanAndVariance: 217 | """ 218 | Posterior prediction at new input Xnew 219 | :param Xnew: N x D Tensor 220 | """ 221 | K_uu = Kuu( 222 | self.inducing_variable, self.kernel, jitter=default_jitter() 223 | ) # [P, M, M] or [M, M] 224 | K_uf = Kuf(self.inducing_variable, self.kernel, Xnew) # [P, M, M] or [M, M] 225 | K_ff = self.kernel.K_diag(Xnew)[..., None] 226 | 227 | mu, var = conditional_from_precision_sites( 228 | K_uu, K_ff, K_uf, self.lambda_1, L=self.lambda_2_sqrt 229 | ) 230 | tf.debugging.assert_positive(var) # We really should make the tests pass with this here 231 | return mu + self.mean_function(Xnew), var 232 | 233 | def natgrad_step(self, data, lr=0.1, jitter=1e-9): 234 | """Takes natural gradient step in Variational parameters in the local parameters 235 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 236 | Input: 237 | :param: X : N x D 238 | :param: Y: N x 1 239 | :param: lr: Scalar 240 | 241 | Output: 242 | Updates the params 243 | """ 244 | X, Y = data 245 | mean, var = self.predict_f(X) 246 | 247 | # todo : hack to get heterokedastic demo to run 248 | if isinstance( 249 | self.inducing_variable, gpflow.inducing_variables.SharedIndependentInducingVariables 250 | ): 251 | meanZ, _ = self.predict_f(self.inducing_variable.inducing_variables[0].Z) 252 | else: 253 | meanZ, _ = self.predict_f(self.inducing_variable.Z) 254 | 255 | with tf.GradientTape() as g: 256 | g.watch([mean, var]) 257 | ve = self.likelihood.variational_expectations(mean, var, Y) 258 | grads = g.gradient(ve, [mean, var]) 259 | 260 | # cropping grads to stay negative 261 | eps = 1e-8 262 | grads[1] = tf.minimum(grads[1], -eps * tf.ones_like(grads[1])) 263 | 264 | Id = tf.eye(self.num_inducing, dtype=tf.float64) 265 | 266 | # Compute the projection matrix A from prior information 267 | K_uu = Kuu(self.inducing_variable, self.kernel) 268 | K_uf = Kuf(self.inducing_variable, self.kernel, X) # [P, M, M] or [M, M] 269 | chol_Kuu = tf.linalg.cholesky(K_uu + Id * jitter) 270 | A = 
tf.transpose(tf.linalg.cholesky_solve(chol_Kuu, K_uf)) 271 | 272 | # ▽μ₁[Var_exp] = aₙαₙ , 273 | # ▽μ2[Var_exp] = λₙaₙaₙᵀ 274 | 275 | if tf.rank(A) == 2: 276 | A = tf.tile(A[..., None], [1, 1, self.num_latent_gps]) 277 | grads = [ 278 | tf.einsum("nml,nl->ml", A, grads[0]), 279 | tf.einsum("nml,nol,nl->lmo", A, A, grads[1]), 280 | ] 281 | 282 | # chain rule at f 283 | grad_mu = gradient_transformation_mean_var_to_expectation(meanZ, grads) 284 | 285 | if self.num_data is not None: 286 | num_data = tf.cast(self.num_data, dtype=tf.float64) 287 | minibatch_size = tf.cast(tf.shape(X)[0], dtype=tf.float64) 288 | scale = num_data / minibatch_size 289 | else: 290 | scale = tf.cast(1.0, dtype=tf.float64) 291 | 292 | lambda_2 = -0.5 * self.lambda_2 293 | lambda_1 = self.lambda_1 294 | # compute update in natural form 295 | lambda_1 = (1 - lr) * lambda_1 + lr * scale * grad_mu[0] 296 | lambda_2 = (1 - lr) * lambda_2 + lr * scale * grad_mu[1] 297 | 298 | # transform and perform update 299 | lambda_2_sqrt = -tf.linalg.cholesky(-2.0 * lambda_2 + Id * jitter) 300 | # To match SVGP you need to eliminate this jitter for minibatching 301 | self.lambda_1.assign(lambda_1) 302 | self.lambda_2_sqrt.assign(lambda_2_sqrt) 303 | self.get_mean_chol_cov_inducing_posterior() 304 | -------------------------------------------------------------------------------- /src/models/tsvgp_sites.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-SVGP models with individual sites per data point. 3 | """ 4 | from typing import Optional 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from gpflow import default_jitter, kullback_leiblers 9 | from gpflow.conditionals import conditional 10 | from gpflow.covariances import Kuf, Kuu 11 | from gpflow.models import GPModel 12 | from gpflow.models.training_mixins import InputData, RegressionData 13 | from gpflow.models.util import inducingpoint_wrapper 14 | #from gpflow.types import MeanAndVariance 15 | 16 | from src.sites import DiagSites 17 | from src.util import posterior_from_dense_site_white, project_diag_sites 18 | 19 | 20 | class t_SVGP_sites(GPModel): 21 | """ 22 | Class for the t-SVGP model with sites 23 | """ 24 | 25 | def __init__( 26 | self, 27 | data: RegressionData, 28 | kernel, 29 | likelihood, 30 | inducing_variable, 31 | *, 32 | mean_function=None, 33 | num_latent_gps: int = 1, 34 | lambda_1=None, 35 | lambda_2=None, 36 | num_latent: Optional[int] = 1 37 | ): 38 | """ 39 | - kernel, likelihood, inducing_variables, mean_function are appropriate 40 | GPflow objects 41 | - num_latent_gps is the number of latent processes to use, defaults to 1 42 | - q_diag is a boolean. If True, the covariance is approximated by a 43 | diagonal matrix. 44 | - whiten is a boolean. If True, we use the whitened representation of 45 | the inducing points. 
46 | - num_data is the total number of observations, defaults to X.shape[0] 47 | (relevant when feeding in external minibatches) 48 | """ 49 | GPModel.__init__(self, kernel, likelihood, mean_function, num_latent_gps) 50 | x_data, y_data = data 51 | num_data = x_data.shape[0] 52 | self.num_data = num_data 53 | self.num_latent = num_latent or y_data.shape[1] 54 | self.data = data 55 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 56 | 57 | self.num_inducing = self.inducing_variable.num_inducing 58 | self._init_variational_parameters(self.num_data, lambda_1, lambda_2) 59 | self.whiten = False 60 | 61 | def _init_variational_parameters(self, num_inducing, lambda_1, lambda_2): 62 | """ 63 | Constructs the site parameters λ₁, Λ₂. 64 | for site t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u) 65 | 66 | Parameters 67 | ---------- 68 | :param num_inducing: int 69 | Number of inducing variables, typically referred to as M. 70 | :param lambda_1: np.array or None 71 | First order natural parameter of the variational site. 72 | :param lambda_2: np.array or None 73 | Second order natural parameter of the variational site. 74 | """ 75 | 76 | lambda_1 = np.zeros((num_inducing, self.num_latent_gps)) if lambda_1 is None else lambda_1 77 | if lambda_2 is None: 78 | lambda_2 = ( 79 | np.ones((num_inducing, self.num_latent_gps)) * 1e-6 80 | if lambda_2 is None 81 | else lambda_2 82 | ) 83 | else: 84 | assert lambda_2.ndim == 2 85 | self.num_latent_gps = lambda_2.shape[-1] 86 | 87 | self.sites = DiagSites(lambda_1, lambda_2) 88 | 89 | @property 90 | def lambda_1(self): 91 | """first natural parameter""" 92 | return self.sites.lambda_1 93 | 94 | @property 95 | def lambda_2(self): 96 | """second natural parameter""" 97 | return self.sites.lambda_2 98 | 99 | def get_mean_chol_cov_inducing_posterior(self): 100 | """ 101 | Computes the mean and cholesky factor of the posterior 102 | on the inducing variables q(u) = 𝓝(u; m, S) 103 | S = (K⁻¹ + Λ₂)⁻¹ = (K⁻¹ + L₂L₂ᵀ)⁻¹ = K - KL₂W⁻¹L₂ᵀK , W = (I + L₂ᵀKL₂)⁻¹ 104 | m = S λ₁ 105 | """ 106 | X, _ = self.data 107 | K_uu = Kuu( 108 | self.inducing_variable, self.kernel, jitter=default_jitter() 109 | ) # [P, M, M] or [M, M] 110 | K_uf = Kuf(self.inducing_variable, self.kernel, X) # [P, M, M] or [M, M] 111 | lambda_1, lambda_2 = project_diag_sites(K_uf, self.lambda_1, self.lambda_2, cholesky=False) 112 | return posterior_from_dense_site_white(K_uu, lambda_1, lambda_2) 113 | 114 | def natgrad_step(self, lr=0.1): 115 | """Takes natural gradient step in Variational parameters in the local parameters 116 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 117 | Input: 118 | :param: X : N x D 119 | :param: Y: N x 1 120 | :param: lr: Scalar 121 | 122 | Output: 123 | Updates the params 124 | """ 125 | X, Y = self.data 126 | mean, var = self.predict_f(X) 127 | 128 | with tf.GradientTape() as g: 129 | g.watch([mean, var]) 130 | ve = self.likelihood.variational_expectations(mean, var, Y) 131 | grads = g.gradient(ve, [mean, var]) 132 | 133 | grads = grads[0] - 2.0 * grads[1] * mean, grads[1] 134 | 135 | # compute update in natural form 136 | lambda_2 = -0.5 * self.lambda_2 137 | lambda_1 = self.lambda_1 138 | 139 | lambda_1 = (1 - lr) * lambda_1 + lr * grads[0] 140 | lambda_2 = (1 - lr) * lambda_2 + lr * grads[1] 141 | 142 | eps = 1e-8 143 | # crop hack, can't instantiate negative sites nats2 but optim might take you there 144 | lambda_2 = tf.minimum(lambda_2, -eps * tf.ones_like(lambda_2)) 145 | 146 | # To match SVGP you need to eliminate this jitter for minibatching 147 | self.lambda_1.assign(lambda_1) 148 
| self.lambda_2.assign(-2.0 * lambda_2) 149 | 150 | def prior_kl(self) -> tf.Tensor: 151 | """Returns the KL divergence KL[q(u)|p(u)]""" 152 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 153 | return kullback_leiblers.prior_kl( 154 | self.inducing_variable, self.kernel, q_mu, q_sqrt, whiten=self.whiten 155 | ) 156 | 157 | def maximum_log_likelihood_objective(self) -> tf.Tensor: 158 | """The variational lower bound""" 159 | return self.elbo() 160 | 161 | def elbo(self) -> tf.Tensor: 162 | """ 163 | This gives a variational bound (the evidence lower bound or ELBO) on 164 | the log marginal likelihood of the model. 165 | """ 166 | X, Y = self.data 167 | kl = self.prior_kl() 168 | f_mean, f_var = self.predict_f(X, full_cov=False, full_output_cov=False) 169 | var_exp = self.likelihood.variational_expectations(f_mean, f_var, Y) 170 | if self.num_data is not None: 171 | num_data = tf.cast(self.num_data, kl.dtype) 172 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 173 | scale = num_data / minibatch_size 174 | else: 175 | scale = tf.cast(1.0, kl.dtype) 176 | 177 | return tf.reduce_sum(var_exp) * scale - kl 178 | 179 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> None: 180 | q_mu, q_sqrt = self.get_mean_chol_cov_inducing_posterior() 181 | mu, var = conditional( 182 | Xnew, 183 | self.inducing_variable, 184 | self.kernel, 185 | q_mu, 186 | q_sqrt=q_sqrt, 187 | full_cov=full_cov, 188 | white=self.whiten, 189 | full_output_cov=full_output_cov, 190 | ) 191 | return mu + self.mean_function(Xnew), var 192 | -------------------------------------------------------------------------------- /src/models/tsvgp_white.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-SVGP model with whitened parameterization 3 | """ 4 | import numpy as np 5 | import tensorflow as tf 6 | from gpflow import default_float, default_jitter 7 | from gpflow.covariances import Kuf, Kuu 8 | from gpflow.models import GPModel 9 | from gpflow.models.model import RegressionData 10 | from gpflow.models.training_mixins import InputData 11 | from gpflow.models.util import inducingpoint_wrapper 12 | #from gpflow.types import MeanAndVariance 13 | 14 | import sys 15 | sys.path.append("../..") 16 | 17 | from src.sites import DenseSites 18 | from src.util import ( 19 | conditional_from_precision_sites_white, 20 | conditional_from_precision_sites_white_full, 21 | gradient_transformation_mean_var_to_expectation, 22 | kl_from_precision_sites_white, 23 | posterior_from_dense_site_white, 24 | ) 25 | 26 | from src.models.tsvgp import base_SVGP 27 | 28 | 29 | class t_SVGP_white(base_SVGP): 30 | """ 31 | Class for the t-SVGP model with whitened paramterization 32 | """ 33 | 34 | def __init__( 35 | self, 36 | kernel, 37 | likelihood, 38 | inducing_variable, 39 | *, 40 | mean_function=None, 41 | num_latent_gps: int = 1, 42 | lambda_1=None, 43 | lambda_2=None, 44 | num_data=None, 45 | ): 46 | """ 47 | - kernel, likelihood, inducing_variables, mean_function are appropriate 48 | GPflow objects 49 | - num_latent_gps is the number of latent processes to use, defaults to 1 50 | - q_diag is a boolean. If True, the covariance is approximated by a 51 | diagonal matrix. 52 | - whiten is a boolean. If True, we use the whitened representation of 53 | the inducing points. 
54 | - num_data is the total number of observations, defaults to X.shape[0] 55 | (relevant when feeding in external minibatches) 56 | """ 57 | # init the super class, accept args 58 | GPModel.__init__(self, kernel, likelihood, mean_function, num_latent_gps) 59 | 60 | self.num_data = num_data 61 | self.inducing_variable = inducingpoint_wrapper(inducing_variable) 62 | 63 | # init variational parameters 64 | self.num_inducing = self.inducing_variable.num_inducing 65 | 66 | self._init_variational_parameters(self.num_inducing, lambda_1, lambda_2) 67 | 68 | def _init_variational_parameters(self, num_inducing, lambda_1, lambda_2): 69 | """ 70 | Constructs the site parameters λ₁, Λ₂. 71 | for site t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u) 72 | 73 | Parameters 74 | ---------- 75 | :param num_inducing: int 76 | Number of inducing variables, typically referred to as M. 77 | :param lambda_1: np.array or None 78 | First order natural parameter of the variational site. 79 | :param lambda_2_sqrt: np.array or None 80 | Second order natural parameter of the variational site. 81 | """ 82 | 83 | lambda_1 = np.zeros((num_inducing, self.num_latent_gps)) if lambda_1 is None else lambda_1 84 | 85 | if lambda_2 is None: 86 | lambda_2 = [ 87 | tf.eye(num_inducing, dtype=default_float()) * 1e-10 88 | for _ in range(self.num_latent_gps) 89 | ] 90 | lambda_2 = np.array(lambda_2) 91 | else: 92 | assert lambda_2.ndim == 3 93 | self.num_latent_gps = lambda_2.shape[0] 94 | 95 | self.sites = DenseSites(lambda_1=lambda_1, lambda_2=lambda_2) 96 | 97 | @property 98 | def lambda_1(self): 99 | return self.sites.lambda_1 100 | 101 | @property 102 | def lambda_2(self): 103 | return self.sites.lambda_2 104 | 105 | def get_mean_chol_cov_inducing_posterior(self): 106 | """ 107 | Computes the mean and cholesky factor of the posterior 108 | on the inducing variables q(u) = 𝓝(u; m, S) 109 | S = (K⁻¹ + Λ₂)⁻¹ = (K⁻¹ + L₂L₂ᵀ)⁻¹ = K - KL₂W⁻¹L₂ᵀK , W = (I + L₂ᵀKL₂)⁻¹ 110 | m = S λ₁ 111 | """ 112 | # THIS FUNCTION IS WRONG 113 | K_uu = Kuu( 114 | self.inducing_variable, self.kernel, jitter=default_jitter() 115 | ) # [P, M, M] or [M, M] 116 | return posterior_from_dense_site_white(K_uu, self.lambda_1, self.lambda_2) 117 | 118 | 119 | @property 120 | def cache_statistics(self): 121 | return self.cache_statistics_from_data(self.data) 122 | 123 | def prior_kl(self) -> tf.Tensor: 124 | K_uu = Kuu( 125 | self.inducing_variable, self.kernel, jitter=default_jitter() 126 | ) # [P, M, M] or [M, M] 127 | return kl_from_precision_sites_white(K_uu, self.lambda_1, L2=self.lambda_2) 128 | 129 | def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> None: 130 | K_uu = Kuu( 131 | self.inducing_variable, self.kernel, jitter=default_jitter() 132 | ) # [P, M, M] or [M, M] 133 | K_uf = Kuf(self.inducing_variable, self.kernel, Xnew) # [P, M, M] or [M, M] 134 | #print(K_uf.shape) 135 | 136 | if full_output_cov == False: 137 | K_ff = self.kernel.K_diag(Xnew)[..., None] 138 | 139 | mu, var = conditional_from_precision_sites_white( 140 | K_uu, K_ff, K_uf, self.lambda_1, L2=self.lambda_2) 141 | else: 142 | K_ff = self.kernel.K(Xnew)[None, ...] 
143 | mu, var = conditional_from_precision_sites_white_full( 144 | K_uu, K_ff, K_uf, self.lambda_1, L2=self.lambda_2) 145 | 146 | #tf.debugging.assert_positive(var) # We really should make the tests pass with this here 147 | return mu + self.mean_function(Xnew), var 148 | 149 | def predict_f_extra_data(self, Xnew: InputData, extra_data=RegressionData, 150 | jitter=default_jitter(), full_output_cov=False) -> None: 151 | """ 152 | Compute the mean and variance of the latent function at some new points 153 | Xnew. 154 | """ 155 | 156 | grad_mu = self.grad_varexp_natural_params(extra_data) 157 | 158 | lambda_1 = self.lambda_1 159 | lambda_2 = self.lambda_2 160 | 161 | K_uu = Kuu(self.inducing_variable, self.kernel, jitter=jitter) 162 | 163 | lambda_1c = lambda_1 + grad_mu[0] 164 | lambda_2c = lambda_2 + -2*grad_mu[1] 165 | 166 | # predicting at new inputs 167 | K_uf = Kuf(self.inducing_variable, self.kernel, Xnew) 168 | 169 | if full_output_cov == False: 170 | K_ff = self.kernel.K_diag(Xnew)[..., None] 171 | 172 | mu, var = conditional_from_precision_sites_white( 173 | K_uu, K_ff, K_uf, lambda_1c, L2=lambda_2c) 174 | else: 175 | K_ff = self.kernel.K(Xnew)[None, ...] 176 | mu, var = conditional_from_precision_sites_white_full( 177 | K_uu, K_ff, K_uf, lambda_1c, L2=lambda_2c) 178 | 179 | return mu + self.mean_function(Xnew), var 180 | 181 | 182 | def elbo(self, data: RegressionData) -> tf.Tensor: 183 | """ 184 | This gives a variational bound (the evidence lower bound or ELBO) on 185 | the log marginal likelihood of the model. 186 | """ 187 | X, Y = data 188 | kl = self.prior_kl() 189 | f_mean, f_var = self.predict_f(X, full_cov=False, full_output_cov=False) 190 | var_exp = self.likelihood.variational_expectations(f_mean, f_var, Y) 191 | if self.num_data is not None: 192 | num_data = tf.cast(self.num_data, kl.dtype) 193 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 194 | scale = num_data / minibatch_size 195 | else: 196 | scale = tf.cast(1.0, kl.dtype) 197 | return tf.reduce_sum(var_exp) * scale - kl 198 | 199 | def maximum_log_likelihood_objective(self, data) -> tf.Tensor: 200 | """The variational lower bound""" 201 | return self.elbo(data) 202 | 203 | 204 | def grad_varexp_natural_params(self, data, jitter=1e-9, nat_params=None): 205 | X, Y = data 206 | # print(X.shape) 207 | mean, var = self.predict_f(X) 208 | 209 | with tf.GradientTape(persistent=True) as g: 210 | g.watch(mean) 211 | g.watch(var) 212 | ve = self.likelihood.variational_expectations(mean, var, Y) 213 | d_exp_dm = g.gradient(ve, mean) 214 | d_exp_dv = g.gradient(ve, var) 215 | del g 216 | 217 | eps = 1e-8 218 | d_exp_dv = tf.minimum(d_exp_dv, -eps * tf.ones_like(d_exp_dv)) 219 | 220 | 221 | grad_nat_1 = (d_exp_dm - 2.0 * (d_exp_dv * mean)) 222 | grad_nat_2 = d_exp_dv 223 | 224 | K_uf = Kuf(self.inducing_variable, self.kernel, X) 225 | 226 | grad_sparse_1 = K_uf @ grad_nat_1 227 | 228 | grad_sparse_2 = K_uf @ tf.linalg.diag(tf.transpose(grad_nat_2)) @ tf.transpose(K_uf) 229 | 230 | return (grad_sparse_1, grad_sparse_2) 231 | 232 | 233 | def natgrad_step(self, dataset, lr=1.0, jitter=1e-9): 234 | """Takes natural gradient step in Variational parameters in the local parameters 235 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 236 | 237 | Input: 238 | :param: X : N x D 239 | :param: Y: N x 1 240 | :param: lr: Scalar 241 | 242 | Output: 243 | Updates the params 244 | """ 245 | 246 | X, Y = dataset 247 | 248 | # chain rule at f 249 | grad_mu = self.grad_varexp_natural_params((X, Y)) 250 | # K_uu = Kuu(self.inducing_variable, self.kernel) 
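# grad_mu computed above already contains the variational-expectation gradients
# mapped onto the inducing set: grad_mu[0] = K_uf (∇_m VE - 2 ∇_v VE ⊙ m) and
# grad_mu[1] = K_uf diag(∇_v VE) K_fu (see grad_varexp_natural_params), so the
# damped update below acts directly on the site natural parameters λ₁ and Λ₂.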
251 | 252 | if self.num_data is not None: 253 | num_data = tf.cast(self.num_data, dtype=tf.float64) 254 | minibatch_size = tf.cast(tf.shape(X)[0], dtype=tf.float64) 255 | scale = num_data / minibatch_size 256 | else: 257 | scale = tf.cast(1.0, dtype=tf.float64) 258 | 259 | lambda_1 = self.lambda_1 260 | lambda_2 = self.lambda_2 261 | 262 | # compute update in natural form 263 | # Old version: projection matrix A in grad_varexp_natural_params includes Kuu^{-1} 264 | # lambda_1 = (1.0 - lr) * lambda_1 + lr * scale * K_uu @ grad_mu[0] 265 | # lambda_2 = (1.0 - lr) * lambda_2 + lr * scale * K_uu @ grad_mu[1] @ K_uu 266 | # New version: removed Kuu as well as Kuu inverse in grad_varexp_natural_params 267 | lambda_1 = (1.0 - lr) * lambda_1 + lr * scale * grad_mu[0] 268 | lambda_2 = (1.0 - lr) * lambda_2 + lr * scale * (-2) * grad_mu[1] 269 | 270 | self.lambda_1.assign(lambda_1) 271 | self.lambda_2.assign(lambda_2) -------------------------------------------------------------------------------- /src/models/tvgp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for the t-VGP model class 3 | """ 4 | from typing import Optional 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from gpflow.config import default_float, default_jitter 9 | from gpflow.kernels import Kernel 10 | from gpflow.likelihoods import Likelihood 11 | from gpflow.mean_functions import MeanFunction 12 | from gpflow.models.model import GPModel, InputData, MeanAndVariance, RegressionData 13 | from gpflow.models.training_mixins import InternalDataTrainingLossMixin 14 | 15 | from src.sites import DiagSites 16 | 17 | 18 | class t_VGP(GPModel, InternalDataTrainingLossMixin): 19 | r""" 20 | This method approximates the Gaussian process posterior using a multivariate Gaussian. 21 | 22 | The idea is that the posterior over the function-value vector F is 23 | approximated by a Gaussian, and the KL divergence is minimised between 24 | the approximation and the posterior. 25 | 26 | The key reference is: 27 | Khan, M., & Lin, W. (2017). Conjugate-Computation Variational Inference: 28 | Converting Variational Inference in Non-Conjugate Models to Inferences in Conjugate Models. 29 | In Artificial Intelligence and Statistics (pp. 878-887). 
30 | 31 | """ 32 | 33 | def __init__( 34 | self, 35 | data: RegressionData, 36 | kernel: Kernel, 37 | likelihood: Likelihood, 38 | mean_function: Optional[MeanFunction] = None, 39 | num_latent: Optional[int] = 1, 40 | ): 41 | """ 42 | X is a data matrix, size [N, D] 43 | Y is a data matrix, size [N, R] 44 | kernel, likelihood, mean_function are appropriate GPflow objects 45 | 46 | """ 47 | super().__init__(kernel, likelihood, mean_function, num_latent) 48 | 49 | x_data, y_data = data 50 | num_data = x_data.shape[0] 51 | self.num_data = num_data 52 | self.num_latent = num_latent or y_data.shape[1] 53 | self.data = data 54 | 55 | lambda_1 = np.zeros((num_data, self.num_latent)) 56 | lambda_2 = 1e-6 * np.ones((num_data, self.num_latent)) 57 | self.sites = DiagSites(lambda_1, lambda_2) 58 | 59 | @property 60 | def lambda_1(self): 61 | """first natural parameter""" 62 | return self.sites.lambda_1 63 | 64 | @property 65 | def lambda_2(self): 66 | """second natural parameter""" 67 | return self.sites.lambda_2 68 | 69 | def maximum_log_likelihood_objective(self, *args, **kwargs) -> tf.Tensor: 70 | return self.elbo() 71 | 72 | def elbo(self) -> tf.Tensor: 73 | """ 74 | This gives a variational bound (the evidence lower bound or ELBO) on 75 | the log marginal likelihood of the model. 76 | """ 77 | x_data, y_data = self.data 78 | pseudo_y = self.lambda_1 / self.lambda_2 79 | sW = tf.sqrt(tf.abs(self.lambda_2)) 80 | 81 | # Computes conversion λ₁, λ₂ → m, V by using q(f) ≃ t(f)p(f) 82 | K = self.kernel(x_data) + tf.eye(self.num_data, dtype=default_float()) * default_jitter() 83 | # L = chol(I + √λ₂ᵀ K √λ₂ᵀ) 84 | L = tf.linalg.cholesky( 85 | tf.eye(self.num_data, dtype=tf.float64) + (sW @ tf.transpose(sW)) * K 86 | ) 87 | # T = L⁻¹ λ₂ K 88 | T = tf.linalg.solve(L, tf.tile(sW, (1, self.num_data)) * K) 89 | # Σ = (K⁻¹ + λ₂)⁻¹ = K - K √λ₂ (I + √λ₂ᵀ K √λ₂ᵀ)⁻¹ √λ₂ᵀ K = K - K √λ₂L⁻ᵀL⁻¹√λ₂ᵀ K 90 | post_v = tf.reshape( 91 | tf.linalg.diag_part(K) - tf.reduce_sum(T * T, axis=0), (self.num_data, 1) 92 | ) 93 | # Σ = (K⁻¹ + λ₂)⁻¹ = (K⁻¹(I + λ₂K))⁻¹ = K (I + λ₂K)⁻¹ = K L⁻ᵀL⁻¹ 94 | # μ = Σ λ₁ = K L⁻ᵀL⁻¹ λ₂ (λ₂⁻¹λ₁) = K α 95 | alpha = sW * tf.linalg.solve(tf.transpose(L), tf.linalg.solve(L, sW * pseudo_y)) 96 | post_m = K @ alpha 97 | # Store alpha for prediction 98 | self.q_alpha = alpha 99 | 100 | # Get variational expectations. 
101 | # ELBO = E_q log(p(y,f)/q(t)) = E_q log(p(y|f)p(f))/Z⁻¹ p(f)t(f)) 102 | # = log(Z) - E_q log t(f) + E_q log p(y|f) 103 | # log_Z = \int p(f)t(f)df 104 | E_q_log_lik = tf.reduce_sum( 105 | self.likelihood.variational_expectations(post_m, post_v, y_data) 106 | ) 107 | E_q_log_t = -tf.reduce_sum(0.5 * (self.lambda_2) * ((pseudo_y - post_m) ** 2 + post_v)) 108 | log_Z = -tf.transpose(pseudo_y) @ alpha / 2.0 - tf.reduce_sum( 109 | tf.math.log(tf.linalg.diag_part(L)) 110 | ) 111 | elbo = log_Z - E_q_log_t + E_q_log_lik 112 | return elbo 113 | 114 | def update_variational_parameters(self, beta=0.05) -> tf.Tensor: 115 | """Takes natural gradient step in Variational parameters in the local parameters 116 | λₜ = rₜ▽[Var_exp] + (1-rₜ)λₜ₋₁ 117 | Input: 118 | :param: X : N x D 119 | :param: Y: N x 1 120 | :param: lr: Scalar 121 | 122 | Output: 123 | Updates the params 124 | """ 125 | 126 | x_data, y_data = self.data 127 | pseudo_y = self.lambda_1 / self.lambda_2 128 | sW = tf.sqrt(tf.abs(self.lambda_2)) 129 | 130 | # Computes conversion λ₁, λ₂ → m, V by using q(f) ≃ t(f)p(f) 131 | K = self.kernel(x_data) + tf.eye(self.num_data, dtype=default_float()) * default_jitter() 132 | L = tf.linalg.cholesky( 133 | tf.eye(self.num_data, dtype=tf.float64) + (sW @ tf.transpose(sW)) * K 134 | ) 135 | T = tf.linalg.solve(L, tf.tile(sW, (1, self.num_data)) * K) 136 | post_v = tf.reshape( 137 | tf.linalg.diag_part(K) - tf.reduce_sum(T * T, axis=0), (self.num_data, 1) 138 | ) 139 | alpha = sW * tf.linalg.solve(tf.transpose(L), tf.linalg.solve(L, sW * pseudo_y)) 140 | post_m = K @ alpha 141 | 142 | # Keep alphas updated 143 | self.q_alpha = alpha 144 | 145 | # Get variational expectations derivatives. 146 | with tf.GradientTape(persistent=True) as g: 147 | g.watch(post_m) 148 | g.watch(post_v) 149 | var_exp = self.likelihood.variational_expectations(post_m, post_v, y_data) 150 | 151 | d_exp_dm = g.gradient(var_exp, post_m) 152 | d_exp_dv = g.gradient(var_exp, post_v) 153 | del g 154 | 155 | # Take the tVGP step and transform to be ▽μ[Var_exp] 156 | lambda_1 = (1.0 - beta) * self.lambda_1 + beta * (d_exp_dm - 2.0 * (d_exp_dv * post_m)) 157 | lambda_2 = (1.0 - beta) * self.lambda_2 + beta * (-2.0 * d_exp_dv) 158 | 159 | self.lambda_1.assign(lambda_1) 160 | self.lambda_2.assign(lambda_2) 161 | 162 | def predict_f( 163 | self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False 164 | ) -> MeanAndVariance: 165 | r""" 166 | The posterior variance of F is given by 167 | q(f) = N(f | K alpha + mean, [K⁻¹ + diag(lambda²)]⁻¹) 168 | Here we project this to F*, the values of the GP at Xnew which is given 169 | by 170 | q(F*) = N ( F* | K_{*F} alpha + mean, K_{**} - K_{*f}[K_{ff} + 171 | diag(lambda⁻²)]⁻¹ K_{f*} ) 172 | 173 | """ 174 | assert full_output_cov is False 175 | x_data, _y_data = self.data 176 | 177 | # Evaluate the kernel 178 | Kx = self.kernel(x_data, Xnew) 179 | K = self.kernel(x_data) 180 | 181 | # Predictive mean 182 | f_mean = tf.linalg.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew) 183 | 184 | # Predictive var 185 | A = K + tf.linalg.diag(tf.transpose(1.0 / self.lambda_2)) 186 | L = tf.linalg.cholesky(A) 187 | Kx_tiled = tf.tile(Kx[None, ...], [self.num_latent, 1, 1]) 188 | LiKx = tf.linalg.solve(L, Kx_tiled) 189 | if full_cov: 190 | f_var = self.kernel(Xnew) - tf.linalg.matmul(LiKx, LiKx, transpose_a=True) 191 | else: 192 | f_var = self.kernel(Xnew, full_cov=False) - tf.reduce_sum(tf.square(LiKx), 1) 193 | return f_mean, tf.transpose(f_var) 194 | 
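# ---------------------------------------------------------------------------
# Minimal usage sketch for t_VGP (illustration only; the toy data, kernel and
# optimizer settings below are placeholders, not taken from the experiment
# scripts). Training alternates the CVI natural-gradient update of the sites
# (update_variational_parameters) with a gradient step on the hyperparameters.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import gpflow
    import numpy as np
    import tensorflow as tf

    rng = np.random.default_rng(0)
    X = rng.uniform(0.0, 1.0, size=(50, 1))
    Y = np.sin(12.0 * X) + 0.2 * rng.standard_normal(size=(50, 1))

    model = t_VGP((X, Y), kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())
    adam = tf.optimizers.Adam(0.01)
    for _ in range(100):
        model.update_variational_parameters(beta=0.1)  # natural-gradient step on the sites λ₁, λ₂
        adam.minimize(model.training_loss, model.trainable_variables)  # hyperparameter step on the ELBO
    f_mean, f_var = model.predict_f(X[:5])
    print(float(model.elbo()), f_mean.shape, f_var.shape)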
-------------------------------------------------------------------------------- /src/models/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from src.util import mean_cov_to_natural_param 4 | 5 | def random_shuffle(x_batch, model, m): 6 | Z_old = model.inducing_variable.Z.numpy() 7 | Z_batch = np.concatenate([Z_old, x_batch], axis=0) 8 | np.random.shuffle(Z_batch) 9 | 10 | Z_new = Z_batch[:m] 11 | new_q_mu, new_f_cov = model.predict_f(Z_new, full_output_cov=True) 12 | K_zz = model.kernel(model.inducing_variable.Z) 13 | new_l1, new_l2 = mean_cov_to_natural_param(new_q_mu, new_f_cov, K_zz) 14 | 15 | return new_l1, new_l2, Z_new 16 | 17 | 18 | # Sampling functions 19 | def piv_chol(new_batch, old_batch, model, m_z, lamb=None, use_lamb=False): 20 | combined_batch = np.concatenate([old_batch, new_batch], axis=0) 21 | K_zb = model.kernel(combined_batch) 22 | 23 | if use_lamb is True: 24 | K_zb = np.diag(np.sqrt(lamb)) @ K_zb @ np.diag(np.sqrt(lamb)) 25 | 26 | get_diag = lambda: np.diag(K_zb).copy() 27 | get_row = lambda i: K_zb[i, :] 28 | _, pi = pivoted_chol(get_diag, get_row, M=m_z) 29 | Z_new = combined_batch[pi] 30 | return Z_new, pi 31 | 32 | 33 | def fixed_Z(new_batch, old_batch, model, m_z, update=True): 34 | Z_new = old_batch 35 | return Z_new, None 36 | 37 | 38 | def update_lambda_Z_move(model, z_new, z_old): 39 | old_l1 = model.lambda_1 40 | old_l2 = model.lambda_2 41 | 42 | K_zf = model.kernel(z_old, z_new) 43 | A_p = tf.linalg.solve(model.kernel(z_old), K_zf) 44 | 45 | new_l1 = tf.transpose(A_p) @ old_l1 46 | new_l2 = tf.transpose(A_p) @ old_l2 @ A_p 47 | return new_l1, new_l2 # TODO: write test here for broadcasting 48 | 49 | 50 | def pivoted_chol(get_diag, get_row, M, err_tol=1e-6): 51 | """ 52 | A simple Python function which computes the pivoted Cholesky decomposition/approximation of a positive 53 | semi-definite operator. 54 | 55 | Args: 56 | - get_diag: A function which takes no arguments and returns the diagonal of the matrix when called. 57 | - get_row: A function which takes 1 integer argument and returns the desired row (zero indexed). 58 | - M: The maximum rank of the approximate decomposition; an integer. 59 | 60 | Returns: 61 | - R, an upper triangular matrix of column dimension equal to the target matrix. 62 | - pi, the index of the pivots.
63 | """ 64 | 65 | d = np.copy(get_diag()) 66 | N = len(d) 67 | 68 | pi = list(range(N)) 69 | 70 | R = np.zeros([M, N]) 71 | 72 | 73 | m = 0 74 | while (m < M): # and (err > err_tol): 75 | 76 | i = m + np.argmax([d[pi[j]] for j in range(m, N)]) 77 | tmp = pi[m] 78 | pi[m] = pi[i] 79 | pi[i] = tmp 80 | 81 | R[m, pi[m]] = np.sqrt(d[pi[m]]) 82 | Apim = get_row(pi[m]) 83 | for i in range(m + 1, N): 84 | if m > 0: 85 | ip = np.inner(R[:m, pi[m]], R[:m, pi[i]]) 86 | else: 87 | ip = 0 88 | R[m, pi[i]] = (Apim[pi[i]] - ip) / R[m, pi[m]] 89 | d[pi[i]] -= pow(R[m, pi[i]], 2) 90 | 91 | m += 1 92 | 93 | R = R[:m, :] 94 | return R, pi[:m] 95 | 96 | 97 | def compute_lev(model, x_data, y_data): 98 | mean, f_varM = model.predict_f(x_data, full_cov=False, full_output_cov=False) 99 | with tf.GradientTape(persistent=True) as g: 100 | g.watch(mean) 101 | g.watch(f_varM) 102 | var_expI = model.likelihood.variational_expectations(mean, f_varM, y_data) 103 | d_exp_dv = g.gradient(var_expI, f_varM) 104 | del g 105 | 106 | lamb = tf.squeeze(-2*d_exp_dv) 107 | lev = tf.abs(tf.reduce_sum(f_varM * lamb, axis=1)) 108 | 109 | if lamb.ndim > 1: 110 | lamb = tf.reduce_sum(lamb, axis=1) 111 | 112 | return lev.numpy(), lamb.numpy() 113 | 114 | 115 | def memory_picker(old_batch, model, mem_size): 116 | 117 | x_old, y_old = old_batch 118 | lev, lamb = compute_lev(model, x_old, y_old) 119 | 120 | # Weighted sampling 121 | ind = np.random.choice(np.arange(y_old.shape[0]), mem_size, p=lev/np.sum(lev)) 122 | return None, ind 123 | 124 | 125 | def random_picker(data, model, mem_size): 126 | """ 127 | Picks random memory 128 | 129 | Note: model parameter is there for uniform function definition. 130 | """ 131 | x_old, y_old = data 132 | ind = np.random.choice(np.arange(y_old.shape[0]), mem_size) 133 | return None, ind 134 | 135 | -------------------------------------------------------------------------------- /src/sites.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to declare Gaussian Exponential Family sites objects. 3 | """ 4 | 5 | import abc 6 | from typing import Optional 7 | 8 | import tensorflow as tf 9 | from gpflow.base import Module, Parameter 10 | from gpflow.config import default_float 11 | from gpflow.utilities import positive, triangular 12 | 13 | 14 | class Sites(Module, metaclass=abc.ABCMeta): 15 | """ 16 | The base sites class 17 | """ 18 | 19 | def __init__(self, name: Optional[str] = None): 20 | """ 21 | :param name: optional kernel name. 22 | """ 23 | super().__init__(name=name) 24 | 25 | 26 | class DiagSites(Sites): 27 | """ 28 | Sites with diagonal lambda_2 29 | """ 30 | 31 | def __init__(self, lambda_1, lambda_2, name: Optional[str] = None): 32 | """ 33 | :param lambda_1: first order natural parameter 34 | :param lambda_2: second order natural parameter 35 | :param name: optional kernel name. 36 | """ 37 | super().__init__(name=name) 38 | 39 | self.lambda_1 = Parameter(lambda_1, dtype=default_float(), trainable=False) # [M, P] 40 | self.lambda_2 = Parameter(lambda_2, transform=positive(), trainable=False) # [M, P] 41 | 42 | 43 | class DenseSites(Sites): 44 | """ 45 | Sites with dense lambda_2 save as a Cholesky factor 46 | """ 47 | 48 | def __init__(self, lambda_1, lambda_2_sqrt=None, lambda_2=None, name: Optional[str] = None): 49 | """ 50 | :param lambda_1: first order natural parameter 51 | :param lambda_2_sqrt: second order natural parameter 52 | :param name: optional kernel name. 
53 | """ 54 | super().__init__(name=name) 55 | 56 | self.lambda_1 = Parameter(lambda_1, dtype=default_float(), trainable=False) # [M, P] 57 | self.num_latent_gps = lambda_1.shape[0] 58 | 59 | assert (lambda_2_sqrt is not None) or (lambda_2 is not None) 60 | 61 | if lambda_2_sqrt is not None: 62 | self.factor = True 63 | self._lambda_2_sqrt = Parameter(lambda_2_sqrt, transform=triangular(), trainable=False) # [L|P, M, M] 64 | else: 65 | self._lambda_2 = Parameter(lambda_2, trainable=False) # [L|P, M, M] 66 | self.factor = False 67 | 68 | @property 69 | def lambda_2(self): 70 | """second natural parameter""" 71 | if self.factor: 72 | return self._lambda_2_sqrt @ tf.linalg.matrix_transpose(self._lambda_2_sqrt) 73 | return self._lambda_2 74 | 75 | @property 76 | def lambda_2_sqrt(self): 77 | """Cholesky factor of the second natural parameter""" 78 | if self.factor: 79 | return self._lambda_2_sqrt 80 | return tf.linalg.cholesky(self._lambda_2) 81 | -------------------------------------------------------------------------------- /src/streaming_sparse_gp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/streaming_sparse_gp/__init__.py -------------------------------------------------------------------------------- /src/streaming_sparse_gp/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/streaming_sparse_gp/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/streaming_sparse_gp/__pycache__/osvgpc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/sequential-gp/27adda3e068ab7a17117e490c01630ac8a4328b3/src/streaming_sparse_gp/__pycache__/osvgpc.cpython-38.pyc -------------------------------------------------------------------------------- /src/streaming_sparse_gp/osvgpc.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import gpflow 4 | from gpflow import Parameter, default_float 5 | from gpflow import conditionals, kullback_leiblers 6 | from gpflow.inducing_variables import InducingPoints 7 | from gpflow.models import GPModel, InternalDataTrainingLossMixin 8 | from gpflow.utilities import positive, triangular 9 | from packaging import version # required to handle GPflow breaking changes 10 | 11 | 12 | class OSVGPC(GPModel, InternalDataTrainingLossMixin): 13 | """ 14 | Online Sparse Variational GP classification. 15 | 16 | Streaming Gaussian process approximations 17 | Thang D. Bui, Cuong V. Nguyen, Richard E. 
Turner 18 | NIPS 2017 19 | """ 20 | 21 | def __init__(self, data, kernel, likelihood, mu_old, Su_old, Kaa_old, Z_old, Z, mean_function=None, 22 | q_diag=False, whiten=True, num_latent_gps=None): 23 | 24 | self.data = gpflow.models.util.data_input_to_tensor(data) 25 | # self.num_data = X.shape[0] 26 | self.num_data = None 27 | 28 | # init the super class, accept args 29 | if num_latent_gps is None: 30 | num_latent_gps = GPModel.calc_num_latent_gps_from_data(data, kernel, likelihood) 31 | super().__init__(kernel, likelihood, mean_function, num_latent_gps) 32 | 33 | self.q_diag, self.whiten = q_diag, whiten 34 | self.inducing_variable = InducingPoints(Z) 35 | num_inducing = self.inducing_variable.num_inducing 36 | 37 | # init variational parameters 38 | q_mu = np.zeros((num_inducing, self.num_latent_gps)) 39 | self.q_mu = Parameter(q_mu, dtype=default_float()) # [M, P] 40 | 41 | if q_diag: 42 | ones = np.ones( 43 | (num_inducing, self.num_latent_gps), dtype=default_float() 44 | ) 45 | self.q_sqrt = Parameter(ones, transform=positive()) # [M, P] 46 | else: 47 | np_q_sqrt = np.array( 48 | [ 49 | np.eye(num_inducing, dtype=default_float()) 50 | for _ in range(self.num_latent_gps) 51 | ] 52 | ) 53 | self.q_sqrt = Parameter(np_q_sqrt, transform=triangular()) # [P, M, M] 54 | 55 | self.mu_old = tf.Variable(mu_old, shape=tf.TensorShape(None), trainable=False) 56 | self.M_old = Z_old.shape[0] 57 | self.Su_old = tf.Variable(Su_old, shape=tf.TensorShape(None), trainable=False) 58 | self.Kaa_old = tf.Variable(Kaa_old, shape=tf.TensorShape(None), trainable=False) 59 | self.Z_old = tf.Variable(Z_old, shape=tf.TensorShape(None), trainable=False) 60 | 61 | def prior_kl(self): 62 | return kullback_leiblers.prior_kl(self.inducing_variable, self.kernel, self.q_mu, self.q_sqrt, whiten=self.whiten) 63 | 64 | def correction_term(self): 65 | Mb = self.inducing_variable.num_inducing 66 | Ma = self.M_old 67 | # jitter = gpflow.default_jitter() 68 | jitter = gpflow.utilities.to_default_float(1e-4) 69 | Saa = self.Su_old 70 | ma = self.mu_old 71 | # a is old inducing points, b is new 72 | mu, Sigma = self.predict_f(self.Z_old, full_cov=True) 73 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 74 | # Added by us for split-MNIST 75 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 76 | if Sigma.shape[0] == 1: 77 | Sigma = tf.squeeze(Sigma, axis=0) 78 | Smm = Sigma + tf.matmul(mu, mu, transpose_b=True) 79 | Kaa = gpflow.utilities.add_noise_cov(self.Kaa_old, jitter) 80 | LSa = tf.linalg.cholesky(Saa) 81 | LKa = tf.linalg.cholesky(Kaa) 82 | obj = tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LKa))) 83 | obj += - tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LSa))) 84 | 85 | Sainv_ma = tf.linalg.cholesky_solve(LSa, ma) 86 | obj += -0.5 * tf.reduce_sum(ma * Sainv_ma) 87 | obj += tf.reduce_sum(mu * Sainv_ma) 88 | 89 | Sainv_Smm = tf.linalg.cholesky_solve(LSa, Smm) 90 | Kainv_Smm = tf.linalg.cholesky_solve(LKa, Smm) 91 | obj += -0.5 * tf.reduce_sum(tf.linalg.diag_part(Sainv_Smm) - tf.linalg.diag_part(Kainv_Smm)) 92 | return obj 93 | 94 | def maximum_log_likelihood_objective(self) -> tf.Tensor: # type: ignore 95 | return self.elbo() 96 | 97 | def elbo(self): 98 | """ 99 | This gives a variational bound on the model likelihood. 100 | """ 101 | X, Y = self.data 102 | 103 | # Get prior KL. 104 | kl = self.prior_kl() 105 | 106 | # Get conditionals 107 | fmean, fvar = self.predict_f(X, full_cov=False) 108 | 109 | # Get variational expectations. 
110 | if version.parse(gpflow.__version__) < version.Version("2.6.0"): 111 | var_exp = self.likelihood.variational_expectations(fmean, fvar, Y) 112 | else: 113 | # breaking change https://github.com/GPflow/GPflow/pull/1919 114 | var_exp = self.likelihood.variational_expectations(X, fmean, fvar, Y) 115 | 116 | # re-scale for minibatch size 117 | if self.num_data is not None: 118 | raise NotImplementedError("need to update code to ExternalDataTrainingLossMixin") 119 | num_data = tf.cast(self.num_data, kl.dtype) 120 | minibatch_size = tf.cast(tf.shape(X)[0], kl.dtype) 121 | scale = num_data / minibatch_size 122 | else: 123 | scale = tf.cast(1.0, kl.dtype) 124 | 125 | # compute online correction term 126 | online_reg = self.correction_term() 127 | 128 | return tf.reduce_sum(var_exp) * scale - kl + online_reg 129 | 130 | def predict_f(self, Xnew, full_cov=False, full_output_cov=False): 131 | mu, var = conditionals.conditional(Xnew, self.inducing_variable, self.kernel, self.q_mu, 132 | q_sqrt=self.q_sqrt, full_cov=full_cov, white=self.whiten, 133 | full_output_cov=full_output_cov) 134 | return mu + self.mean_function(Xnew), var 135 | -------------------------------------------------------------------------------- /src/streaming_sparse_gp/readme.md: -------------------------------------------------------------------------------- 1 | # Streaming sparse GP 2 | 3 | The model files are taken from the official code available at https://github.com/thangbui/streaming_sparse_gp as the code is not available as a package. 4 | 5 | --------------------------------------------------------------------------------
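A minimal sketch of one streaming update with the `OSVGPC` model defined in `osvgpc.py` above (added for illustration; the toy data, the SVGP used to produce the "old" posterior summaries, and all settings are placeholders rather than values from the experiments):

```python
import gpflow
import numpy as np
import tensorflow as tf

from src.streaming_sparse_gp.osvgpc import OSVGPC

# Fit an initial sparse GP classifier on the first batch.
X0 = np.random.rand(200, 1)
Y0 = (np.sin(12.0 * X0) > 0).astype(float)
old_model = gpflow.models.SVGP(gpflow.kernels.Matern52(), gpflow.likelihoods.Bernoulli(),
                               inducing_variable=X0[:20].copy())
gpflow.optimizers.Scipy().minimize(old_model.training_loss_closure((X0, Y0)),
                                   old_model.trainable_variables, options=dict(maxiter=100))

# Summarise the old posterior at its inducing inputs.
Z_old = old_model.inducing_variable.Z.numpy()
mu_old, Su_old = old_model.predict_f(Z_old, full_cov=True)
Su_old = tf.squeeze(Su_old, axis=0)            # [M, M] for a single latent GP
Kaa_old = old_model.kernel(Z_old)

# Condition on the old posterior while fitting the next batch.
X1 = np.random.rand(200, 1) + 1.0
Y1 = (np.sin(12.0 * X1) > 0).astype(float)
Z_new = np.concatenate([Z_old, X1[:10]], axis=0)

online_model = OSVGPC((X1, Y1), old_model.kernel, old_model.likelihood,
                      mu_old, Su_old, Kaa_old, Z_old, Z_new)
gpflow.optimizers.Scipy().minimize(online_model.training_loss,
                                   online_model.trainable_variables, options=dict(maxiter=100))
```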